ddraw: Properly clear the clip list if ddraw_clipper_SetClipList() is called with...
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
/* Forward declarations of the two blit entry points. Both are static, so
 * their definitions live elsewhere in this translation unit; they are
 * declared here so code above those definitions can call them. */
static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
        WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
/* Texture set-up for drawing a textured quad, as filled in by
 * surface_get_blt_info(). */
struct blt_info
{
    GLenum binding;             /* GL_TEXTURE_BINDING_* enum matching bind_target. */
    GLenum bind_target;         /* Target passed to glEnable() / texture binding. */
    enum tex_types tex_type;    /* tex_2d, tex_rect or tex_cube. */
    GLfloat coords[4][3];       /* One 3-component texture coordinate per quad vertex. */
};
171
/* Floating point rectangle: left, top, right and bottom edges. */
struct float_rect
{
    float l;
    float t;
    float r;
    float b;
};
179
/* Map the edges of rectangle r, given in units of a w x h area, to the
 * range [-1.0, 1.0]: 0 maps to -1.0 and w (respectively h) maps to 1.0.
 * The result is written to f. */
static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
{
    f->l = ((r->left * 2.0f) / w) - 1.0f;
    f->t = ((r->top * 2.0f) / h) - 1.0f;
    f->r = ((r->right * 2.0f) / w) - 1.0f;
    f->b = ((r->bottom * 2.0f) / h) - 1.0f;
}
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, Filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
518     surface->resource.heapMemory = NULL;
519
520     return WINED3D_OK;
521 }
522
523 static BOOL surface_need_pbo(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
524 {
525     if (surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
526         return FALSE;
527     if (!(surface->flags & SFLAG_DYNLOCK))
528         return FALSE;
529     if (surface->flags & (SFLAG_CONVERTED | SFLAG_NONPOW2 | SFLAG_PIN_SYSMEM))
530         return FALSE;
531     if (!gl_info->supported[ARB_PIXEL_BUFFER_OBJECT])
532         return FALSE;
533
534     return TRUE;
535 }
536
537 static void surface_load_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
538 {
539     struct wined3d_context *context;
540     GLenum error;
541
542     context = context_acquire(surface->resource.device, NULL);
543     ENTER_GL();
544
545     GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
546     error = glGetError();
547     if (!surface->pbo || error != GL_NO_ERROR)
548         ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
549
550     TRACE("Binding PBO %u.\n", surface->pbo);
551
552     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
553     checkGLcall("glBindBufferARB");
554
555     GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
556             surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
557     checkGLcall("glBufferDataARB");
558
559     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
560     checkGLcall("glBindBufferARB");
561
562     /* We don't need the system memory anymore and we can't even use it for PBOs. */
563     if (!(surface->flags & SFLAG_CLIENT))
564     {
565         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
566         surface->resource.heapMemory = NULL;
567     }
568     surface->resource.allocatedMemory = NULL;
569     surface->flags |= SFLAG_PBO;
570     LEAVE_GL();
571     context_release(context);
572 }
573
574 static void surface_prepare_system_memory(struct wined3d_surface *surface)
575 {
576     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
577
578     TRACE("surface %p.\n", surface);
579
580     if (!(surface->flags & SFLAG_PBO) && surface_need_pbo(surface, gl_info))
581         surface_load_pbo(surface, gl_info);
582     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
583     {
584         /* Whatever surface we have, make sure that there is memory allocated
585          * for the downloaded copy, or a PBO to map. */
586         if (!surface->resource.heapMemory)
587             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
588
589         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
590                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
591
592         if (surface->flags & SFLAG_INSYSMEM)
593             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
594     }
595 }
596
597 static void surface_evict_sysmem(struct wined3d_surface *surface)
598 {
599     if (surface->flags & SFLAG_DONOTFREE)
600         return;
601
602     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
603     surface->resource.allocatedMemory = NULL;
604     surface->resource.heapMemory = NULL;
605     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
606 }
607
608 /* Context activation is done by the caller. */
609 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
610         struct wined3d_context *context, BOOL srgb)
611 {
612     struct wined3d_device *device = surface->resource.device;
613     DWORD active_sampler;
614
615     /* We don't need a specific texture unit, but after binding the texture
616      * the current unit is dirty. Read the unit back instead of switching to
617      * 0, this avoids messing around with the state manager's GL states. The
618      * current texture unit should always be a valid one.
619      *
620      * To be more specific, this is tricky because we can implicitly be
621      * called from sampler() in state.c. This means we can't touch anything
622      * other than whatever happens to be the currently active texture, or we
623      * would risk marking already applied sampler states dirty again. */
624     active_sampler = device->rev_tex_unit_map[context->active_texture];
625
626     if (active_sampler != WINED3D_UNMAPPED_STAGE)
627         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
628     surface_bind(surface, context, srgb);
629 }
630
631 static void surface_force_reload(struct wined3d_surface *surface)
632 {
633     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
634 }
635
636 static void surface_release_client_storage(struct wined3d_surface *surface)
637 {
638     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
639
640     ENTER_GL();
641     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
642     if (surface->texture_name)
643     {
644         surface_bind_and_dirtify(surface, context, FALSE);
645         glTexImage2D(surface->texture_target, surface->texture_level,
646                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
647     }
648     if (surface->texture_name_srgb)
649     {
650         surface_bind_and_dirtify(surface, context, TRUE);
651         glTexImage2D(surface->texture_target, surface->texture_level,
652                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
653     }
654     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
655     LEAVE_GL();
656
657     context_release(context);
658
659     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
660     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
661     surface_force_reload(surface);
662 }
663
664 static HRESULT surface_private_setup(struct wined3d_surface *surface)
665 {
666     /* TODO: Check against the maximum texture sizes supported by the video card. */
667     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
668     unsigned int pow2Width, pow2Height;
669
670     TRACE("surface %p.\n", surface);
671
672     surface->texture_name = 0;
673     surface->texture_target = GL_TEXTURE_2D;
674
675     /* Non-power2 support */
676     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
677     {
678         pow2Width = surface->resource.width;
679         pow2Height = surface->resource.height;
680     }
681     else
682     {
683         /* Find the nearest pow2 match */
684         pow2Width = pow2Height = 1;
685         while (pow2Width < surface->resource.width)
686             pow2Width <<= 1;
687         while (pow2Height < surface->resource.height)
688             pow2Height <<= 1;
689     }
690     surface->pow2Width = pow2Width;
691     surface->pow2Height = pow2Height;
692
693     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
694     {
695         /* TODO: Add support for non power two compressed textures. */
696         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
697         {
698             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
699                   surface, surface->resource.width, surface->resource.height);
700             return WINED3DERR_NOTAVAILABLE;
701         }
702     }
703
704     if (pow2Width != surface->resource.width
705             || pow2Height != surface->resource.height)
706     {
707         surface->flags |= SFLAG_NONPOW2;
708     }
709
710     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
711             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
712     {
713         /* One of three options:
714          * 1: Do the same as we do with NPOT and scale the texture, (any
715          *    texture ops would require the texture to be scaled which is
716          *    potentially slow)
717          * 2: Set the texture to the maximum size (bad idea).
718          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
719          * 4: Create the surface, but allow it to be used only for DirectDraw
720          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
721          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
722          *    the render target. */
723         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
724         {
725             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
726             return WINED3DERR_NOTAVAILABLE;
727         }
728
729         /* We should never use this surface in combination with OpenGL! */
730         TRACE("Creating an oversized surface: %ux%u.\n",
731                 surface->pow2Width, surface->pow2Height);
732     }
733     else
734     {
735         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
736          * and EXT_PALETTED_TEXTURE is used in combination with texture
737          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
738          * EXT_PALETTED_TEXTURE doesn't work in combination with
739          * ARB_TEXTURE_RECTANGLE. */
740         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
741                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
742                 && gl_info->supported[EXT_PALETTED_TEXTURE]
743                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
744         {
745             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
746             surface->pow2Width = surface->resource.width;
747             surface->pow2Height = surface->resource.height;
748             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
749         }
750     }
751
752     switch (wined3d_settings.offscreen_rendering_mode)
753     {
754         case ORM_FBO:
755             surface->get_drawable_size = get_drawable_size_fbo;
756             break;
757
758         case ORM_BACKBUFFER:
759             surface->get_drawable_size = get_drawable_size_backbuffer;
760             break;
761
762         default:
763             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
764             return WINED3DERR_INVALIDCALL;
765     }
766
767     surface->flags |= SFLAG_INSYSMEM;
768
769     return WINED3D_OK;
770 }
771
772 static void surface_realize_palette(struct wined3d_surface *surface)
773 {
774     struct wined3d_palette *palette = surface->palette;
775
776     TRACE("surface %p.\n", surface);
777
778     if (!palette) return;
779
780     if (surface->resource.format->id == WINED3DFMT_P8_UINT
781             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
782     {
783         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
784         {
785             /* Make sure the texture is up to date. This call doesn't do
786              * anything if the texture is already up to date. */
787             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
788
789             /* We want to force a palette refresh, so mark the drawable as not being up to date */
790             if (!surface_is_offscreen(surface))
791                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
792         }
793         else
794         {
795             if (!(surface->flags & SFLAG_INSYSMEM))
796             {
797                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
798                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
799             }
800             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
801         }
802     }
803
804     if (surface->flags & SFLAG_DIBSECTION)
805     {
806         RGBQUAD col[256];
807         unsigned int i;
808
809         TRACE("Updating the DC's palette.\n");
810
811         for (i = 0; i < 256; ++i)
812         {
813             col[i].rgbRed   = palette->palents[i].peRed;
814             col[i].rgbGreen = palette->palents[i].peGreen;
815             col[i].rgbBlue  = palette->palents[i].peBlue;
816             col[i].rgbReserved = 0;
817         }
818         SetDIBColorTable(surface->hDC, 0, 256, col);
819     }
820
821     /* Propagate the changes to the drawable when we have a palette. */
822     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
823         surface_load_location(surface, surface->draw_binding, NULL);
824 }
825
826 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
827 {
828     HRESULT hr;
829
830     /* If there's no destination surface there is nothing to do. */
831     if (!surface->overlay_dest)
832         return WINED3D_OK;
833
834     /* Blt calls ModifyLocation on the dest surface, which in turn calls
835      * DrawOverlay to update the overlay. Prevent an endless recursion. */
836     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
837         return WINED3D_OK;
838
839     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
840     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
841             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
842     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
843
844     return hr;
845 }
846
847 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
848 {
849     struct wined3d_device *device = surface->resource.device;
850     const RECT *pass_rect = rect;
851
852     TRACE("surface %p, rect %s, flags %#x.\n",
853             surface, wine_dbgstr_rect(rect), flags);
854
855     if (flags & WINED3DLOCK_DISCARD)
856     {
857         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
858         surface_prepare_system_memory(surface);
859         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
860     }
861     else
862     {
863         /* surface_load_location() does not check if the rectangle specifies
864          * the full surface. Most callers don't need that, so do it here. */
865         if (rect && !rect->top && !rect->left
866                 && rect->right == surface->resource.width
867                 && rect->bottom == surface->resource.height)
868             pass_rect = NULL;
869         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
870     }
871
872     if (surface->flags & SFLAG_PBO)
873     {
874         const struct wined3d_gl_info *gl_info;
875         struct wined3d_context *context;
876
877         context = context_acquire(device, NULL);
878         gl_info = context->gl_info;
879
880         ENTER_GL();
881         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
882         checkGLcall("glBindBufferARB");
883
884         /* This shouldn't happen but could occur if some other function
885          * didn't handle the PBO properly. */
886         if (surface->resource.allocatedMemory)
887             ERR("The surface already has PBO memory allocated.\n");
888
889         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
890         checkGLcall("glMapBufferARB");
891
892         /* Make sure the PBO isn't set anymore in order not to break non-PBO
893          * calls. */
894         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
895         checkGLcall("glBindBufferARB");
896
897         LEAVE_GL();
898         context_release(context);
899     }
900
901     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
902     {
903         if (!rect)
904             surface_add_dirty_rect(surface, NULL);
905         else
906         {
907             struct wined3d_box b;
908
909             b.left = rect->left;
910             b.top = rect->top;
911             b.right = rect->right;
912             b.bottom = rect->bottom;
913             b.front = 0;
914             b.back = 1;
915             surface_add_dirty_rect(surface, &b);
916         }
917     }
918 }
919
920 static void surface_unmap(struct wined3d_surface *surface)
921 {
922     struct wined3d_device *device = surface->resource.device;
923     BOOL fullsurface;
924
925     TRACE("surface %p.\n", surface);
926
927     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
928
929     if (surface->flags & SFLAG_PBO)
930     {
931         const struct wined3d_gl_info *gl_info;
932         struct wined3d_context *context;
933
934         TRACE("Freeing PBO memory.\n");
935
936         context = context_acquire(device, NULL);
937         gl_info = context->gl_info;
938
939         ENTER_GL();
940         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
941         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
942         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
943         checkGLcall("glUnmapBufferARB");
944         LEAVE_GL();
945         context_release(context);
946
947         surface->resource.allocatedMemory = NULL;
948     }
949
950     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
951
952     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
953     {
954         TRACE("Not dirtified, nothing to do.\n");
955         goto done;
956     }
957
958     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
959             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
960     {
961         if (!surface->dirtyRect.left && !surface->dirtyRect.top
962                 && surface->dirtyRect.right == surface->resource.width
963                 && surface->dirtyRect.bottom == surface->resource.height)
964         {
965             fullsurface = TRUE;
966         }
967         else
968         {
969             /* TODO: Proper partial rectangle tracking. */
970             fullsurface = FALSE;
971             surface->flags |= SFLAG_INSYSMEM;
972         }
973
974         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
975
976         /* Partial rectangle tracking is not commonly implemented, it is only
977          * done for render targets. INSYSMEM was set before to tell
978          * surface_load_location() where to read the rectangle from.
979          * Indrawable is set because all modifications from the partial
980          * sysmem copy are written back to the drawable, thus the surface is
981          * merged again in the drawable. The sysmem copy is not fully up to
982          * date because only a subrectangle was read in Map(). */
983         if (!fullsurface)
984         {
985             surface_modify_location(surface, surface->draw_binding, TRUE);
986             surface_evict_sysmem(surface);
987         }
988
989         surface->dirtyRect.left = surface->resource.width;
990         surface->dirtyRect.top = surface->resource.height;
991         surface->dirtyRect.right = 0;
992         surface->dirtyRect.bottom = 0;
993     }
994     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
995     {
996         FIXME("Depth / stencil buffer locking is not implemented.\n");
997     }
998
999 done:
1000     /* Overlays have to be redrawn manually after changes with the GL implementation */
1001     if (surface->overlay_dest)
1002         surface_draw_overlay(surface);
1003 }
1004
1005 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1006 {
1007     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1008         return FALSE;
1009     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1010         return FALSE;
1011     return TRUE;
1012 }
1013
1014 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1015         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1016 {
1017     const struct wined3d_gl_info *gl_info;
1018     struct wined3d_context *context;
1019     DWORD src_mask, dst_mask;
1020     GLbitfield gl_mask;
1021
1022     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1023             device, src_surface, wine_dbgstr_rect(src_rect),
1024             dst_surface, wine_dbgstr_rect(dst_rect));
1025
1026     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1027     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1028
1029     if (src_mask != dst_mask)
1030     {
1031         ERR("Incompatible formats %s and %s.\n",
1032                 debug_d3dformat(src_surface->resource.format->id),
1033                 debug_d3dformat(dst_surface->resource.format->id));
1034         return;
1035     }
1036
1037     if (!src_mask)
1038     {
1039         ERR("Not a depth / stencil format: %s.\n",
1040                 debug_d3dformat(src_surface->resource.format->id));
1041         return;
1042     }
1043
1044     gl_mask = 0;
1045     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1046         gl_mask |= GL_DEPTH_BUFFER_BIT;
1047     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1048         gl_mask |= GL_STENCIL_BUFFER_BIT;
1049
1050     /* Make sure the locations are up-to-date. Loading the destination
1051      * surface isn't required if the entire surface is overwritten. */
1052     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1053     if (!surface_is_full_rect(dst_surface, dst_rect))
1054         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1055
1056     context = context_acquire(device, NULL);
1057     if (!context->valid)
1058     {
1059         context_release(context);
1060         WARN("Invalid context, skipping blit.\n");
1061         return;
1062     }
1063
1064     gl_info = context->gl_info;
1065
1066     ENTER_GL();
1067
1068     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1069     glReadBuffer(GL_NONE);
1070     checkGLcall("glReadBuffer()");
1071     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1072
1073     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1074     context_set_draw_buffer(context, GL_NONE);
1075     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1076
1077     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1078     {
1079         glDepthMask(GL_TRUE);
1080         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_ZWRITEENABLE));
1081     }
1082     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1083     {
1084         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1085         {
1086             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1087             context_invalidate_state(context, STATE_RENDER(WINED3D_RS_TWOSIDEDSTENCILMODE));
1088         }
1089         glStencilMask(~0U);
1090         context_invalidate_state(context, STATE_RENDER(WINED3D_RS_STENCILWRITEMASK));
1091     }
1092
1093     glDisable(GL_SCISSOR_TEST);
1094     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1095
1096     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1097             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1098     checkGLcall("glBlitFramebuffer()");
1099
1100     LEAVE_GL();
1101
1102     if (wined3d_settings.strict_draw_ordering)
1103         wglFlush(); /* Flush to ensure ordering across contexts. */
1104
1105     context_release(context);
1106 }
1107
1108 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1109  * Depth / stencil is not supported. */
1110 static void surface_blt_fbo(const struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1111         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1112         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1113 {
1114     const struct wined3d_gl_info *gl_info;
1115     struct wined3d_context *context;
1116     RECT src_rect, dst_rect;
1117     GLenum gl_filter;
1118     GLenum buffer;
1119
1120     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1121     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1122             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1123     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1124             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1125
1126     src_rect = *src_rect_in;
1127     dst_rect = *dst_rect_in;
1128
1129     switch (filter)
1130     {
1131         case WINED3DTEXF_LINEAR:
1132             gl_filter = GL_LINEAR;
1133             break;
1134
1135         default:
1136             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1137         case WINED3DTEXF_NONE:
1138         case WINED3DTEXF_POINT:
1139             gl_filter = GL_NEAREST;
1140             break;
1141     }
1142
1143     /* Resolve the source surface first if needed. */
1144     if (src_location == SFLAG_INRB_MULTISAMPLE
1145             && (src_surface->resource.format->id != dst_surface->resource.format->id
1146                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1147                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1148         src_location = SFLAG_INRB_RESOLVED;
1149
1150     /* Make sure the locations are up-to-date. Loading the destination
1151      * surface isn't required if the entire surface is overwritten. (And is
1152      * in fact harmful if we're being called by surface_load_location() with
1153      * the purpose of loading the destination surface.) */
1154     surface_load_location(src_surface, src_location, NULL);
1155     if (!surface_is_full_rect(dst_surface, &dst_rect))
1156         surface_load_location(dst_surface, dst_location, NULL);
1157
1158     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1159     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1160     else context = context_acquire(device, NULL);
1161
1162     if (!context->valid)
1163     {
1164         context_release(context);
1165         WARN("Invalid context, skipping blit.\n");
1166         return;
1167     }
1168
1169     gl_info = context->gl_info;
1170
1171     if (src_location == SFLAG_INDRAWABLE)
1172     {
1173         TRACE("Source surface %p is onscreen.\n", src_surface);
1174         buffer = surface_get_gl_buffer(src_surface);
1175         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1176     }
1177     else
1178     {
1179         TRACE("Source surface %p is offscreen.\n", src_surface);
1180         buffer = GL_COLOR_ATTACHMENT0;
1181     }
1182
1183     ENTER_GL();
1184     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1185     glReadBuffer(buffer);
1186     checkGLcall("glReadBuffer()");
1187     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1188     LEAVE_GL();
1189
1190     if (dst_location == SFLAG_INDRAWABLE)
1191     {
1192         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1193         buffer = surface_get_gl_buffer(dst_surface);
1194         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1195     }
1196     else
1197     {
1198         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1199         buffer = GL_COLOR_ATTACHMENT0;
1200     }
1201
1202     ENTER_GL();
1203     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1204     context_set_draw_buffer(context, buffer);
1205     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1206     context_invalidate_state(context, STATE_FRAMEBUFFER);
1207
1208     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1209     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE));
1210     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE1));
1211     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE2));
1212     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_COLORWRITEENABLE3));
1213
1214     glDisable(GL_SCISSOR_TEST);
1215     context_invalidate_state(context, STATE_RENDER(WINED3D_RS_SCISSORTESTENABLE));
1216
1217     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1218             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1219     checkGLcall("glBlitFramebuffer()");
1220
1221     LEAVE_GL();
1222
1223     if (wined3d_settings.strict_draw_ordering
1224             || (dst_location == SFLAG_INDRAWABLE
1225             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1226         wglFlush();
1227
1228     context_release(context);
1229 }
1230
1231 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1232         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1233         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1234 {
1235     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1236         return FALSE;
1237
1238     /* Source and/or destination need to be on the GL side */
1239     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1240         return FALSE;
1241
1242     switch (blit_op)
1243     {
1244         case WINED3D_BLIT_OP_COLOR_BLIT:
1245             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1246                 return FALSE;
1247             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1248                 return FALSE;
1249             break;
1250
1251         case WINED3D_BLIT_OP_DEPTH_BLIT:
1252             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1253                 return FALSE;
1254             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1255                 return FALSE;
1256             break;
1257
1258         default:
1259             return FALSE;
1260     }
1261
1262     if (!(src_format->id == dst_format->id
1263             || (is_identity_fixup(src_format->color_fixup)
1264             && is_identity_fixup(dst_format->color_fixup))))
1265         return FALSE;
1266
1267     return TRUE;
1268 }
1269
1270 /* This function checks if the primary render target uses the 8bit paletted format. */
1271 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1272 {
1273     if (device->fb.render_targets && device->fb.render_targets[0])
1274     {
1275         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1276         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1277                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1278             return TRUE;
1279     }
1280     return FALSE;
1281 }
1282
1283 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1284         DWORD color, struct wined3d_color *float_color)
1285 {
1286     const struct wined3d_format *format = surface->resource.format;
1287     const struct wined3d_device *device = surface->resource.device;
1288
1289     switch (format->id)
1290     {
1291         case WINED3DFMT_P8_UINT:
1292             if (surface->palette)
1293             {
1294                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1295                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1296                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1297             }
1298             else
1299             {
1300                 float_color->r = 0.0f;
1301                 float_color->g = 0.0f;
1302                 float_color->b = 0.0f;
1303             }
1304             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1305             break;
1306
1307         case WINED3DFMT_B5G6R5_UNORM:
1308             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1309             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1310             float_color->b = (color & 0x1f) / 31.0f;
1311             float_color->a = 1.0f;
1312             break;
1313
1314         case WINED3DFMT_B8G8R8_UNORM:
1315         case WINED3DFMT_B8G8R8X8_UNORM:
1316             float_color->r = D3DCOLOR_R(color);
1317             float_color->g = D3DCOLOR_G(color);
1318             float_color->b = D3DCOLOR_B(color);
1319             float_color->a = 1.0f;
1320             break;
1321
1322         case WINED3DFMT_B8G8R8A8_UNORM:
1323             float_color->r = D3DCOLOR_R(color);
1324             float_color->g = D3DCOLOR_G(color);
1325             float_color->b = D3DCOLOR_B(color);
1326             float_color->a = D3DCOLOR_A(color);
1327             break;
1328
1329         default:
1330             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1331             return FALSE;
1332     }
1333
1334     return TRUE;
1335 }
1336
1337 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1338 {
1339     const struct wined3d_format *format = surface->resource.format;
1340
1341     switch (format->id)
1342     {
1343         case WINED3DFMT_S1_UINT_D15_UNORM:
1344             *float_depth = depth / (float)0x00007fff;
1345             break;
1346
1347         case WINED3DFMT_D16_UNORM:
1348             *float_depth = depth / (float)0x0000ffff;
1349             break;
1350
1351         case WINED3DFMT_D24_UNORM_S8_UINT:
1352         case WINED3DFMT_X8D24_UNORM:
1353             *float_depth = depth / (float)0x00ffffff;
1354             break;
1355
1356         case WINED3DFMT_D32_UNORM:
1357             *float_depth = depth / (float)0xffffffff;
1358             break;
1359
1360         default:
1361             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1362             return FALSE;
1363     }
1364
1365     return TRUE;
1366 }
1367
1368 /* Do not call while under the GL lock. */
1369 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1370 {
1371     const struct wined3d_resource *resource = &surface->resource;
1372     struct wined3d_device *device = resource->device;
1373     const struct blit_shader *blitter;
1374
1375     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1376             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1377     if (!blitter)
1378     {
1379         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1380         return WINED3DERR_INVALIDCALL;
1381     }
1382
1383     return blitter->depth_fill(device, surface, rect, depth);
1384 }
1385
1386 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1387         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1388 {
1389     struct wined3d_device *device = src_surface->resource.device;
1390
1391     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1392             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1393             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1394         return WINED3DERR_INVALIDCALL;
1395
1396     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1397
1398     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1399             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1400     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1401
1402     return WINED3D_OK;
1403 }
1404
1405 /* Do not call while under the GL lock. */
1406 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1407         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1408         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1409 {
1410     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1411     struct wined3d_device *device = dst_surface->resource.device;
1412     DWORD src_ds_flags, dst_ds_flags;
1413     RECT src_rect, dst_rect;
1414     BOOL scale, convert;
1415
1416     static const DWORD simple_blit = WINEDDBLT_ASYNC
1417             | WINEDDBLT_COLORFILL
1418             | WINEDDBLT_WAIT
1419             | WINEDDBLT_DEPTHFILL
1420             | WINEDDBLT_DONOTWAIT;
1421
1422     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1423             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1424             flags, fx, debug_d3dtexturefiltertype(filter));
1425     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1426
1427     if (fx)
1428     {
1429         TRACE("dwSize %#x.\n", fx->dwSize);
1430         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1431         TRACE("dwROP %#x.\n", fx->dwROP);
1432         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1433         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1434         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1435         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1436         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1437         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1438         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1439         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1440         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1441         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1442         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1443         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1444         TRACE("dwReserved %#x.\n", fx->dwReserved);
1445         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1446         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1447         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1448         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1449         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1450         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1451                 fx->ddckDestColorkey.color_space_low_value,
1452                 fx->ddckDestColorkey.color_space_high_value);
1453         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1454                 fx->ddckSrcColorkey.color_space_low_value,
1455                 fx->ddckSrcColorkey.color_space_high_value);
1456     }
1457
1458     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1459     {
1460         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1461         return WINEDDERR_SURFACEBUSY;
1462     }
1463
1464     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1465
1466     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1467             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1468             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1469             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1470             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1471     {
1472         WARN("The application gave us a bad destination rectangle.\n");
1473         return WINEDDERR_INVALIDRECT;
1474     }
1475
1476     if (src_surface)
1477     {
1478         surface_get_rect(src_surface, src_rect_in, &src_rect);
1479
1480         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1481                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1482                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1483                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1484                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1485         {
1486             WARN("Application gave us bad source rectangle for Blt.\n");
1487             return WINEDDERR_INVALIDRECT;
1488         }
1489     }
1490     else
1491     {
1492         memset(&src_rect, 0, sizeof(src_rect));
1493     }
1494
1495     if (!fx || !(fx->dwDDFX))
1496         flags &= ~WINEDDBLT_DDFX;
1497
1498     if (flags & WINEDDBLT_WAIT)
1499         flags &= ~WINEDDBLT_WAIT;
1500
1501     if (flags & WINEDDBLT_ASYNC)
1502     {
1503         static unsigned int once;
1504
1505         if (!once++)
1506             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1507         flags &= ~WINEDDBLT_ASYNC;
1508     }
1509
1510     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1511     if (flags & WINEDDBLT_DONOTWAIT)
1512     {
1513         static unsigned int once;
1514
1515         if (!once++)
1516             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1517         flags &= ~WINEDDBLT_DONOTWAIT;
1518     }
1519
1520     if (!device->d3d_initialized)
1521     {
1522         WARN("D3D not initialized, using fallback.\n");
1523         goto cpu;
1524     }
1525
1526     /* We want to avoid invalidating the sysmem location for converted
1527      * surfaces, since otherwise we'd have to convert the data back when
1528      * locking them. */
1529     if (dst_surface->flags & SFLAG_CONVERTED)
1530     {
1531         WARN("Converted surface, using CPU blit.\n");
1532         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1533     }
1534
1535     if (flags & ~simple_blit)
1536     {
1537         WARN("Using fallback for complex blit (%#x).\n", flags);
1538         goto fallback;
1539     }
1540
1541     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1542         src_swapchain = src_surface->container.u.swapchain;
1543     else
1544         src_swapchain = NULL;
1545
1546     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1547         dst_swapchain = dst_surface->container.u.swapchain;
1548     else
1549         dst_swapchain = NULL;
1550
1551     /* This isn't strictly needed. FBO blits for example could deal with
1552      * cross-swapchain blits by first downloading the source to a texture
1553      * before switching to the destination context. We just have this here to
1554      * not have to deal with the issue, since cross-swapchain blits should be
1555      * rare. */
1556     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1557     {
1558         FIXME("Using fallback for cross-swapchain blit.\n");
1559         goto fallback;
1560     }
1561
1562     scale = src_surface
1563             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1564             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1565     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1566
1567     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1568     if (src_surface)
1569         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1570     else
1571         src_ds_flags = 0;
1572
1573     if (src_ds_flags || dst_ds_flags)
1574     {
1575         if (flags & WINEDDBLT_DEPTHFILL)
1576         {
1577             float depth;
1578
1579             TRACE("Depth fill.\n");
1580
1581             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1582                 return WINED3DERR_INVALIDCALL;
1583
1584             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1585                 return WINED3D_OK;
1586         }
1587         else
1588         {
1589             /* Accessing depth / stencil surfaces is supposed to fail while in
1590              * a scene, except for fills, which seem to work. */
1591             if (device->inScene)
1592             {
1593                 WARN("Rejecting depth / stencil access while in scene.\n");
1594                 return WINED3DERR_INVALIDCALL;
1595             }
1596
1597             if (src_ds_flags != dst_ds_flags)
1598             {
1599                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1600                 return WINED3DERR_INVALIDCALL;
1601             }
1602
1603             if (src_rect.top || src_rect.left
1604                     || src_rect.bottom != src_surface->resource.height
1605                     || src_rect.right != src_surface->resource.width)
1606             {
1607                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1608                         wine_dbgstr_rect(&src_rect));
1609                 return WINED3DERR_INVALIDCALL;
1610             }
1611
1612             if (dst_rect.top || dst_rect.left
1613                     || dst_rect.bottom != dst_surface->resource.height
1614                     || dst_rect.right != dst_surface->resource.width)
1615             {
1616                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1617                         wine_dbgstr_rect(&src_rect));
1618                 return WINED3DERR_INVALIDCALL;
1619             }
1620
1621             if (scale)
1622             {
1623                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1624                 return WINED3DERR_INVALIDCALL;
1625             }
1626
1627             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1628                 return WINED3D_OK;
1629         }
1630     }
1631     else
1632     {
1633         /* In principle this would apply to depth blits as well, but we don't
1634          * implement those in the CPU blitter at the moment. */
1635         if ((dst_surface->flags & SFLAG_INSYSMEM)
1636                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1637         {
1638             if (scale)
1639                 TRACE("Not doing sysmem blit because of scaling.\n");
1640             else if (convert)
1641                 TRACE("Not doing sysmem blit because of format conversion.\n");
1642             else
1643                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1644         }
1645
1646         if (flags & WINEDDBLT_COLORFILL)
1647         {
1648             struct wined3d_color color;
1649
1650             TRACE("Color fill.\n");
1651
1652             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1653                 goto fallback;
1654
1655             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1656                 return WINED3D_OK;
1657         }
1658         else
1659         {
1660             TRACE("Color blit.\n");
1661
1662             /* Upload */
1663             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1664             {
1665                 if (scale)
1666                     TRACE("Not doing upload because of scaling.\n");
1667                 else if (convert)
1668                     TRACE("Not doing upload because of format conversion.\n");
1669                 else
1670                 {
1671                     POINT dst_point = {dst_rect.left, dst_rect.top};
1672
1673                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1674                     {
1675                         if (!surface_is_offscreen(dst_surface))
1676                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1677                         return WINED3D_OK;
1678                     }
1679                 }
1680             }
1681
1682             /* Use present for back -> front blits. The idea behind this is
1683              * that present is potentially faster than a blit, in particular
1684              * when FBO blits aren't available. Some ddraw applications like
1685              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1686              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1687              * applications can't blit directly to the frontbuffer. */
1688             if (dst_swapchain && dst_swapchain->back_buffers
1689                     && dst_surface == dst_swapchain->front_buffer
1690                     && src_surface == dst_swapchain->back_buffers[0])
1691             {
1692                 enum wined3d_swap_effect swap_effect = dst_swapchain->desc.swap_effect;
1693
1694                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1695
1696                 /* Set the swap effect to COPY, we don't want the backbuffer
1697                  * to become undefined. */
1698                 dst_swapchain->desc.swap_effect = WINED3D_SWAP_EFFECT_COPY;
1699                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1700                 dst_swapchain->desc.swap_effect = swap_effect;
1701
1702                 return WINED3D_OK;
1703             }
1704
1705             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1706                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1707                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1708             {
1709                 TRACE("Using FBO blit.\n");
1710
1711                 surface_blt_fbo(device, filter,
1712                         src_surface, src_surface->draw_binding, &src_rect,
1713                         dst_surface, dst_surface->draw_binding, &dst_rect);
1714                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1715                 return WINED3D_OK;
1716             }
1717
1718             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1719                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1720                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1721             {
1722                 TRACE("Using arbfp blit.\n");
1723
1724                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1725                     return WINED3D_OK;
1726             }
1727         }
1728     }
1729
1730 fallback:
1731
1732     /* Special cases for render targets. */
1733     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1734             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1735     {
1736         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1737                 src_surface, &src_rect, flags, fx, filter)))
1738             return WINED3D_OK;
1739     }
1740
1741 cpu:
1742
1743     /* For the rest call the X11 surface implementation. For render targets
1744      * this should be implemented OpenGL accelerated in BltOverride, other
1745      * blits are rather rare. */
1746     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1747 }
1748
1749 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1750         struct wined3d_surface *render_target)
1751 {
1752     TRACE("surface %p, render_target %p.\n", surface, render_target);
1753
1754     /* TODO: Check surface sizes, pools, etc. */
1755
1756     if (render_target->resource.multisample_type)
1757         return WINED3DERR_INVALIDCALL;
1758
1759     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1760 }
1761
1762 /* Context activation is done by the caller. */
1763 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1764 {
1765     if (surface->flags & SFLAG_DIBSECTION)
1766     {
1767         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1768     }
1769     else
1770     {
1771         if (!surface->resource.heapMemory)
1772             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1773         else if (!(surface->flags & SFLAG_CLIENT))
1774             ERR("Surface %p has heapMemory %p and flags %#x.\n",
1775                     surface, surface->resource.heapMemory, surface->flags);
1776
1777         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1778                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1779     }
1780
1781     ENTER_GL();
1782     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1783     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1784     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1785             surface->resource.size, surface->resource.allocatedMemory));
1786     checkGLcall("glGetBufferSubDataARB");
1787     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1788     checkGLcall("glDeleteBuffersARB");
1789     LEAVE_GL();
1790
1791     surface->pbo = 0;
1792     surface->flags &= ~SFLAG_PBO;
1793 }
1794
1795 /* Do not call while under the GL lock. */
1796 static void surface_unload(struct wined3d_resource *resource)
1797 {
1798     struct wined3d_surface *surface = surface_from_resource(resource);
1799     struct wined3d_renderbuffer_entry *entry, *entry2;
1800     struct wined3d_device *device = resource->device;
1801     const struct wined3d_gl_info *gl_info;
1802     struct wined3d_context *context;
1803
1804     TRACE("surface %p.\n", surface);
1805
1806     if (resource->pool == WINED3DPOOL_DEFAULT)
1807     {
1808         /* Default pool resources are supposed to be destroyed before Reset is called.
1809          * Implicit resources stay however. So this means we have an implicit render target
1810          * or depth stencil. The content may be destroyed, but we still have to tear down
1811          * opengl resources, so we cannot leave early.
1812          *
1813          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1814          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1815          * or the depth stencil into an FBO the texture or render buffer will be removed
1816          * and all flags get lost
1817          */
1818         if (!(surface->flags & SFLAG_PBO))
1819             surface_init_sysmem(surface);
1820         /* We also get here when the ddraw swapchain is destroyed, for example
1821          * for a mode switch. In this case this surface won't necessarily be
1822          * an implicit surface. We have to mark it lost so that the
1823          * application can restore it after the mode switch. */
1824         surface->flags |= SFLAG_LOST;
1825     }
1826     else
1827     {
1828         /* Load the surface into system memory */
1829         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1830         surface_modify_location(surface, surface->draw_binding, FALSE);
1831     }
1832     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1833     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1834     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1835
1836     context = context_acquire(device, NULL);
1837     gl_info = context->gl_info;
1838
1839     /* Destroy PBOs, but load them into real sysmem before */
1840     if (surface->flags & SFLAG_PBO)
1841         surface_remove_pbo(surface, gl_info);
1842
1843     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1844      * all application-created targets the application has to release the surface
1845      * before calling _Reset
1846      */
1847     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1848     {
1849         ENTER_GL();
1850         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1851         LEAVE_GL();
1852         list_remove(&entry->entry);
1853         HeapFree(GetProcessHeap(), 0, entry);
1854     }
1855     list_init(&surface->renderbuffers);
1856     surface->current_renderbuffer = NULL;
1857
1858     ENTER_GL();
1859
1860     /* If we're in a texture, the texture name belongs to the texture.
1861      * Otherwise, destroy it. */
1862     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1863     {
1864         glDeleteTextures(1, &surface->texture_name);
1865         surface->texture_name = 0;
1866         glDeleteTextures(1, &surface->texture_name_srgb);
1867         surface->texture_name_srgb = 0;
1868     }
1869     if (surface->rb_multisample)
1870     {
1871         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1872         surface->rb_multisample = 0;
1873     }
1874     if (surface->rb_resolved)
1875     {
1876         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1877         surface->rb_resolved = 0;
1878     }
1879
1880     LEAVE_GL();
1881
1882     context_release(context);
1883
1884     resource_unload(resource);
1885 }
1886
1887 static const struct wined3d_resource_ops surface_resource_ops =
1888 {
1889     surface_unload,
1890 };
1891
1892 static const struct wined3d_surface_ops surface_ops =
1893 {
1894     surface_private_setup,
1895     surface_realize_palette,
1896     surface_map,
1897     surface_unmap,
1898 };
1899
1900 /*****************************************************************************
1901  * Initializes the GDI surface, aka creates the DIB section we render to
1902  * The DIB section creation is done by calling GetDC, which will create the
1903  * section and releasing the dc to allow the app to use it. The dib section
1904  * will stay until the surface is released
1905  *
1906  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1907  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1908  * avoid confusion in the shared surface code.
1909  *
1910  * Returns:
1911  *  WINED3D_OK on success
1912  *  The return values of called methods on failure
1913  *
1914  *****************************************************************************/
1915 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1916 {
1917     HRESULT hr;
1918
1919     TRACE("surface %p.\n", surface);
1920
1921     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1922     {
1923         ERR("Overlays not yet supported by GDI surfaces.\n");
1924         return WINED3DERR_INVALIDCALL;
1925     }
1926
1927     /* Sysmem textures have memory already allocated - release it,
1928      * this avoids an unnecessary memcpy. */
1929     hr = surface_create_dib_section(surface);
1930     if (SUCCEEDED(hr))
1931     {
1932         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1933         surface->resource.heapMemory = NULL;
1934         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1935     }
1936
1937     /* We don't mind the nonpow2 stuff in GDI. */
1938     surface->pow2Width = surface->resource.width;
1939     surface->pow2Height = surface->resource.height;
1940
1941     return WINED3D_OK;
1942 }
1943
1944 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1945 {
1946     struct wined3d_palette *palette = surface->palette;
1947
1948     TRACE("surface %p.\n", surface);
1949
1950     if (!palette) return;
1951
1952     if (surface->flags & SFLAG_DIBSECTION)
1953     {
1954         RGBQUAD col[256];
1955         unsigned int i;
1956
1957         TRACE("Updating the DC's palette.\n");
1958
1959         for (i = 0; i < 256; ++i)
1960         {
1961             col[i].rgbRed = palette->palents[i].peRed;
1962             col[i].rgbGreen = palette->palents[i].peGreen;
1963             col[i].rgbBlue = palette->palents[i].peBlue;
1964             col[i].rgbReserved = 0;
1965         }
1966         SetDIBColorTable(surface->hDC, 0, 256, col);
1967     }
1968
1969     /* Update the image because of the palette change. Some games like e.g.
1970      * Red Alert call SetEntries a lot to implement fading. */
1971     /* Tell the swapchain to update the screen. */
1972     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1973     {
1974         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1975         if (surface == swapchain->front_buffer)
1976         {
1977             x11_copy_to_screen(swapchain, NULL);
1978         }
1979     }
1980 }
1981
1982 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1983 {
1984     TRACE("surface %p, rect %s, flags %#x.\n",
1985             surface, wine_dbgstr_rect(rect), flags);
1986
1987     if (!(surface->flags & SFLAG_DIBSECTION))
1988     {
1989         /* This happens on gdi surfaces if the application set a user pointer
1990          * and resets it. Recreate the DIB section. */
1991         surface_create_dib_section(surface);
1992         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1993     }
1994 }
1995
1996 static void gdi_surface_unmap(struct wined3d_surface *surface)
1997 {
1998     TRACE("surface %p.\n", surface);
1999
2000     /* Tell the swapchain to update the screen. */
2001     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2002     {
2003         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2004         if (surface == swapchain->front_buffer)
2005         {
2006             x11_copy_to_screen(swapchain, &surface->lockedRect);
2007         }
2008     }
2009
2010     memset(&surface->lockedRect, 0, sizeof(RECT));
2011 }
2012
2013 static const struct wined3d_surface_ops gdi_surface_ops =
2014 {
2015     gdi_surface_private_setup,
2016     gdi_surface_realize_palette,
2017     gdi_surface_map,
2018     gdi_surface_unmap,
2019 };
2020
2021 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2022 {
2023     GLuint *name;
2024     DWORD flag;
2025
2026     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2027
2028     if(srgb)
2029     {
2030         name = &surface->texture_name_srgb;
2031         flag = SFLAG_INSRGBTEX;
2032     }
2033     else
2034     {
2035         name = &surface->texture_name;
2036         flag = SFLAG_INTEXTURE;
2037     }
2038
2039     if (!*name && new_name)
2040     {
2041         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2042          * surface has no texture name yet. See if we can get rid of this. */
2043         if (surface->flags & flag)
2044             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2045         surface_modify_location(surface, flag, FALSE);
2046     }
2047
2048     *name = new_name;
2049     surface_force_reload(surface);
2050 }
2051
2052 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2053 {
2054     TRACE("surface %p, target %#x.\n", surface, target);
2055
2056     if (surface->texture_target != target)
2057     {
2058         if (target == GL_TEXTURE_RECTANGLE_ARB)
2059         {
2060             surface->flags &= ~SFLAG_NORMCOORD;
2061         }
2062         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2063         {
2064             surface->flags |= SFLAG_NORMCOORD;
2065         }
2066     }
2067     surface->texture_target = target;
2068     surface_force_reload(surface);
2069 }
2070
2071 /* Context activation is done by the caller. */
2072 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2073 {
2074     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2075
2076     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2077     {
2078         struct wined3d_texture *texture = surface->container.u.texture;
2079
2080         TRACE("Passing to container (%p).\n", texture);
2081         texture->texture_ops->texture_bind(texture, context, srgb);
2082     }
2083     else
2084     {
2085         if (surface->texture_level)
2086         {
2087             ERR("Standalone surface %p is non-zero texture level %u.\n",
2088                     surface, surface->texture_level);
2089         }
2090
2091         if (srgb)
2092             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2093
2094         ENTER_GL();
2095
2096         if (!surface->texture_name)
2097         {
2098             glGenTextures(1, &surface->texture_name);
2099             checkGLcall("glGenTextures");
2100
2101             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2102
2103             context_bind_texture(context, surface->texture_target, surface->texture_name);
2104             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2105             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2106             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2107             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2108             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2109             checkGLcall("glTexParameteri");
2110         }
2111         else
2112         {
2113             context_bind_texture(context, surface->texture_target, surface->texture_name);
2114         }
2115
2116         LEAVE_GL();
2117     }
2118 }
2119
2120 /* This call just downloads data, the caller is responsible for binding the
2121  * correct texture. */
2122 /* Context activation is done by the caller. */
2123 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2124 {
2125     const struct wined3d_format *format = surface->resource.format;
2126
2127     /* Only support read back of converted P8 surfaces. */
2128     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2129     {
2130         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2131         return;
2132     }
2133
2134     ENTER_GL();
2135
2136     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2137     {
2138         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2139                 surface, surface->texture_level, format->glFormat, format->glType,
2140                 surface->resource.allocatedMemory);
2141
2142         if (surface->flags & SFLAG_PBO)
2143         {
2144             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2145             checkGLcall("glBindBufferARB");
2146             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2147             checkGLcall("glGetCompressedTexImageARB");
2148             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2149             checkGLcall("glBindBufferARB");
2150         }
2151         else
2152         {
2153             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2154                     surface->texture_level, surface->resource.allocatedMemory));
2155             checkGLcall("glGetCompressedTexImageARB");
2156         }
2157
2158         LEAVE_GL();
2159     }
2160     else
2161     {
2162         void *mem;
2163         GLenum gl_format = format->glFormat;
2164         GLenum gl_type = format->glType;
2165         int src_pitch = 0;
2166         int dst_pitch = 0;
2167
2168         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2169         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2170         {
2171             gl_format = GL_ALPHA;
2172             gl_type = GL_UNSIGNED_BYTE;
2173         }
2174
2175         if (surface->flags & SFLAG_NONPOW2)
2176         {
2177             unsigned char alignment = surface->resource.device->surface_alignment;
2178             src_pitch = format->byte_count * surface->pow2Width;
2179             dst_pitch = wined3d_surface_get_pitch(surface);
2180             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2181             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2182         }
2183         else
2184         {
2185             mem = surface->resource.allocatedMemory;
2186         }
2187
2188         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2189                 surface, surface->texture_level, gl_format, gl_type, mem);
2190
2191         if (surface->flags & SFLAG_PBO)
2192         {
2193             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2194             checkGLcall("glBindBufferARB");
2195
2196             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2197             checkGLcall("glGetTexImage");
2198
2199             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2200             checkGLcall("glBindBufferARB");
2201         }
2202         else
2203         {
2204             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2205             checkGLcall("glGetTexImage");
2206         }
2207         LEAVE_GL();
2208
2209         if (surface->flags & SFLAG_NONPOW2)
2210         {
2211             const BYTE *src_data;
2212             BYTE *dst_data;
2213             UINT y;
2214             /*
2215              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2216              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2217              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2218              *
2219              * We're doing this...
2220              *
2221              * instead of boxing the texture :
2222              * |<-texture width ->|  -->pow2width|   /\
2223              * |111111111111111111|              |   |
2224              * |222 Texture 222222| boxed empty  | texture height
2225              * |3333 Data 33333333|              |   |
2226              * |444444444444444444|              |   \/
2227              * -----------------------------------   |
2228              * |     boxed  empty | boxed empty  | pow2height
2229              * |                  |              |   \/
2230              * -----------------------------------
2231              *
2232              *
2233              * we're repacking the data to the expected texture width
2234              *
2235              * |<-texture width ->|  -->pow2width|   /\
2236              * |111111111111111111222222222222222|   |
2237              * |222333333333333333333444444444444| texture height
2238              * |444444                           |   |
2239              * |                                 |   \/
2240              * |                                 |   |
2241              * |            empty                | pow2height
2242              * |                                 |   \/
2243              * -----------------------------------
2244              *
2245              * == is the same as
2246              *
2247              * |<-texture width ->|    /\
2248              * |111111111111111111|
2249              * |222222222222222222|texture height
2250              * |333333333333333333|
2251              * |444444444444444444|    \/
2252              * --------------------
2253              *
2254              * this also means that any references to allocatedMemory should work with the data as if were a
2255              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2256              *
2257              * internally the texture is still stored in a boxed format so any references to textureName will
2258              * get a boxed texture with width pow2width and not a texture of width resource.width.
2259              *
2260              * Performance should not be an issue, because applications normally do not lock the surfaces when
2261              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2262              * and doesn't have to be re-read. */
2263             src_data = mem;
2264             dst_data = surface->resource.allocatedMemory;
2265             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2266             for (y = 1; y < surface->resource.height; ++y)
2267             {
2268                 /* skip the first row */
2269                 src_data += src_pitch;
2270                 dst_data += dst_pitch;
2271                 memcpy(dst_data, src_data, dst_pitch);
2272             }
2273
2274             HeapFree(GetProcessHeap(), 0, mem);
2275         }
2276     }
2277
2278     /* Surface has now been downloaded */
2279     surface->flags |= SFLAG_INSYSMEM;
2280 }
2281
2282 /* This call just uploads data, the caller is responsible for binding the
2283  * correct texture. */
2284 /* Context activation is done by the caller. */
2285 static void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2286         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2287         BOOL srgb, const struct wined3d_bo_address *data)
2288 {
2289     UINT update_w = src_rect->right - src_rect->left;
2290     UINT update_h = src_rect->bottom - src_rect->top;
2291
2292     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2293             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2294             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2295
2296     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2297         update_h *= format->heightscale;
2298
2299     ENTER_GL();
2300
2301     if (data->buffer_object)
2302     {
2303         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2304         checkGLcall("glBindBufferARB");
2305     }
2306
2307     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2308     {
2309         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2310         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2311         const BYTE *addr = data->addr;
2312         GLenum internal;
2313
2314         addr += (src_rect->top / format->block_height) * src_pitch;
2315         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2316
2317         if (srgb)
2318             internal = format->glGammaInternal;
2319         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2320             internal = format->rtInternal;
2321         else
2322             internal = format->glInternal;
2323
2324         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2325                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2326                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2327
2328         if (row_length == src_pitch)
2329         {
2330             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2331                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2332         }
2333         else
2334         {
2335             UINT row, y;
2336
2337             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2338              * can't use the unpack row length like below. */
2339             for (row = 0, y = dst_point->y; row < row_count; ++row)
2340             {
2341                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2342                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2343                 y += format->block_height;
2344                 addr += src_pitch;
2345             }
2346         }
2347         checkGLcall("glCompressedTexSubImage2DARB");
2348     }
2349     else
2350     {
2351         const BYTE *addr = data->addr;
2352
2353         addr += src_rect->top * src_pitch;
2354         addr += src_rect->left * format->byte_count;
2355
2356         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2357                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2358                 update_w, update_h, format->glFormat, format->glType, addr);
2359
2360         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2361         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2362                 update_w, update_h, format->glFormat, format->glType, addr);
2363         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2364         checkGLcall("glTexSubImage2D");
2365     }
2366
2367     if (data->buffer_object)
2368     {
2369         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2370         checkGLcall("glBindBufferARB");
2371     }
2372
2373     LEAVE_GL();
2374
2375     if (wined3d_settings.strict_draw_ordering)
2376         wglFlush();
2377
2378     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2379     {
2380         struct wined3d_device *device = surface->resource.device;
2381         unsigned int i;
2382
2383         for (i = 0; i < device->context_count; ++i)
2384         {
2385             context_surface_update(device->contexts[i], surface);
2386         }
2387     }
2388 }
2389
2390 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2391         struct wined3d_surface *src_surface, const RECT *src_rect)
2392 {
2393     const struct wined3d_format *src_format;
2394     const struct wined3d_format *dst_format;
2395     const struct wined3d_gl_info *gl_info;
2396     struct wined3d_context *context;
2397     struct wined3d_bo_address data;
2398     struct wined3d_format format;
2399     UINT update_w, update_h;
2400     CONVERT_TYPES convert;
2401     UINT dst_w, dst_h;
2402     UINT src_w, src_h;
2403     UINT src_pitch;
2404     POINT p;
2405     RECT r;
2406
2407     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2408             dst_surface, wine_dbgstr_point(dst_point),
2409             src_surface, wine_dbgstr_rect(src_rect));
2410
2411     src_format = src_surface->resource.format;
2412     dst_format = dst_surface->resource.format;
2413
2414     if (src_format->id != dst_format->id)
2415     {
2416         WARN("Source and destination surfaces should have the same format.\n");
2417         return WINED3DERR_INVALIDCALL;
2418     }
2419
2420     if (!dst_point)
2421     {
2422         p.x = 0;
2423         p.y = 0;
2424         dst_point = &p;
2425     }
2426     else if (dst_point->x < 0 || dst_point->y < 0)
2427     {
2428         WARN("Invalid destination point.\n");
2429         return WINED3DERR_INVALIDCALL;
2430     }
2431
2432     if (!src_rect)
2433     {
2434         r.left = 0;
2435         r.top = 0;
2436         r.right = src_surface->resource.width;
2437         r.bottom = src_surface->resource.height;
2438         src_rect = &r;
2439     }
2440     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2441             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2442     {
2443         WARN("Invalid source rectangle.\n");
2444         return WINED3DERR_INVALIDCALL;
2445     }
2446
2447     src_w = src_surface->resource.width;
2448     src_h = src_surface->resource.height;
2449
2450     dst_w = dst_surface->resource.width;
2451     dst_h = dst_surface->resource.height;
2452
2453     update_w = src_rect->right - src_rect->left;
2454     update_h = src_rect->bottom - src_rect->top;
2455
2456     if (update_w > dst_w || dst_point->x > dst_w - update_w
2457             || update_h > dst_h || dst_point->y > dst_h - update_h)
2458     {
2459         WARN("Destination out of bounds.\n");
2460         return WINED3DERR_INVALIDCALL;
2461     }
2462
2463     /* NPOT block sizes would be silly. */
2464     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2465             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2466             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2467     {
2468         WARN("Update rect not block-aligned.\n");
2469         return WINED3DERR_INVALIDCALL;
2470     }
2471
2472     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2473     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2474     if (convert != NO_CONVERSION || format.convert)
2475     {
2476         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2477         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3DTEXF_POINT);
2478     }
2479
2480     context = context_acquire(dst_surface->resource.device, NULL);
2481     gl_info = context->gl_info;
2482
2483     /* Only load the surface for partial updates. For newly allocated texture
2484      * the texture wouldn't be the current location, and we'd upload zeroes
2485      * just to overwrite them again. */
2486     if (update_w == dst_w && update_h == dst_h)
2487         surface_prepare_texture(dst_surface, context, FALSE);
2488     else
2489         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2490     surface_bind(dst_surface, context, FALSE);
2491
2492     data.buffer_object = src_surface->pbo;
2493     data.addr = src_surface->resource.allocatedMemory;
2494     src_pitch = wined3d_surface_get_pitch(src_surface);
2495
2496     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2497
2498     invalidate_active_texture(dst_surface->resource.device, context);
2499
2500     context_release(context);
2501
2502     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2503     return WINED3D_OK;
2504 }
2505
2506 /* This call just allocates the texture, the caller is responsible for binding
2507  * the correct texture. */
2508 /* Context activation is done by the caller. */
2509 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2510         const struct wined3d_format *format, BOOL srgb)
2511 {
2512     BOOL enable_client_storage = FALSE;
2513     GLsizei width = surface->pow2Width;
2514     GLsizei height = surface->pow2Height;
2515     const BYTE *mem = NULL;
2516     GLenum internal;
2517
2518     if (srgb)
2519     {
2520         internal = format->glGammaInternal;
2521     }
2522     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2523     {
2524         internal = format->rtInternal;
2525     }
2526     else
2527     {
2528         internal = format->glInternal;
2529     }
2530
2531     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2532
2533     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2534             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2535             internal, width, height, format->glFormat, format->glType);
2536
2537     ENTER_GL();
2538
2539     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2540     {
2541         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2542                 || !surface->resource.allocatedMemory)
2543         {
2544             /* In some cases we want to disable client storage.
2545              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2546              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2547              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2548              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2549              */
2550             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2551             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2552             surface->flags &= ~SFLAG_CLIENT;
2553             enable_client_storage = TRUE;
2554         }
2555         else
2556         {
2557             surface->flags |= SFLAG_CLIENT;
2558
2559             /* Point OpenGL to our allocated texture memory. Do not use
2560              * resource.allocatedMemory here because it might point into a
2561              * PBO. Instead use heapMemory, but get the alignment right. */
2562             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2563                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2564         }
2565     }
2566
2567     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2568     {
2569         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2570                 internal, width, height, 0, surface->resource.size, mem));
2571         checkGLcall("glCompressedTexImage2DARB");
2572     }
2573     else
2574     {
2575         glTexImage2D(surface->texture_target, surface->texture_level,
2576                 internal, width, height, 0, format->glFormat, format->glType, mem);
2577         checkGLcall("glTexImage2D");
2578     }
2579
2580     if(enable_client_storage) {
2581         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2582         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2583     }
2584     LEAVE_GL();
2585 }
2586
2587 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2588  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2589 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2590 /* GL locking is done by the caller */
2591 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2592 {
2593     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2594     struct wined3d_renderbuffer_entry *entry;
2595     GLuint renderbuffer = 0;
2596     unsigned int src_width, src_height;
2597     unsigned int width, height;
2598
2599     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2600     {
2601         width = rt->pow2Width;
2602         height = rt->pow2Height;
2603     }
2604     else
2605     {
2606         width = surface->pow2Width;
2607         height = surface->pow2Height;
2608     }
2609
2610     src_width = surface->pow2Width;
2611     src_height = surface->pow2Height;
2612
2613     /* A depth stencil smaller than the render target is not valid */
2614     if (width > src_width || height > src_height) return;
2615
2616     /* Remove any renderbuffer set if the sizes match */
2617     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2618             || (width == src_width && height == src_height))
2619     {
2620         surface->current_renderbuffer = NULL;
2621         return;
2622     }
2623
2624     /* Look if we've already got a renderbuffer of the correct dimensions */
2625     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2626     {
2627         if (entry->width == width && entry->height == height)
2628         {
2629             renderbuffer = entry->id;
2630             surface->current_renderbuffer = entry;
2631             break;
2632         }
2633     }
2634
2635     if (!renderbuffer)
2636     {
2637         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2638         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2639         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2640                 surface->resource.format->glInternal, width, height);
2641
2642         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2643         entry->width = width;
2644         entry->height = height;
2645         entry->id = renderbuffer;
2646         list_add_head(&surface->renderbuffers, &entry->entry);
2647
2648         surface->current_renderbuffer = entry;
2649     }
2650
2651     checkGLcall("set_compatible_renderbuffer");
2652 }
2653
2654 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2655 {
2656     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2657
2658     TRACE("surface %p.\n", surface);
2659
2660     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2661     {
2662         ERR("Surface %p is not on a swapchain.\n", surface);
2663         return GL_NONE;
2664     }
2665
2666     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2667     {
2668         if (swapchain->render_to_fbo)
2669         {
2670             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2671             return GL_COLOR_ATTACHMENT0;
2672         }
2673         TRACE("Returning GL_BACK\n");
2674         return GL_BACK;
2675     }
2676     else if (surface == swapchain->front_buffer)
2677     {
2678         TRACE("Returning GL_FRONT\n");
2679         return GL_FRONT;
2680     }
2681
2682     FIXME("Higher back buffer, returning GL_BACK\n");
2683     return GL_BACK;
2684 }
2685
2686 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2687 void surface_add_dirty_rect(struct wined3d_surface *surface, const struct wined3d_box *dirty_rect)
2688 {
2689     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2690
2691     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2692         /* No partial locking for textures yet. */
2693         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2694
2695     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2696     if (dirty_rect)
2697     {
2698         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->left);
2699         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->top);
2700         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->right);
2701         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->bottom);
2702     }
2703     else
2704     {
2705         surface->dirtyRect.left = 0;
2706         surface->dirtyRect.top = 0;
2707         surface->dirtyRect.right = surface->resource.width;
2708         surface->dirtyRect.bottom = surface->resource.height;
2709     }
2710
2711     /* if the container is a texture then mark it dirty. */
2712     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2713     {
2714         TRACE("Passing to container.\n");
2715         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2716     }
2717 }
2718
2719 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2720 {
2721     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2722     BOOL ck_changed;
2723
2724     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2725
2726     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2727     {
2728         ERR("Not supported on scratch surfaces.\n");
2729         return WINED3DERR_INVALIDCALL;
2730     }
2731
2732     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2733
2734     /* Reload if either the texture and sysmem have different ideas about the
2735      * color key, or the actual key values changed. */
2736     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2737             && (surface->gl_color_key.color_space_low_value != surface->src_blt_color_key.color_space_low_value
2738             || surface->gl_color_key.color_space_high_value != surface->src_blt_color_key.color_space_high_value)))
2739     {
2740         TRACE("Reloading because of color keying\n");
2741         /* To perform the color key conversion we need a sysmem copy of
2742          * the surface. Make sure we have it. */
2743
2744         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2745         /* Make sure the texture is reloaded because of the color key change,
2746          * this kills performance though :( */
2747         /* TODO: This is not necessarily needed with hw palettized texture support. */
2748         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2749         /* Switching color keying on / off may change the internal format. */
2750         if (ck_changed)
2751             surface_force_reload(surface);
2752     }
2753     else if (!(surface->flags & flag))
2754     {
2755         TRACE("Reloading because surface is dirty.\n");
2756     }
2757     else
2758     {
2759         TRACE("surface is already in texture\n");
2760         return WINED3D_OK;
2761     }
2762
2763     /* No partial locking for textures yet. */
2764     surface_load_location(surface, flag, NULL);
2765     surface_evict_sysmem(surface);
2766
2767     return WINED3D_OK;
2768 }
2769
/* Convert a 32 bit float to the bit pattern of a 16 bit (half precision)
 * float. See also float_16_to_32() in wined3d_private.h
 *
 * Zero of either sign yields 0x0000, NaN yields 0x7c01 and infinities keep
 * their sign. Values too large for a half float become a signed infinity,
 * values too small become a denormal or a signed zero. The mantissa is
 * rounded to nearest, with ties away from zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The mantissa is scaled into [2^10, 2^11): ten stored bits plus the
     * implicit leading one. */
    static const float mantissa_min = 1024.0f;
    static const float mantissa_max = 2048.0f;
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* At most one of these loops runs; exp counts the net scaling. */
    while (tmp < mantissa_min)
    {
        tmp *= 2.0f;
        --exp;
    }
    while (tmp >= mantissa_max)
    {
        tmp /= 2.0f;
        ++exp;
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit mantissa. Renormalize, or the
     * masking below would drop the carry and halve the result. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2832
2833 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2834 {
2835     ULONG refcount;
2836
2837     TRACE("Surface %p, container %p of type %#x.\n",
2838             surface, surface->container.u.base, surface->container.type);
2839
2840     switch (surface->container.type)
2841     {
2842         case WINED3D_CONTAINER_TEXTURE:
2843             return wined3d_texture_incref(surface->container.u.texture);
2844
2845         case WINED3D_CONTAINER_SWAPCHAIN:
2846             return wined3d_swapchain_incref(surface->container.u.swapchain);
2847
2848         default:
2849             ERR("Unhandled container type %#x.\n", surface->container.type);
2850         case WINED3D_CONTAINER_NONE:
2851             break;
2852     }
2853
2854     refcount = InterlockedIncrement(&surface->resource.ref);
2855     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2856
2857     return refcount;
2858 }
2859
2860 /* Do not call while under the GL lock. */
2861 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2862 {
2863     ULONG refcount;
2864
2865     TRACE("Surface %p, container %p of type %#x.\n",
2866             surface, surface->container.u.base, surface->container.type);
2867
2868     switch (surface->container.type)
2869     {
2870         case WINED3D_CONTAINER_TEXTURE:
2871             return wined3d_texture_decref(surface->container.u.texture);
2872
2873         case WINED3D_CONTAINER_SWAPCHAIN:
2874             return wined3d_swapchain_decref(surface->container.u.swapchain);
2875
2876         default:
2877             ERR("Unhandled container type %#x.\n", surface->container.type);
2878         case WINED3D_CONTAINER_NONE:
2879             break;
2880     }
2881
2882     refcount = InterlockedDecrement(&surface->resource.ref);
2883     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2884
2885     if (!refcount)
2886     {
2887         surface_cleanup(surface);
2888         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2889
2890         TRACE("Destroyed surface %p.\n", surface);
2891         HeapFree(GetProcessHeap(), 0, surface);
2892     }
2893
2894     return refcount;
2895 }
2896
2897 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2898 {
2899     return resource_set_priority(&surface->resource, priority);
2900 }
2901
2902 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2903 {
2904     return resource_get_priority(&surface->resource);
2905 }
2906
2907 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2908 {
2909     TRACE("surface %p.\n", surface);
2910
2911     if (!surface->resource.device->d3d_initialized)
2912     {
2913         ERR("D3D not initialized.\n");
2914         return;
2915     }
2916
2917     surface_internal_preload(surface, SRGB_ANY);
2918 }
2919
2920 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2921 {
2922     TRACE("surface %p.\n", surface);
2923
2924     return surface->resource.parent;
2925 }
2926
2927 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2928 {
2929     TRACE("surface %p.\n", surface);
2930
2931     return &surface->resource;
2932 }
2933
2934 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2935 {
2936     TRACE("surface %p, flags %#x.\n", surface, flags);
2937
2938     switch (flags)
2939     {
2940         case WINEDDGBS_CANBLT:
2941         case WINEDDGBS_ISBLTDONE:
2942             return WINED3D_OK;
2943
2944         default:
2945             return WINED3DERR_INVALIDCALL;
2946     }
2947 }
2948
2949 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2950 {
2951     TRACE("surface %p, flags %#x.\n", surface, flags);
2952
2953     /* XXX: DDERR_INVALIDSURFACETYPE */
2954
2955     switch (flags)
2956     {
2957         case WINEDDGFS_CANFLIP:
2958         case WINEDDGFS_ISFLIPDONE:
2959             return WINED3D_OK;
2960
2961         default:
2962             return WINED3DERR_INVALIDCALL;
2963     }
2964 }
2965
2966 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2967 {
2968     TRACE("surface %p.\n", surface);
2969
2970     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2971     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2972 }
2973
2974 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2975 {
2976     TRACE("surface %p.\n", surface);
2977
2978     surface->flags &= ~SFLAG_LOST;
2979     return WINED3D_OK;
2980 }
2981
2982 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2983 {
2984     TRACE("surface %p, palette %p.\n", surface, palette);
2985
2986     if (surface->palette == palette)
2987     {
2988         TRACE("Nop palette change.\n");
2989         return WINED3D_OK;
2990     }
2991
2992     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2993         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2994
2995     surface->palette = palette;
2996
2997     if (palette)
2998     {
2999         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3000             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3001
3002         surface->surface_ops->surface_realize_palette(surface);
3003     }
3004
3005     return WINED3D_OK;
3006 }
3007
3008 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3009         DWORD flags, const struct wined3d_color_key *color_key)
3010 {
3011     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3012
3013     if (flags & WINEDDCKEY_COLORSPACE)
3014     {
3015         FIXME(" colorkey value not supported (%08x) !\n", flags);
3016         return WINED3DERR_INVALIDCALL;
3017     }
3018
3019     /* Dirtify the surface, but only if a key was changed. */
3020     if (color_key)
3021     {
3022         switch (flags & ~WINEDDCKEY_COLORSPACE)
3023         {
3024             case WINEDDCKEY_DESTBLT:
3025                 surface->dst_blt_color_key = *color_key;
3026                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3027                 break;
3028
3029             case WINEDDCKEY_DESTOVERLAY:
3030                 surface->dst_overlay_color_key = *color_key;
3031                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3032                 break;
3033
3034             case WINEDDCKEY_SRCOVERLAY:
3035                 surface->src_overlay_color_key = *color_key;
3036                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3037                 break;
3038
3039             case WINEDDCKEY_SRCBLT:
3040                 surface->src_blt_color_key = *color_key;
3041                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3042                 break;
3043         }
3044     }
3045     else
3046     {
3047         switch (flags & ~WINEDDCKEY_COLORSPACE)
3048         {
3049             case WINEDDCKEY_DESTBLT:
3050                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3051                 break;
3052
3053             case WINEDDCKEY_DESTOVERLAY:
3054                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3055                 break;
3056
3057             case WINEDDCKEY_SRCOVERLAY:
3058                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3059                 break;
3060
3061             case WINEDDCKEY_SRCBLT:
3062                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3063                 break;
3064         }
3065     }
3066
3067     return WINED3D_OK;
3068 }
3069
3070 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3071 {
3072     TRACE("surface %p.\n", surface);
3073
3074     return surface->palette;
3075 }
3076
3077 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3078 {
3079     const struct wined3d_format *format = surface->resource.format;
3080     DWORD pitch;
3081
3082     TRACE("surface %p.\n", surface);
3083
3084     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3085     {
3086         /* Since compressed formats are block based, pitch means the amount of
3087          * bytes to the next row of block rather than the next row of pixels. */
3088         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3089         pitch = row_block_count * format->block_byte_count;
3090     }
3091     else
3092     {
3093         unsigned char alignment = surface->resource.device->surface_alignment;
3094         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3095         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3096     }
3097
3098     TRACE("Returning %u.\n", pitch);
3099
3100     return pitch;
3101 }
3102
3103 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3104 {
3105     TRACE("surface %p, mem %p.\n", surface, mem);
3106
3107     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3108     {
3109         WARN("Surface is locked or the DC is in use.\n");
3110         return WINED3DERR_INVALIDCALL;
3111     }
3112
3113     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3114     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3115     {
3116         ERR("Not supported on render targets.\n");
3117         return WINED3DERR_INVALIDCALL;
3118     }
3119
3120     if (mem && mem != surface->resource.allocatedMemory)
3121     {
3122         void *release = NULL;
3123
3124         /* Do I have to copy the old surface content? */
3125         if (surface->flags & SFLAG_DIBSECTION)
3126         {
3127             DeleteDC(surface->hDC);
3128             DeleteObject(surface->dib.DIBsection);
3129             surface->dib.bitmap_data = NULL;
3130             surface->resource.allocatedMemory = NULL;
3131             surface->hDC = NULL;
3132             surface->flags &= ~SFLAG_DIBSECTION;
3133         }
3134         else if (!(surface->flags & SFLAG_USERPTR))
3135         {
3136             release = surface->resource.heapMemory;
3137             surface->resource.heapMemory = NULL;
3138         }
3139         surface->resource.allocatedMemory = mem;
3140         surface->flags |= SFLAG_USERPTR;
3141
3142         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3143         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3144
3145         /* For client textures OpenGL has to be notified. */
3146         if (surface->flags & SFLAG_CLIENT)
3147             surface_release_client_storage(surface);
3148
3149         /* Now free the old memory if any. */
3150         HeapFree(GetProcessHeap(), 0, release);
3151     }
3152     else if (surface->flags & SFLAG_USERPTR)
3153     {
3154         /* HeapMemory should be NULL already. */
3155         if (surface->resource.heapMemory)
3156             ERR("User pointer surface has heap memory allocated.\n");
3157
3158         if (!mem)
3159         {
3160             surface->resource.allocatedMemory = NULL;
3161             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3162
3163             if (surface->flags & SFLAG_CLIENT)
3164                 surface_release_client_storage(surface);
3165
3166             surface_prepare_system_memory(surface);
3167         }
3168
3169         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3170     }
3171
3172     return WINED3D_OK;
3173 }
3174
3175 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3176 {
3177     LONG w, h;
3178
3179     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3180
3181     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3182     {
3183         WARN("Not an overlay surface.\n");
3184         return WINEDDERR_NOTAOVERLAYSURFACE;
3185     }
3186
3187     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3188     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3189     surface->overlay_destrect.left = x;
3190     surface->overlay_destrect.top = y;
3191     surface->overlay_destrect.right = x + w;
3192     surface->overlay_destrect.bottom = y + h;
3193
3194     surface_draw_overlay(surface);
3195
3196     return WINED3D_OK;
3197 }
3198
3199 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3200 {
3201     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3202
3203     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3204     {
3205         TRACE("Not an overlay surface.\n");
3206         return WINEDDERR_NOTAOVERLAYSURFACE;
3207     }
3208
3209     if (!surface->overlay_dest)
3210     {
3211         TRACE("Overlay not visible.\n");
3212         *x = 0;
3213         *y = 0;
3214         return WINEDDERR_OVERLAYNOTVISIBLE;
3215     }
3216
3217     *x = surface->overlay_destrect.left;
3218     *y = surface->overlay_destrect.top;
3219
3220     TRACE("Returning position %d, %d.\n", *x, *y);
3221
3222     return WINED3D_OK;
3223 }
3224
3225 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3226         DWORD flags, struct wined3d_surface *ref)
3227 {
3228     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3229
3230     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3231     {
3232         TRACE("Not an overlay surface.\n");
3233         return WINEDDERR_NOTAOVERLAYSURFACE;
3234     }
3235
3236     return WINED3D_OK;
3237 }
3238
3239 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3240         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3241 {
3242     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3243             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3244
3245     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3246     {
3247         WARN("Not an overlay surface.\n");
3248         return WINEDDERR_NOTAOVERLAYSURFACE;
3249     }
3250     else if (!dst_surface)
3251     {
3252         WARN("Dest surface is NULL.\n");
3253         return WINED3DERR_INVALIDCALL;
3254     }
3255
3256     if (src_rect)
3257     {
3258         surface->overlay_srcrect = *src_rect;
3259     }
3260     else
3261     {
3262         surface->overlay_srcrect.left = 0;
3263         surface->overlay_srcrect.top = 0;
3264         surface->overlay_srcrect.right = surface->resource.width;
3265         surface->overlay_srcrect.bottom = surface->resource.height;
3266     }
3267
3268     if (dst_rect)
3269     {
3270         surface->overlay_destrect = *dst_rect;
3271     }
3272     else
3273     {
3274         surface->overlay_destrect.left = 0;
3275         surface->overlay_destrect.top = 0;
3276         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3277         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3278     }
3279
3280     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3281     {
3282         surface->overlay_dest = NULL;
3283         list_remove(&surface->overlay_entry);
3284     }
3285
3286     if (flags & WINEDDOVER_SHOW)
3287     {
3288         if (surface->overlay_dest != dst_surface)
3289         {
3290             surface->overlay_dest = dst_surface;
3291             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3292         }
3293     }
3294     else if (flags & WINEDDOVER_HIDE)
3295     {
3296         /* tests show that the rectangles are erased on hide */
3297         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3298         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3299         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3300         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3301         surface->overlay_dest = NULL;
3302     }
3303
3304     surface_draw_overlay(surface);
3305
3306     return WINED3D_OK;
3307 }
3308
3309 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3310 {
3311     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3312
3313     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3314
3315     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3316     {
3317         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3318         return WINED3DERR_INVALIDCALL;
3319     }
3320
3321     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3322             surface->pow2Width, surface->pow2Height);
3323     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3324     surface->resource.format = format;
3325
3326     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3327     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3328             format->glFormat, format->glInternal, format->glType);
3329
3330     return WINED3D_OK;
3331 }
3332
3333 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3334         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3335 {
3336     unsigned short *dst_s;
3337     const float *src_f;
3338     unsigned int x, y;
3339
3340     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3341
3342     for (y = 0; y < h; ++y)
3343     {
3344         src_f = (const float *)(src + y * pitch_in);
3345         dst_s = (unsigned short *) (dst + y * pitch_out);
3346         for (x = 0; x < w; ++x)
3347         {
3348             dst_s[x] = float_32_to_16(src_f + x);
3349         }
3350     }
3351 }
3352
3353 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3354         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3355 {
3356     static const unsigned char convert_5to8[] =
3357     {
3358         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3359         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3360         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3361         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3362     };
3363     static const unsigned char convert_6to8[] =
3364     {
3365         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3366         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3367         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3368         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3369         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3370         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3371         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3372         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3373     };
3374     unsigned int x, y;
3375
3376     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3377
3378     for (y = 0; y < h; ++y)
3379     {
3380         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3381         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3382         for (x = 0; x < w; ++x)
3383         {
3384             WORD pixel = src_line[x];
3385             dst_line[x] = 0xff000000
3386                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3387                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3388                     | convert_5to8[(pixel & 0x001f)];
3389         }
3390     }
3391 }
3392
3393 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3394  * in both cases we're just setting the X / Alpha channel to 0xff. */
3395 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3396         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3397 {
3398     unsigned int x, y;
3399
3400     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3401
3402     for (y = 0; y < h; ++y)
3403     {
3404         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3405         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3406
3407         for (x = 0; x < w; ++x)
3408         {
3409             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3410         }
3411     }
3412 }
3413
3414 static inline BYTE cliptobyte(int x)
3415 {
3416     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3417 }
3418
3419 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3420         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3421 {
3422     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3423     unsigned int x, y;
3424
3425     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3426
3427     for (y = 0; y < h; ++y)
3428     {
3429         const BYTE *src_line = src + y * pitch_in;
3430         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3431         for (x = 0; x < w; ++x)
3432         {
3433             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3434              *     C = Y - 16; D = U - 128; E = V - 128;
3435              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3436              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3437              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3438              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3439              * U and V are shared between the pixels. */
3440             if (!(x & 1)) /* For every even pixel, read new U and V. */
3441             {
3442                 d = (int) src_line[1] - 128;
3443                 e = (int) src_line[3] - 128;
3444                 r2 = 409 * e + 128;
3445                 g2 = - 100 * d - 208 * e + 128;
3446                 b2 = 516 * d + 128;
3447             }
3448             c2 = 298 * ((int) src_line[0] - 16);
3449             dst_line[x] = 0xff000000
3450                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3451                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3452                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3453                 /* Scale RGB values to 0..255 range,
3454                  * then clip them if still not in range (may be negative),
3455                  * then shift them within DWORD if necessary. */
3456             src_line += 2;
3457         }
3458     }
3459 }
3460
3461 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3462         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3463 {
3464     unsigned int x, y;
3465     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3466
3467     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3468
3469     for (y = 0; y < h; ++y)
3470     {
3471         const BYTE *src_line = src + y * pitch_in;
3472         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3473         for (x = 0; x < w; ++x)
3474         {
3475             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3476              *     C = Y - 16; D = U - 128; E = V - 128;
3477              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3478              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3479              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3480              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3481              * U and V are shared between the pixels. */
3482             if (!(x & 1)) /* For every even pixel, read new U and V. */
3483             {
3484                 d = (int) src_line[1] - 128;
3485                 e = (int) src_line[3] - 128;
3486                 r2 = 409 * e + 128;
3487                 g2 = - 100 * d - 208 * e + 128;
3488                 b2 = 516 * d + 128;
3489             }
3490             c2 = 298 * ((int) src_line[0] - 16);
3491             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3492                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3493                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3494                 /* Scale RGB values to 0..255 range,
3495                  * then clip them if still not in range (may be negative),
3496                  * then shift them within DWORD if necessary. */
3497             src_line += 2;
3498         }
3499     }
3500 }
3501
3502 struct d3dfmt_convertor_desc
3503 {
3504     enum wined3d_format_id from, to;
3505     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3506 };
3507
3508 static const struct d3dfmt_convertor_desc convertors[] =
3509 {
3510     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3511     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3512     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3513     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3514     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3515     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3516 };
3517
3518 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3519         enum wined3d_format_id to)
3520 {
3521     unsigned int i;
3522
3523     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3524     {
3525         if (convertors[i].from == from && convertors[i].to == to)
3526             return &convertors[i];
3527     }
3528
3529     return NULL;
3530 }
3531
3532 /*****************************************************************************
3533  * surface_convert_format
3534  *
3535  * Creates a duplicate of a surface in a different format. Is used by Blt to
3536  * blit between surfaces with different formats.
3537  *
3538  * Parameters
3539  *  source: Source surface
3540  *  fmt: Requested destination format
3541  *
3542  *****************************************************************************/
3543 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3544 {
3545     struct wined3d_mapped_rect src_map, dst_map;
3546     const struct d3dfmt_convertor_desc *conv;
3547     struct wined3d_surface *ret = NULL;
3548     HRESULT hr;
3549
3550     conv = find_convertor(source->resource.format->id, to_fmt);
3551     if (!conv)
3552     {
3553         FIXME("Cannot find a conversion function from format %s to %s.\n",
3554                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3555         return NULL;
3556     }
3557
3558     wined3d_surface_create(source->resource.device, source->resource.width,
3559             source->resource.height, to_fmt, 0 /* level */, 0 /* usage */, WINED3DPOOL_SCRATCH,
3560             WINED3D_MULTISAMPLE_NONE /* TODO: Multisampled conversion */, 0 /* MultiSampleQuality */,
3561             source->surface_type, WINED3D_SURFACE_MAPPABLE | WINED3D_SURFACE_DISCARD,
3562             NULL /* parent */, &wined3d_null_parent_ops, &ret);
3563     if (!ret)
3564     {
3565         ERR("Failed to create a destination surface for conversion.\n");
3566         return NULL;
3567     }
3568
3569     memset(&src_map, 0, sizeof(src_map));
3570     memset(&dst_map, 0, sizeof(dst_map));
3571
3572     hr = wined3d_surface_map(source, &src_map, NULL, WINED3DLOCK_READONLY);
3573     if (FAILED(hr))
3574     {
3575         ERR("Failed to lock the source surface.\n");
3576         wined3d_surface_decref(ret);
3577         return NULL;
3578     }
3579     hr = wined3d_surface_map(ret, &dst_map, NULL, WINED3DLOCK_READONLY);
3580     if (FAILED(hr))
3581     {
3582         ERR("Failed to lock the destination surface.\n");
3583         wined3d_surface_unmap(source);
3584         wined3d_surface_decref(ret);
3585         return NULL;
3586     }
3587
3588     conv->convert(src_map.data, dst_map.data, src_map.row_pitch, dst_map.row_pitch,
3589             source->resource.width, source->resource.height);
3590
3591     wined3d_surface_unmap(ret);
3592     wined3d_surface_unmap(source);
3593
3594     return ret;
3595 }
3596
3597 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3598         unsigned int bpp, UINT pitch, DWORD color)
3599 {
3600     BYTE *first;
3601     int x, y;
3602
3603     /* Do first row */
3604
3605 #define COLORFILL_ROW(type) \
3606 do { \
3607     type *d = (type *)buf; \
3608     for (x = 0; x < width; ++x) \
3609         d[x] = (type)color; \
3610 } while(0)
3611
3612     switch (bpp)
3613     {
3614         case 1:
3615             COLORFILL_ROW(BYTE);
3616             break;
3617
3618         case 2:
3619             COLORFILL_ROW(WORD);
3620             break;
3621
3622         case 3:
3623         {
3624             BYTE *d = buf;
3625             for (x = 0; x < width; ++x, d += 3)
3626             {
3627                 d[0] = (color      ) & 0xFF;
3628                 d[1] = (color >>  8) & 0xFF;
3629                 d[2] = (color >> 16) & 0xFF;
3630             }
3631             break;
3632         }
3633         case 4:
3634             COLORFILL_ROW(DWORD);
3635             break;
3636
3637         default:
3638             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3639             return WINED3DERR_NOTAVAILABLE;
3640     }
3641
3642 #undef COLORFILL_ROW
3643
3644     /* Now copy first row. */
3645     first = buf;
3646     for (y = 1; y < height; ++y)
3647     {
3648         buf += pitch;
3649         memcpy(buf, first, width * bpp);
3650     }
3651
3652     return WINED3D_OK;
3653 }
3654
3655 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3656 {
3657     TRACE("surface %p.\n", surface);
3658
3659     if (!(surface->flags & SFLAG_LOCKED))
3660     {
3661         WARN("Trying to unmap unmapped surface.\n");
3662         return WINEDDERR_NOTLOCKED;
3663     }
3664     surface->flags &= ~SFLAG_LOCKED;
3665
3666     surface->surface_ops->surface_unmap(surface);
3667
3668     return WINED3D_OK;
3669 }
3670
3671 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3672         struct wined3d_mapped_rect *mapped_rect, const RECT *rect, DWORD flags)
3673 {
3674     const struct wined3d_format *format = surface->resource.format;
3675
3676     TRACE("surface %p, mapped_rect %p, rect %s, flags %#x.\n",
3677             surface, mapped_rect, wine_dbgstr_rect(rect), flags);
3678
3679     if (surface->flags & SFLAG_LOCKED)
3680     {
3681         WARN("Surface is already mapped.\n");
3682         return WINED3DERR_INVALIDCALL;
3683     }
3684     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3685             && rect && (rect->left || rect->top
3686             || rect->right != surface->resource.width
3687             || rect->bottom != surface->resource.height))
3688     {
3689         UINT width_mask = format->block_width - 1;
3690         UINT height_mask = format->block_height - 1;
3691
3692         if ((rect->left & width_mask) || (rect->right & width_mask)
3693                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3694         {
3695             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3696                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3697
3698             if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3699                 return WINED3DERR_INVALIDCALL;
3700         }
3701     }
3702
3703     surface->flags |= SFLAG_LOCKED;
3704
3705     if (!(surface->flags & SFLAG_LOCKABLE))
3706         WARN("Trying to lock unlockable surface.\n");
3707
3708     /* Performance optimization: Count how often a surface is mapped, if it is
3709      * mapped regularly do not throw away the system memory copy. This avoids
3710      * the need to download the surface from OpenGL all the time. The surface
3711      * is still downloaded if the OpenGL texture is changed. */
3712     if (!(surface->flags & SFLAG_DYNLOCK))
3713     {
3714         if (++surface->lockCount > MAXLOCKCOUNT)
3715         {
3716             TRACE("Surface is mapped regularly, not freeing the system memory copy any more.\n");
3717             surface->flags |= SFLAG_DYNLOCK;
3718         }
3719     }
3720
3721     surface->surface_ops->surface_map(surface, rect, flags);
3722
3723     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3724         mapped_rect->row_pitch = surface->resource.width * format->byte_count;
3725     else
3726         mapped_rect->row_pitch = wined3d_surface_get_pitch(surface);
3727
3728     if (!rect)
3729     {
3730         mapped_rect->data = surface->resource.allocatedMemory;
3731         surface->lockedRect.left = 0;
3732         surface->lockedRect.top = 0;
3733         surface->lockedRect.right = surface->resource.width;
3734         surface->lockedRect.bottom = surface->resource.height;
3735     }
3736     else
3737     {
3738         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3739         {
3740             /* Compressed textures are block based, so calculate the offset of
3741              * the block that contains the top-left pixel of the locked rectangle. */
3742             mapped_rect->data = surface->resource.allocatedMemory
3743                     + ((rect->top / format->block_height) * mapped_rect->row_pitch)
3744                     + ((rect->left / format->block_width) * format->block_byte_count);
3745         }
3746         else
3747         {
3748             mapped_rect->data = surface->resource.allocatedMemory
3749                     + (mapped_rect->row_pitch * rect->top)
3750                     + (rect->left * format->byte_count);
3751         }
3752         surface->lockedRect.left = rect->left;
3753         surface->lockedRect.top = rect->top;
3754         surface->lockedRect.right = rect->right;
3755         surface->lockedRect.bottom = rect->bottom;
3756     }
3757
3758     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3759     TRACE("Returning memory %p, pitch %u.\n", mapped_rect->data, mapped_rect->row_pitch);
3760
3761     return WINED3D_OK;
3762 }
3763
3764 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3765 {
3766     struct wined3d_mapped_rect map;
3767     HRESULT hr;
3768
3769     TRACE("surface %p, dc %p.\n", surface, dc);
3770
3771     if (surface->flags & SFLAG_USERPTR)
3772     {
3773         ERR("Not supported on surfaces with application-provided memory.\n");
3774         return WINEDDERR_NODC;
3775     }
3776
3777     /* Give more detailed info for ddraw. */
3778     if (surface->flags & SFLAG_DCINUSE)
3779         return WINEDDERR_DCALREADYCREATED;
3780
3781     /* Can't GetDC if the surface is locked. */
3782     if (surface->flags & SFLAG_LOCKED)
3783         return WINED3DERR_INVALIDCALL;
3784
3785     /* Create a DIB section if there isn't a dc yet. */
3786     if (!surface->hDC)
3787     {
3788         if (surface->flags & SFLAG_CLIENT)
3789         {
3790             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3791             surface_release_client_storage(surface);
3792         }
3793         hr = surface_create_dib_section(surface);
3794         if (FAILED(hr))
3795             return WINED3DERR_INVALIDCALL;
3796
3797         /* Use the DIB section from now on if we are not using a PBO. */
3798         if (!(surface->flags & SFLAG_PBO))
3799             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3800     }
3801
3802     /* Map the surface. */
3803     hr = wined3d_surface_map(surface, &map, NULL, 0);
3804     if (FAILED(hr))
3805     {
3806         ERR("Map failed, hr %#x.\n", hr);
3807         return hr;
3808     }
3809
3810     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3811      * activates the allocatedMemory. */
3812     if (surface->flags & SFLAG_PBO)
3813         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3814
3815     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3816             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3817     {
3818         /* GetDC on palettized formats is unsupported in D3D9, and the method
3819          * is missing in D3D8, so this should only be used for DX <=7
3820          * surfaces (with non-device palettes). */
3821         const PALETTEENTRY *pal = NULL;
3822
3823         if (surface->palette)
3824         {
3825             pal = surface->palette->palents;
3826         }
3827         else
3828         {
3829             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3830             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3831
3832             if (dds_primary && dds_primary->palette)
3833                 pal = dds_primary->palette->palents;
3834         }
3835
3836         if (pal)
3837         {
3838             RGBQUAD col[256];
3839             unsigned int i;
3840
3841             for (i = 0; i < 256; ++i)
3842             {
3843                 col[i].rgbRed = pal[i].peRed;
3844                 col[i].rgbGreen = pal[i].peGreen;
3845                 col[i].rgbBlue = pal[i].peBlue;
3846                 col[i].rgbReserved = 0;
3847             }
3848             SetDIBColorTable(surface->hDC, 0, 256, col);
3849         }
3850     }
3851
3852     surface->flags |= SFLAG_DCINUSE;
3853
3854     *dc = surface->hDC;
3855     TRACE("Returning dc %p.\n", *dc);
3856
3857     return WINED3D_OK;
3858 }
3859
3860 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3861 {
3862     TRACE("surface %p, dc %p.\n", surface, dc);
3863
3864     if (!(surface->flags & SFLAG_DCINUSE))
3865         return WINEDDERR_NODC;
3866
3867     if (surface->hDC != dc)
3868     {
3869         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3870                 dc, surface->hDC);
3871         return WINEDDERR_NODC;
3872     }
3873
3874     /* Copy the contents of the DIB over to the PBO. */
3875     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3876         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3877
3878     /* We locked first, so unlock now. */
3879     wined3d_surface_unmap(surface);
3880
3881     surface->flags &= ~SFLAG_DCINUSE;
3882
3883     return WINED3D_OK;
3884 }
3885
3886 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3887 {
3888     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3889
3890     if (flags)
3891     {
3892         static UINT once;
3893         if (!once++)
3894             FIXME("Ignoring flags %#x.\n", flags);
3895         else
3896             WARN("Ignoring flags %#x.\n", flags);
3897     }
3898
3899     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3900     {
3901         ERR("Not supported on swapchain surfaces.\n");
3902         return WINEDDERR_NOTFLIPPABLE;
3903     }
3904
3905     /* Flipping is only supported on render targets and overlays. */
3906     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3907     {
3908         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3909         return WINEDDERR_NOTFLIPPABLE;
3910     }
3911
3912     flip_surface(surface, override);
3913
3914     /* Update overlays if they're visible. */
3915     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3916         return surface_draw_overlay(surface);
3917
3918     return WINED3D_OK;
3919 }
3920
3921 /* Do not call while under the GL lock. */
3922 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3923 {
3924     struct wined3d_device *device = surface->resource.device;
3925
3926     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3927
3928     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3929     {
3930         struct wined3d_texture *texture = surface->container.u.texture;
3931
3932         TRACE("Passing to container (%p).\n", texture);
3933         texture->texture_ops->texture_preload(texture, srgb);
3934     }
3935     else
3936     {
3937         struct wined3d_context *context;
3938
3939         TRACE("(%p) : About to load surface\n", surface);
3940
3941         /* TODO: Use already acquired context when possible. */
3942         context = context_acquire(device, NULL);
3943
3944         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3945
3946         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3947         {
3948             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3949             GLclampf tmp;
3950             tmp = 0.9f;
3951             ENTER_GL();
3952             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3953             LEAVE_GL();
3954         }
3955
3956         context_release(context);
3957     }
3958 }
3959
3960 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3961 {
3962     if (!surface->resource.allocatedMemory)
3963     {
3964         if (!surface->resource.heapMemory)
3965         {
3966             if (!(surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3967                     surface->resource.size + RESOURCE_ALIGNMENT)))
3968             {
3969                 ERR("Failed to allocate memory.\n");
3970                 return FALSE;
3971             }
3972         }
3973         else if (!(surface->flags & SFLAG_CLIENT))
3974         {
3975             ERR("Surface %p has heapMemory %p and flags %#x.\n",
3976                     surface, surface->resource.heapMemory, surface->flags);
3977         }
3978
3979         surface->resource.allocatedMemory =
3980             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3981     }
3982     else
3983     {
3984         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3985     }
3986
3987     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3988
3989     return TRUE;
3990 }
3991
3992 /* Read the framebuffer back into the surface */
3993 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3994 {
3995     struct wined3d_device *device = surface->resource.device;
3996     const struct wined3d_gl_info *gl_info;
3997     struct wined3d_context *context;
3998     BYTE *mem;
3999     GLint fmt;
4000     GLint type;
4001     BYTE *row, *top, *bottom;
4002     int i;
4003     BOOL bpp;
4004     RECT local_rect;
4005     BOOL srcIsUpsideDown;
4006     GLint rowLen = 0;
4007     GLint skipPix = 0;
4008     GLint skipRow = 0;
4009
4010     context = context_acquire(device, surface);
4011     context_apply_blit_state(context, device);
4012     gl_info = context->gl_info;
4013
4014     ENTER_GL();
4015
4016     /* Select the correct read buffer, and give some debug output.
4017      * There is no need to keep track of the current read buffer or reset it, every part of the code
4018      * that reads sets the read buffer as desired.
4019      */
4020     if (surface_is_offscreen(surface))
4021     {
4022         /* Mapping the primary render target which is not on a swapchain.
4023          * Read from the back buffer. */
4024         TRACE("Mapping offscreen render target.\n");
4025         glReadBuffer(device->offscreenBuffer);
4026         srcIsUpsideDown = TRUE;
4027     }
4028     else
4029     {
4030         /* Onscreen surfaces are always part of a swapchain */
4031         GLenum buffer = surface_get_gl_buffer(surface);
4032         TRACE("Mapping %#x buffer.\n", buffer);
4033         glReadBuffer(buffer);
4034         checkGLcall("glReadBuffer");
4035         srcIsUpsideDown = FALSE;
4036     }
4037
4038     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4039     if (!rect)
4040     {
4041         local_rect.left = 0;
4042         local_rect.top = 0;
4043         local_rect.right = surface->resource.width;
4044         local_rect.bottom = surface->resource.height;
4045     }
4046     else
4047     {
4048         local_rect = *rect;
4049     }
4050     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4051
4052     switch (surface->resource.format->id)
4053     {
4054         case WINED3DFMT_P8_UINT:
4055         {
4056             if (primary_render_target_is_p8(device))
4057             {
4058                 /* In case of P8 render targets the index is stored in the alpha component */
4059                 fmt = GL_ALPHA;
4060                 type = GL_UNSIGNED_BYTE;
4061                 mem = dest;
4062                 bpp = surface->resource.format->byte_count;
4063             }
4064             else
4065             {
4066                 /* GL can't return palettized data, so read ARGB pixels into a
4067                  * separate block of memory and convert them into palettized format
4068                  * in software. Slow, but if the app means to use palettized render
4069                  * targets and locks it...
4070                  *
4071                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4072                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4073                  * for the color channels when palettizing the colors.
4074                  */
4075                 fmt = GL_RGB;
4076                 type = GL_UNSIGNED_BYTE;
4077                 pitch *= 3;
4078                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4079                 if (!mem)
4080                 {
4081                     ERR("Out of memory\n");
4082                     LEAVE_GL();
4083                     return;
4084                 }
4085                 bpp = surface->resource.format->byte_count * 3;
4086             }
4087         }
4088         break;
4089
4090         default:
4091             mem = dest;
4092             fmt = surface->resource.format->glFormat;
4093             type = surface->resource.format->glType;
4094             bpp = surface->resource.format->byte_count;
4095     }
4096
4097     if (surface->flags & SFLAG_PBO)
4098     {
4099         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4100         checkGLcall("glBindBufferARB");
4101         if (mem)
4102         {
4103             ERR("mem not null for pbo -- unexpected\n");
4104             mem = NULL;
4105         }
4106     }
4107
4108     /* Save old pixel store pack state */
4109     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4110     checkGLcall("glGetIntegerv");
4111     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4112     checkGLcall("glGetIntegerv");
4113     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4114     checkGLcall("glGetIntegerv");
4115
4116     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4117     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4118     checkGLcall("glPixelStorei");
4119     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4120     checkGLcall("glPixelStorei");
4121     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4122     checkGLcall("glPixelStorei");
4123
4124     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4125             local_rect.right - local_rect.left,
4126             local_rect.bottom - local_rect.top,
4127             fmt, type, mem);
4128     checkGLcall("glReadPixels");
4129
4130     /* Reset previous pixel store pack state */
4131     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4132     checkGLcall("glPixelStorei");
4133     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4134     checkGLcall("glPixelStorei");
4135     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4136     checkGLcall("glPixelStorei");
4137
4138     if (surface->flags & SFLAG_PBO)
4139     {
4140         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4141         checkGLcall("glBindBufferARB");
4142
4143         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4144          * to get a pointer to it and perform the flipping in software. This is a lot
4145          * faster than calling glReadPixels for each line. In case we want more speed
4146          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4147         if (!srcIsUpsideDown)
4148         {
4149             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4150             checkGLcall("glBindBufferARB");
4151
4152             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4153             checkGLcall("glMapBufferARB");
4154         }
4155     }
4156
4157     /* TODO: Merge this with the palettization loop below for P8 targets */
4158     if(!srcIsUpsideDown) {
4159         UINT len, off;
4160         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4161             Flip the lines in software */
4162         len = (local_rect.right - local_rect.left) * bpp;
4163         off = local_rect.left * bpp;
4164
4165         row = HeapAlloc(GetProcessHeap(), 0, len);
4166         if(!row) {
4167             ERR("Out of memory\n");
4168             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4169                 HeapFree(GetProcessHeap(), 0, mem);
4170             LEAVE_GL();
4171             return;
4172         }
4173
4174         top = mem + pitch * local_rect.top;
4175         bottom = mem + pitch * (local_rect.bottom - 1);
4176         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4177             memcpy(row, top + off, len);
4178             memcpy(top + off, bottom + off, len);
4179             memcpy(bottom + off, row, len);
4180             top += pitch;
4181             bottom -= pitch;
4182         }
4183         HeapFree(GetProcessHeap(), 0, row);
4184
4185         /* Unmap the temp PBO buffer */
4186         if (surface->flags & SFLAG_PBO)
4187         {
4188             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4189             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4190         }
4191     }
4192
4193     LEAVE_GL();
4194     context_release(context);
4195
4196     /* For P8 textures we need to perform an inverse palette lookup. This is
4197      * done by searching for a palette index which matches the RGB value.
4198      * Note this isn't guaranteed to work when there are multiple entries for
4199      * the same color but we have no choice. In case of P8 render targets,
4200      * the index is stored in the alpha component so no conversion is needed. */
4201     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4202     {
4203         const PALETTEENTRY *pal = NULL;
4204         DWORD width = pitch / 3;
4205         int x, y, c;
4206
4207         if (surface->palette)
4208         {
4209             pal = surface->palette->palents;
4210         }
4211         else
4212         {
4213             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4214             HeapFree(GetProcessHeap(), 0, mem);
4215             return;
4216         }
4217
4218         for(y = local_rect.top; y < local_rect.bottom; y++) {
4219             for(x = local_rect.left; x < local_rect.right; x++) {
4220                 /*                      start              lines            pixels      */
4221                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4222                 const BYTE *green = blue  + 1;
4223                 const BYTE *red = green + 1;
4224
4225                 for(c = 0; c < 256; c++) {
4226                     if(*red   == pal[c].peRed   &&
4227                        *green == pal[c].peGreen &&
4228                        *blue  == pal[c].peBlue)
4229                     {
4230                         *((BYTE *) dest + y * width + x) = c;
4231                         break;
4232                     }
4233                 }
4234             }
4235         }
4236         HeapFree(GetProcessHeap(), 0, mem);
4237     }
4238 }
4239
4240 /* Read the framebuffer contents into a texture. Note that this function
4241  * doesn't do any kind of flipping. Using this on an onscreen surface will
4242  * result in a flipped D3D texture. */
4243 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4244 {
4245     struct wined3d_device *device = surface->resource.device;
4246     struct wined3d_context *context;
4247
4248     context = context_acquire(device, surface);
4249     device_invalidate_state(device, STATE_FRAMEBUFFER);
4250
4251     surface_prepare_texture(surface, context, srgb);
4252     surface_bind_and_dirtify(surface, context, srgb);
4253
4254     TRACE("Reading back offscreen render target %p.\n", surface);
4255
4256     ENTER_GL();
4257
4258     if (surface_is_offscreen(surface))
4259         glReadBuffer(device->offscreenBuffer);
4260     else
4261         glReadBuffer(surface_get_gl_buffer(surface));
4262     checkGLcall("glReadBuffer");
4263
4264     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4265             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4266     checkGLcall("glCopyTexSubImage2D");
4267
4268     LEAVE_GL();
4269
4270     context_release(context);
4271 }
4272
4273 /* Context activation is done by the caller. */
4274 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4275         struct wined3d_context *context, BOOL srgb)
4276 {
4277     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4278     CONVERT_TYPES convert;
4279     struct wined3d_format format;
4280
4281     if (surface->flags & alloc_flag) return;
4282
4283     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4284     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4285     else surface->flags &= ~SFLAG_CONVERTED;
4286
4287     surface_bind_and_dirtify(surface, context, srgb);
4288     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4289     surface->flags |= alloc_flag;
4290 }
4291
4292 /* Context activation is done by the caller. */
4293 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4294 {
4295     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4296     {
4297         struct wined3d_texture *texture = surface->container.u.texture;
4298         UINT sub_count = texture->level_count * texture->layer_count;
4299         UINT i;
4300
4301         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4302
4303         for (i = 0; i < sub_count; ++i)
4304         {
4305             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4306             surface_prepare_texture_internal(s, context, srgb);
4307         }
4308
4309         return;
4310     }
4311
4312     surface_prepare_texture_internal(surface, context, srgb);
4313 }
4314
4315 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4316 {
4317     if (multisample)
4318     {
4319         if (surface->rb_multisample)
4320             return;
4321
4322         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4323         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4324         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4325                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4326         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4327     }
4328     else
4329     {
4330         if (surface->rb_resolved)
4331             return;
4332
4333         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4334         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4335         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4336                 surface->pow2Width, surface->pow2Height);
4337         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4338     }
4339 }
4340
4341 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4342         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4343 {
4344     struct wined3d_device *device = surface->resource.device;
4345     UINT pitch = wined3d_surface_get_pitch(surface);
4346     const struct wined3d_gl_info *gl_info;
4347     struct wined3d_context *context;
4348     RECT local_rect;
4349     UINT w, h;
4350
4351     surface_get_rect(surface, rect, &local_rect);
4352
4353     mem += local_rect.top * pitch + local_rect.left * bpp;
4354     w = local_rect.right - local_rect.left;
4355     h = local_rect.bottom - local_rect.top;
4356
4357     /* Activate the correct context for the render target */
4358     context = context_acquire(device, surface);
4359     context_apply_blit_state(context, device);
4360     gl_info = context->gl_info;
4361
4362     ENTER_GL();
4363
4364     if (!surface_is_offscreen(surface))
4365     {
4366         GLenum buffer = surface_get_gl_buffer(surface);
4367         TRACE("Unlocking %#x buffer.\n", buffer);
4368         context_set_draw_buffer(context, buffer);
4369
4370         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4371         glPixelZoom(1.0f, -1.0f);
4372     }
4373     else
4374     {
4375         /* Primary offscreen render target */
4376         TRACE("Offscreen render target.\n");
4377         context_set_draw_buffer(context, device->offscreenBuffer);
4378
4379         glPixelZoom(1.0f, 1.0f);
4380     }
4381
4382     glRasterPos3i(local_rect.left, local_rect.top, 1);
4383     checkGLcall("glRasterPos3i");
4384
4385     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4386     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4387
4388     if (surface->flags & SFLAG_PBO)
4389     {
4390         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4391         checkGLcall("glBindBufferARB");
4392     }
4393
4394     glDrawPixels(w, h, fmt, type, mem);
4395     checkGLcall("glDrawPixels");
4396
4397     if (surface->flags & SFLAG_PBO)
4398     {
4399         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4400         checkGLcall("glBindBufferARB");
4401     }
4402
4403     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4404     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4405
4406     LEAVE_GL();
4407
4408     if (wined3d_settings.strict_draw_ordering
4409             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4410             && surface->container.u.swapchain->front_buffer == surface))
4411         wglFlush();
4412
4413     context_release(context);
4414 }
4415
4416 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4417         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4418 {
4419     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4420     const struct wined3d_device *device = surface->resource.device;
4421     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4422     BOOL blit_supported = FALSE;
4423
4424     /* Copy the default values from the surface. Below we might perform fixups */
4425     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4426     *format = *surface->resource.format;
4427     *convert = NO_CONVERSION;
4428
4429     /* Ok, now look if we have to do any conversion */
4430     switch (surface->resource.format->id)
4431     {
4432         case WINED3DFMT_P8_UINT:
4433             /* Below the call to blit_supported is disabled for Wine 1.2
4434              * because the function isn't operating correctly yet. At the
4435              * moment 8-bit blits are handled in software and if certain GL
4436              * extensions are around, surface conversion is performed at
4437              * upload time. The blit_supported call recognizes it as a
4438              * destination fixup. This type of upload 'fixup' and 8-bit to
4439              * 8-bit blits need to be handled by the blit_shader.
4440              * TODO: get rid of this #if 0. */
4441 #if 0
4442             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4443                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4444                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4445 #endif
4446             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4447
4448             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4449              * texturing. Further also use conversion in case of color keying.
4450              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4451              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4452              * conflicts with this.
4453              */
4454             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4455                     || colorkey_active || !use_texturing)
4456             {
4457                 format->glFormat = GL_RGBA;
4458                 format->glInternal = GL_RGBA;
4459                 format->glType = GL_UNSIGNED_BYTE;
4460                 format->conv_byte_count = 4;
4461                 if (colorkey_active)
4462                     *convert = CONVERT_PALETTED_CK;
4463                 else
4464                     *convert = CONVERT_PALETTED;
4465             }
4466             break;
4467
4468         case WINED3DFMT_B2G3R3_UNORM:
4469             /* **********************
4470                 GL_UNSIGNED_BYTE_3_3_2
4471                 ********************** */
4472             if (colorkey_active) {
4473                 /* This texture format will never be used.. So do not care about color keying
4474                     up until the point in time it will be needed :-) */
4475                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4476             }
4477             break;
4478
4479         case WINED3DFMT_B5G6R5_UNORM:
4480             if (colorkey_active)
4481             {
4482                 *convert = CONVERT_CK_565;
4483                 format->glFormat = GL_RGBA;
4484                 format->glInternal = GL_RGB5_A1;
4485                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4486                 format->conv_byte_count = 2;
4487             }
4488             break;
4489
4490         case WINED3DFMT_B5G5R5X1_UNORM:
4491             if (colorkey_active)
4492             {
4493                 *convert = CONVERT_CK_5551;
4494                 format->glFormat = GL_BGRA;
4495                 format->glInternal = GL_RGB5_A1;
4496                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4497                 format->conv_byte_count = 2;
4498             }
4499             break;
4500
4501         case WINED3DFMT_B8G8R8_UNORM:
4502             if (colorkey_active)
4503             {
4504                 *convert = CONVERT_CK_RGB24;
4505                 format->glFormat = GL_RGBA;
4506                 format->glInternal = GL_RGBA8;
4507                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4508                 format->conv_byte_count = 4;
4509             }
4510             break;
4511
4512         case WINED3DFMT_B8G8R8X8_UNORM:
4513             if (colorkey_active)
4514             {
4515                 *convert = CONVERT_RGB32_888;
4516                 format->glFormat = GL_RGBA;
4517                 format->glInternal = GL_RGBA8;
4518                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4519                 format->conv_byte_count = 4;
4520             }
4521             break;
4522
4523         default:
4524             break;
4525     }
4526
4527     return WINED3D_OK;
4528 }
4529
4530 static BOOL color_in_range(const struct wined3d_color_key *color_key, DWORD color)
4531 {
4532     /* FIXME: Is this really how color keys are supposed to work? I think it
4533      * makes more sense to compare the individual channels. */
4534     return color >= color_key->color_space_low_value
4535             && color <= color_key->color_space_high_value;
4536 }
4537
4538 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4539 {
4540     const struct wined3d_device *device = surface->resource.device;
4541     const struct wined3d_palette *pal = surface->palette;
4542     BOOL index_in_alpha = FALSE;
4543     unsigned int i;
4544
4545     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4546      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4547      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4548      * duplicate entries. Store the color key in the unused alpha component to speed the
4549      * download up and to make conversion unneeded. */
4550     index_in_alpha = primary_render_target_is_p8(device);
4551
4552     if (!pal)
4553     {
4554         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4555         if (index_in_alpha)
4556         {
4557             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4558              * there's no palette at this time. */
4559             for (i = 0; i < 256; i++) table[i][3] = i;
4560         }
4561     }
4562     else
4563     {
4564         TRACE("Using surface palette %p\n", pal);
4565         /* Get the surface's palette */
4566         for (i = 0; i < 256; ++i)
4567         {
4568             table[i][0] = pal->palents[i].peRed;
4569             table[i][1] = pal->palents[i].peGreen;
4570             table[i][2] = pal->palents[i].peBlue;
4571
4572             /* When index_in_alpha is set the palette index is stored in the
4573              * alpha component. In case of a readback we can then read
4574              * GL_ALPHA. Color keying is handled in BltOverride using a
4575              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4576              * color key itself is passed to glAlphaFunc in other cases the
4577              * alpha component of pixels that should be masked away is set to 0. */
4578             if (index_in_alpha)
4579                 table[i][3] = i;
4580             else if (colorkey && color_in_range(&surface->src_blt_color_key, i))
4581                 table[i][3] = 0x00;
4582             else if (pal->flags & WINEDDPCAPS_ALPHA)
4583                 table[i][3] = pal->palents[i].peFlags;
4584             else
4585                 table[i][3] = 0xFF;
4586         }
4587     }
4588 }
4589
4590 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4591         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4592 {
4593     const BYTE *source;
4594     BYTE *dest;
4595     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4596
4597     switch (convert) {
4598         case NO_CONVERSION:
4599         {
4600             memcpy(dst, src, pitch * height);
4601             break;
4602         }
4603         case CONVERT_PALETTED:
4604         case CONVERT_PALETTED_CK:
4605         {
4606             BYTE table[256][4];
4607             unsigned int x, y;
4608
4609             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4610
4611             for (y = 0; y < height; y++)
4612             {
4613                 source = src + pitch * y;
4614                 dest = dst + outpitch * y;
4615                 /* This is an 1 bpp format, using the width here is fine */
4616                 for (x = 0; x < width; x++) {
4617                     BYTE color = *source++;
4618                     *dest++ = table[color][0];
4619                     *dest++ = table[color][1];
4620                     *dest++ = table[color][2];
4621                     *dest++ = table[color][3];
4622                 }
4623             }
4624         }
4625         break;
4626
4627         case CONVERT_CK_565:
4628         {
4629             /* Converting the 565 format in 5551 packed to emulate color-keying.
4630
4631               Note : in all these conversion, it would be best to average the averaging
4632                       pixels to get the color of the pixel that will be color-keyed to
4633                       prevent 'color bleeding'. This will be done later on if ever it is
4634                       too visible.
4635
4636               Note2: Nvidia documents say that their driver does not support alpha + color keying
4637                      on the same surface and disables color keying in such a case
4638             */
4639             unsigned int x, y;
4640             const WORD *Source;
4641             WORD *Dest;
4642
4643             TRACE("Color keyed 565\n");
4644
4645             for (y = 0; y < height; y++) {
4646                 Source = (const WORD *)(src + y * pitch);
4647                 Dest = (WORD *) (dst + y * outpitch);
4648                 for (x = 0; x < width; x++ ) {
4649                     WORD color = *Source++;
4650                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4651                     if (!color_in_range(&surface->src_blt_color_key, color))
4652                         *Dest |= 0x0001;
4653                     Dest++;
4654                 }
4655             }
4656         }
4657         break;
4658
4659         case CONVERT_CK_5551:
4660         {
4661             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4662             unsigned int x, y;
4663             const WORD *Source;
4664             WORD *Dest;
4665             TRACE("Color keyed 5551\n");
4666             for (y = 0; y < height; y++) {
4667                 Source = (const WORD *)(src + y * pitch);
4668                 Dest = (WORD *) (dst + y * outpitch);
4669                 for (x = 0; x < width; x++ ) {
4670                     WORD color = *Source++;
4671                     *Dest = color;
4672                     if (!color_in_range(&surface->src_blt_color_key, color))
4673                         *Dest |= (1 << 15);
4674                     else
4675                         *Dest &= ~(1 << 15);
4676                     Dest++;
4677                 }
4678             }
4679         }
4680         break;
4681
4682         case CONVERT_CK_RGB24:
4683         {
4684             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4685             unsigned int x, y;
4686             for (y = 0; y < height; y++)
4687             {
4688                 source = src + pitch * y;
4689                 dest = dst + outpitch * y;
4690                 for (x = 0; x < width; x++) {
4691                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4692                     DWORD dstcolor = color << 8;
4693                     if (!color_in_range(&surface->src_blt_color_key, color))
4694                         dstcolor |= 0xff;
4695                     *(DWORD*)dest = dstcolor;
4696                     source += 3;
4697                     dest += 4;
4698                 }
4699             }
4700         }
4701         break;
4702
4703         case CONVERT_RGB32_888:
4704         {
4705             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4706             unsigned int x, y;
4707             for (y = 0; y < height; y++)
4708             {
4709                 source = src + pitch * y;
4710                 dest = dst + outpitch * y;
4711                 for (x = 0; x < width; x++) {
4712                     DWORD color = 0xffffff & *(const DWORD*)source;
4713                     DWORD dstcolor = color << 8;
4714                     if (!color_in_range(&surface->src_blt_color_key, color))
4715                         dstcolor |= 0xff;
4716                     *(DWORD*)dest = dstcolor;
4717                     source += 4;
4718                     dest += 4;
4719                 }
4720             }
4721         }
4722         break;
4723
4724         default:
4725             ERR("Unsupported conversion type %#x.\n", convert);
4726     }
4727     return WINED3D_OK;
4728 }
4729
4730 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4731 {
4732     /* Flip the surface contents */
4733     /* Flip the DC */
4734     {
4735         HDC tmp;
4736         tmp = front->hDC;
4737         front->hDC = back->hDC;
4738         back->hDC = tmp;
4739     }
4740
4741     /* Flip the DIBsection */
4742     {
4743         HBITMAP tmp = front->dib.DIBsection;
4744         front->dib.DIBsection = back->dib.DIBsection;
4745         back->dib.DIBsection = tmp;
4746     }
4747
4748     /* Flip the surface data */
4749     {
4750         void* tmp;
4751
4752         tmp = front->dib.bitmap_data;
4753         front->dib.bitmap_data = back->dib.bitmap_data;
4754         back->dib.bitmap_data = tmp;
4755
4756         tmp = front->resource.allocatedMemory;
4757         front->resource.allocatedMemory = back->resource.allocatedMemory;
4758         back->resource.allocatedMemory = tmp;
4759
4760         tmp = front->resource.heapMemory;
4761         front->resource.heapMemory = back->resource.heapMemory;
4762         back->resource.heapMemory = tmp;
4763     }
4764
4765     /* Flip the PBO */
4766     {
4767         GLuint tmp_pbo = front->pbo;
4768         front->pbo = back->pbo;
4769         back->pbo = tmp_pbo;
4770     }
4771
4772     /* Flip the opengl texture */
4773     {
4774         GLuint tmp;
4775
4776         tmp = back->texture_name;
4777         back->texture_name = front->texture_name;
4778         front->texture_name = tmp;
4779
4780         tmp = back->texture_name_srgb;
4781         back->texture_name_srgb = front->texture_name_srgb;
4782         front->texture_name_srgb = tmp;
4783
4784         tmp = back->rb_multisample;
4785         back->rb_multisample = front->rb_multisample;
4786         front->rb_multisample = tmp;
4787
4788         tmp = back->rb_resolved;
4789         back->rb_resolved = front->rb_resolved;
4790         front->rb_resolved = tmp;
4791
4792         resource_unload(&back->resource);
4793         resource_unload(&front->resource);
4794     }
4795
4796     {
4797         DWORD tmp_flags = back->flags;
4798         back->flags = front->flags;
4799         front->flags = tmp_flags;
4800     }
4801 }
4802
4803 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4804  * pixel copy calls. */
4805 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4806         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4807 {
4808     struct wined3d_device *device = dst_surface->resource.device;
4809     float xrel, yrel;
4810     UINT row;
4811     struct wined3d_context *context;
4812     BOOL upsidedown = FALSE;
4813     RECT dst_rect = *dst_rect_in;
4814
4815     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4816      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4817      */
4818     if(dst_rect.top > dst_rect.bottom) {
4819         UINT tmp = dst_rect.bottom;
4820         dst_rect.bottom = dst_rect.top;
4821         dst_rect.top = tmp;
4822         upsidedown = TRUE;
4823     }
4824
4825     context = context_acquire(device, src_surface);
4826     context_apply_blit_state(context, device);
4827     surface_internal_preload(dst_surface, SRGB_RGB);
4828     ENTER_GL();
4829
4830     /* Bind the target texture */
4831     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4832     if (surface_is_offscreen(src_surface))
4833     {
4834         TRACE("Reading from an offscreen target\n");
4835         upsidedown = !upsidedown;
4836         glReadBuffer(device->offscreenBuffer);
4837     }
4838     else
4839     {
4840         glReadBuffer(surface_get_gl_buffer(src_surface));
4841     }
4842     checkGLcall("glReadBuffer");
4843
4844     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4845     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4846
4847     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4848     {
4849         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4850
4851         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4852             ERR("Texture filtering not supported in direct blit\n");
4853         }
4854     }
4855     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4856             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4857     {
4858         ERR("Texture filtering not supported in direct blit\n");
4859     }
4860
4861     if (upsidedown
4862             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4863             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4864     {
4865         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4866
4867         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4868                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4869                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4870                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4871     }
4872     else
4873     {
4874         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4875         /* I have to process this row by row to swap the image,
4876          * otherwise it would be upside down, so stretching in y direction
4877          * doesn't cost extra time
4878          *
4879          * However, stretching in x direction can be avoided if not necessary
4880          */
4881         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4882             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4883             {
4884                 /* Well, that stuff works, but it's very slow.
4885                  * find a better way instead
4886                  */
4887                 UINT col;
4888
4889                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4890                 {
4891                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4892                             dst_rect.left + col /* x offset */, row /* y offset */,
4893                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4894                 }
4895             }
4896             else
4897             {
4898                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4899                         dst_rect.left /* x offset */, row /* y offset */,
4900                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4901             }
4902         }
4903     }
4904     checkGLcall("glCopyTexSubImage2D");
4905
4906     LEAVE_GL();
4907     context_release(context);
4908
4909     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4910      * path is never entered
4911      */
4912     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4913 }
4914
4915 /* Uses the hardware to stretch and flip the image.
      *
      * Copies src_rect of src_surface's framebuffer into dst_rect_in of
      * dst_surface's texture by drawing a textured quad and reading it back:
      *  1. The current read buffer is copied into a texture: a temporary
      *     "backup" texture when noBackBufferBackup is set, src_surface's own
      *     texture otherwise.
      *  2. If src_surface belongs to a swapchain and is not back_buffers[0],
      *     GL_FRONT is read into a second temporary texture, which is then
      *     the one left bound for drawing.
      *  3. A quad of the destination size is drawn at the origin of drawBuffer
      *     (GL_AUX1, GL_AUX0 or GL_BACK), sampling the src_rect region.
      *  4. The quad is copied from drawBuffer into dst_surface's texture.
      *  5. If drawBuffer is GL_BACK, the texture from step 1 is drawn back over
      *     the full fbwidth x fbheight area.
      *
      * Filter selects the mag / min filter set on the texture from step 1.
      * surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE) is called
      * before returning. */
4916 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4917         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4918 {
4919     struct wined3d_device *device = dst_surface->resource.device;
4920     struct wined3d_swapchain *src_swapchain = NULL;
4921     GLuint src, backup = 0;
4922     float left, right, top, bottom; /* Texture coordinates */
4923     UINT fbwidth = src_surface->resource.width;
4924     UINT fbheight = src_surface->resource.height;
4925     struct wined3d_context *context;
4926     GLenum drawBuffer = GL_BACK;
4927     GLenum texture_target;
4928     BOOL noBackBufferBackup;
4929     BOOL src_offscreen;
4930     BOOL upsidedown = FALSE;
4931     RECT dst_rect = *dst_rect_in;
4932
4933     TRACE("Using hwstretch blit\n");
4934     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4935     context = context_acquire(device, src_surface);
4936     context_apply_blit_state(context, device);
4937     surface_internal_preload(dst_surface, SRGB_RGB);
4938
4939     src_offscreen = surface_is_offscreen(src_surface);
         /* For an offscreen source in FBO mode the framebuffer copy below goes
          * into a temporary texture instead of src_surface's own texture. */
4940     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4941     if (!noBackBufferBackup && !src_surface->texture_name)
4942     {
4943         /* Get it a description */
4944         surface_internal_preload(src_surface, SRGB_RGB);
4945     }
4946     ENTER_GL();
4947
4948     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4949      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4950      */
4951     if (context->aux_buffers >= 2)
4952     {
4953         /* Got more than one aux buffer? Use the 2nd aux buffer */
4954         drawBuffer = GL_AUX1;
4955     }
4956     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4957     {
4958         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4959         drawBuffer = GL_AUX0;
4960     }
4961
4962     if(noBackBufferBackup) {
             /* NOTE(review): only glGenTextures() and a bind are visible for
              * 'backup'; no glTexImage2D() allocating its storage and no
              * glEnable() of the target appear in this branch before the
              * glCopyTexSubImage2D() / quad below - confirm this is intended. */
4963         glGenTextures(1, &backup);
4964         checkGLcall("glGenTextures");
4965         context_bind_texture(context, GL_TEXTURE_2D, backup);
4966         texture_target = GL_TEXTURE_2D;
4967     } else {
4968         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4969          * we are reading from the back buffer, the backup can be used as source texture
4970          */
4971         texture_target = src_surface->texture_target;
4972         context_bind_texture(context, texture_target, src_surface->texture_name);
4973         glEnable(texture_target);
4974         checkGLcall("glEnable(texture_target)");
4975
4976         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4977         src_surface->flags &= ~SFLAG_INTEXTURE;
4978     }
4979
4980     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4981      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4982      */
4983     if(dst_rect.top > dst_rect.bottom) {
4984         UINT tmp = dst_rect.bottom;
4985         dst_rect.bottom = dst_rect.top;
4986         dst_rect.top = tmp;
4987         upsidedown = TRUE;
4988     }
4989
4990     if (src_offscreen)
4991     {
4992         TRACE("Reading from an offscreen target\n");
4993         upsidedown = !upsidedown;
4994         glReadBuffer(device->offscreenBuffer);
4995     }
4996     else
4997     {
4998         glReadBuffer(surface_get_gl_buffer(src_surface));
4999     }
5000
5001     /* TODO: Only back up the part that will be overwritten */
5002     glCopyTexSubImage2D(texture_target, 0,
5003                         0, 0 /* read offsets */,
5004                         0, 0,
5005                         fbwidth,
5006                         fbheight);
5007
5008     checkGLcall("glCopyTexSubImage2D");
5009
5010     /* No issue with overriding these - the sampler is dirty due to blit usage */
5011     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5012             wined3d_gl_mag_filter(magLookup, Filter));
5013     checkGLcall("glTexParameteri");
5014     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5015             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5016     checkGLcall("glTexParameteri");
5017
5018     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5019         src_swapchain = src_surface->container.u.swapchain;
5020     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5021     {
             /* The texture filled above stays bound and is the quad's source;
              * 'src' only records its name for the cleanup check at the end. */
5022         src = backup ? backup : src_surface->texture_name;
5023     }
5024     else
5025     {
             /* Read GL_FRONT into a second temporary texture. It is left bound,
              * so it becomes the source of the stretched quad. */
5026         glReadBuffer(GL_FRONT);
5027         checkGLcall("glReadBuffer(GL_FRONT)");
5028
5029         glGenTextures(1, &src);
5030         checkGLcall("glGenTextures(1, &src)");
5031         context_bind_texture(context, GL_TEXTURE_2D, src);
5032
5033         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5034          * out for power of 2 sizes
5035          */
5036         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5037                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5038         checkGLcall("glTexImage2D");
5039         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5040                             0, 0 /* read offsets */,
5041                             0, 0,
5042                             fbwidth,
5043                             fbheight);
5044
5045         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5046         checkGLcall("glTexParameteri");
5047         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5048         checkGLcall("glTexParameteri");
5049
5050         glReadBuffer(GL_BACK);
5051         checkGLcall("glReadBuffer(GL_BACK)");
5052
5053         if(texture_target != GL_TEXTURE_2D) {
5054             glDisable(texture_target);
5055             glEnable(GL_TEXTURE_2D);
5056             texture_target = GL_TEXTURE_2D;
5057         }
5058     }
5059     checkGLcall("glEnd and previous");
5060
         /* Texture coordinates of the source region. The rows are measured from
          * the bottom edge of the source; upsidedown swaps top and bottom. */
5061     left = src_rect->left;
5062     right = src_rect->right;
5063
5064     if (!upsidedown)
5065     {
5066         top = src_surface->resource.height - src_rect->top;
5067         bottom = src_surface->resource.height - src_rect->bottom;
5068     }
5069     else
5070     {
5071         top = src_surface->resource.height - src_rect->bottom;
5072         bottom = src_surface->resource.height - src_rect->top;
5073     }
5074
         /* Scale to the 0..1 range by the power-of-two size when the surface
          * is flagged as using normalised coordinates. */
5075     if (src_surface->flags & SFLAG_NORMCOORD)
5076     {
5077         left /= src_surface->pow2Width;
5078         right /= src_surface->pow2Width;
5079         top /= src_surface->pow2Height;
5080         bottom /= src_surface->pow2Height;
5081     }
5082
5083     /* draw the source texture stretched and upside down. The correct surface is bound already */
5084     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5085     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5086
5087     context_set_draw_buffer(context, drawBuffer);
5088     glReadBuffer(drawBuffer);
5089
5090     glBegin(GL_QUADS);
5091         /* bottom left */
5092         glTexCoord2f(left, bottom);
5093         glVertex2i(0, 0);
5094
5095         /* top left */
5096         glTexCoord2f(left, top);
5097         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5098
5099         /* top right */
5100         glTexCoord2f(right, top);
5101         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5102
5103         /* bottom right */
5104         glTexCoord2f(right, bottom);
5105         glVertex2i(dst_rect.right - dst_rect.left, 0);
5106     glEnd();
5107     checkGLcall("glEnd and previous");
5108
5109     if (texture_target != dst_surface->texture_target)
5110     {
5111         glDisable(texture_target);
5112         glEnable(dst_surface->texture_target);
5113         texture_target = dst_surface->texture_target;
5114     }
5115
5116     /* Now read the stretched and upside down image into the destination texture */
5117     context_bind_texture(context, texture_target, dst_surface->texture_name);
5118     glCopyTexSubImage2D(texture_target,
5119                         0,
5120                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5121                         0, 0, /* We blitted the image to the origin */
5122                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5123     checkGLcall("glCopyTexSubImage2D");
5124
5125     if(drawBuffer == GL_BACK) {
5126         /* Write the back buffer backup back */
5127         if(backup) {
5128             if(texture_target != GL_TEXTURE_2D) {
5129                 glDisable(texture_target);
5130                 glEnable(GL_TEXTURE_2D);
5131                 texture_target = GL_TEXTURE_2D;
5132             }
5133             context_bind_texture(context, GL_TEXTURE_2D, backup);
5134         }
5135         else
5136         {
5137             if (texture_target != src_surface->texture_target)
5138             {
5139                 glDisable(texture_target);
5140                 glEnable(src_surface->texture_target);
5141                 texture_target = src_surface->texture_target;
5142             }
5143             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5144         }
5145
             /* Redraw the saved contents over the whole fbwidth x fbheight area.
              * NOTE(review): these coordinates are always divided by the pow2
              * size, unlike the SFLAG_NORMCOORD check above - confirm for
              * texture targets that do not use normalised coordinates. */
5146         glBegin(GL_QUADS);
5147             /* top left */
5148             glTexCoord2f(0.0f, 0.0f);
5149             glVertex2i(0, fbheight);
5150
5151             /* bottom left */
5152             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5153             glVertex2i(0, 0);
5154
5155             /* bottom right */
5156             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5157                     (float)fbheight / (float)src_surface->pow2Height);
5158             glVertex2i(fbwidth, 0);
5159
5160             /* top right */
5161             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5162             glVertex2i(fbwidth, fbheight);
5163         glEnd();
5164     }
5165     glDisable(texture_target);
5166     checkGLcall("glDisable(texture_target)");
5167
5168     /* Cleanup */
         /* 'src' is deleted only when it is the temporary created for the
          * GL_FRONT read; 'backup' is deleted separately below. */
5169     if (src != src_surface->texture_name && src != backup)
5170     {
5171         glDeleteTextures(1, &src);
5172         checkGLcall("glDeleteTextures(1, &src)");
5173     }
5174     if(backup) {
5175         glDeleteTextures(1, &backup);
5176         checkGLcall("glDeleteTextures(1, &backup)");
5177     }
5178
5179     LEAVE_GL();
5180
5181     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5182
5183     context_release(context);
5184
5185     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5186      * path is never entered
5187      */
5188     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5189 }
5190
5191 /* Front buffer coordinates are always full screen coordinates, but our GL
5192  * drawable is limited to the window's client area. The sysmem and texture
5193  * copies do have the full screen size. Note that GL has a bottom-left
5194  * origin, while D3D has a top-left origin. */
5195 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5196 {
5197     UINT drawable_height;
5198
5199     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5200             && surface == surface->container.u.swapchain->front_buffer)
5201     {
5202         POINT offset = {0, 0};
5203         RECT windowsize;
5204
5205         ScreenToClient(window, &offset);
5206         OffsetRect(rect, offset.x, offset.y);
5207
5208         GetClientRect(window, &windowsize);
5209         drawable_height = windowsize.bottom - windowsize.top;
5210     }
5211     else
5212     {
5213         drawable_height = surface->resource.height;
5214     }
5215
5216     rect->top = drawable_height - rect->top;
5217     rect->bottom = drawable_height - rect->bottom;
5218 }
5219
5220 static void surface_blt_to_drawable(const struct wined3d_device *device,
5221         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5222         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5223         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5224 {
5225     struct wined3d_context *context;
5226     RECT src_rect, dst_rect;
5227
5228     src_rect = *src_rect_in;
5229     dst_rect = *dst_rect_in;
5230
5231     /* Make sure the surface is up-to-date. This should probably use
5232      * surface_load_location() and worry about the destination surface too,
5233      * unless we're overwriting it completely. */
5234     surface_internal_preload(src_surface, SRGB_RGB);
5235
5236     /* Activate the destination context, set it up for blitting */
5237     context = context_acquire(device, dst_surface);
5238     context_apply_blit_state(context, device);
5239
5240     if (!surface_is_offscreen(dst_surface))
5241         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5242
5243     device->blitter->set_shader(device->blit_priv, context, src_surface);
5244
5245     ENTER_GL();
5246
5247     if (color_key)
5248     {
5249         glEnable(GL_ALPHA_TEST);
5250         checkGLcall("glEnable(GL_ALPHA_TEST)");
5251
5252         /* When the primary render target uses P8, the alpha component
5253          * contains the palette index. Which means that the colorkey is one of
5254          * the palette entries. In other cases pixels that should be masked
5255          * away have alpha set to 0. */
5256         if (primary_render_target_is_p8(device))
5257             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->src_blt_color_key.color_space_low_value / 256.0f);
5258         else
5259             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5260         checkGLcall("glAlphaFunc");
5261     }
5262     else
5263     {
5264         glDisable(GL_ALPHA_TEST);
5265         checkGLcall("glDisable(GL_ALPHA_TEST)");
5266     }
5267
5268     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5269
5270     if (color_key)
5271     {
5272         glDisable(GL_ALPHA_TEST);
5273         checkGLcall("glDisable(GL_ALPHA_TEST)");
5274     }
5275
5276     LEAVE_GL();
5277
5278     /* Leave the opengl state valid for blitting */
5279     device->blitter->unset_shader(context->gl_info);
5280
5281     if (wined3d_settings.strict_draw_ordering
5282             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5283             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5284         wglFlush(); /* Flush to ensure ordering across contexts. */
5285
5286     context_release(context);
5287 }
5288
5289 /* Do not call while under the GL lock. */
5290 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const struct wined3d_color *color)
5291 {
5292     struct wined3d_device *device = s->resource.device;
5293     const struct blit_shader *blitter;
5294
5295     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5296             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5297     if (!blitter)
5298     {
5299         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5300         return WINED3DERR_INVALIDCALL;
5301     }
5302
5303     return blitter->color_fill(device, s, rect, color);
5304 }
5305
5306 /* Do not call while under the GL lock. */
5307 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5308         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5309         WINED3DTEXTUREFILTERTYPE Filter)
5310 {
5311     struct wined3d_device *device = dst_surface->resource.device;
5312     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5313     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5314
5315     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5316             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5317             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5318
5319     /* Get the swapchain. One of the surfaces has to be a primary surface */
5320     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5321     {
5322         WARN("Destination is in sysmem, rejecting gl blt\n");
5323         return WINED3DERR_INVALIDCALL;
5324     }
5325
5326     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5327         dstSwapchain = dst_surface->container.u.swapchain;
5328
5329     if (src_surface)
5330     {
5331         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5332         {
5333             WARN("Src is in sysmem, rejecting gl blt\n");
5334             return WINED3DERR_INVALIDCALL;
5335         }
5336
5337         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5338             srcSwapchain = src_surface->container.u.swapchain;
5339     }
5340
5341     /* Early sort out of cases where no render target is used */
5342     if (!dstSwapchain && !srcSwapchain
5343             && src_surface != device->fb.render_targets[0]
5344             && dst_surface != device->fb.render_targets[0])
5345     {
5346         TRACE("No surface is render target, not using hardware blit.\n");
5347         return WINED3DERR_INVALIDCALL;
5348     }
5349
5350     /* No destination color keying supported */
5351     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5352     {
5353         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5354         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5355         return WINED3DERR_INVALIDCALL;
5356     }
5357
5358     if (dstSwapchain && dstSwapchain == srcSwapchain)
5359     {
5360         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5361         return WINED3DERR_INVALIDCALL;
5362     }
5363
5364     if (dstSwapchain && srcSwapchain)
5365     {
5366         FIXME("Implement hardware blit between two different swapchains\n");
5367         return WINED3DERR_INVALIDCALL;
5368     }
5369
5370     if (dstSwapchain)
5371     {
5372         /* Handled with regular texture -> swapchain blit */
5373         if (src_surface == device->fb.render_targets[0])
5374             TRACE("Blit from active render target to a swapchain\n");
5375     }
5376     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5377     {
5378         FIXME("Implement blit from a swapchain to the active render target\n");
5379         return WINED3DERR_INVALIDCALL;
5380     }
5381
5382     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5383     {
5384         /* Blit from render target to texture */
5385         BOOL stretchx;
5386
5387         /* P8 read back is not implemented */
5388         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5389                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5390         {
5391             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5392             return WINED3DERR_INVALIDCALL;
5393         }
5394
5395         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5396         {
5397             TRACE("Color keying not supported by frame buffer to texture blit\n");
5398             return WINED3DERR_INVALIDCALL;
5399             /* Destination color key is checked above */
5400         }
5401
5402         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5403             stretchx = TRUE;
5404         else
5405             stretchx = FALSE;
5406
5407         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5408          * flip the image nor scale it.
5409          *
5410          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5411          * -> If the app wants a image width an unscaled width, copy it line per line
5412          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5413          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5414          *    back buffer. This is slower than reading line per line, thus not used for flipping
5415          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5416          *    pixel by pixel. */
5417         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5418                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5419         {
5420             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5421             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5422         } else {
5423             TRACE("Using hardware stretching to flip / stretch the texture\n");
5424             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5425         }
5426
5427         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5428         {
5429             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5430             dst_surface->resource.allocatedMemory = NULL;
5431             dst_surface->resource.heapMemory = NULL;
5432         }
5433         else
5434         {
5435             dst_surface->flags &= ~SFLAG_INSYSMEM;
5436         }
5437
5438         return WINED3D_OK;
5439     }
5440     else if (src_surface)
5441     {
5442         /* Blit from offscreen surface to render target */
5443         struct wined3d_color_key old_blt_key = src_surface->src_blt_color_key;
5444         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5445
5446         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5447
5448         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5449                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5450                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5451         {
5452             FIXME("Unsupported blit operation falling back to software\n");
5453             return WINED3DERR_INVALIDCALL;
5454         }
5455
5456         /* Color keying: Check if we have to do a color keyed blt,
5457          * and if not check if a color key is activated.
5458          *
5459          * Just modify the color keying parameters in the surface and restore them afterwards
5460          * The surface keeps track of the color key last used to load the opengl surface.
5461          * PreLoad will catch the change to the flags and color key and reload if necessary.
5462          */
5463         if (flags & WINEDDBLT_KEYSRC)
5464         {
5465             /* Use color key from surface */
5466         }
5467         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5468         {
5469             /* Use color key from DDBltFx */
5470             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5471             src_surface->src_blt_color_key = DDBltFx->ddckSrcColorkey;
5472         }
5473         else
5474         {
5475             /* Do not use color key */
5476             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5477         }
5478
5479         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5480                 src_surface, src_rect, dst_surface, dst_rect);
5481
5482         /* Restore the color key parameters */
5483         src_surface->CKeyFlags = oldCKeyFlags;
5484         src_surface->src_blt_color_key = old_blt_key;
5485
5486         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5487
5488         return WINED3D_OK;
5489     }
5490
5491     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5492     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5493     return WINED3DERR_INVALIDCALL;
5494 }
5495
5496 /* GL locking is done by the caller */
5497 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5498         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5499 {
5500     struct wined3d_device *device = surface->resource.device;
5501     const struct wined3d_gl_info *gl_info = context->gl_info;
5502     GLint compare_mode = GL_NONE;
5503     struct blt_info info;
5504     GLint old_binding = 0;
5505     RECT rect;
5506
5507     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5508
5509     glDisable(GL_CULL_FACE);
5510     glDisable(GL_BLEND);
5511     glDisable(GL_ALPHA_TEST);
5512     glDisable(GL_SCISSOR_TEST);
5513     glDisable(GL_STENCIL_TEST);
5514     glEnable(GL_DEPTH_TEST);
5515     glDepthFunc(GL_ALWAYS);
5516     glDepthMask(GL_TRUE);
5517     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5518     glViewport(x, y, w, h);
5519
5520     SetRect(&rect, 0, h, w, 0);
5521     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5522     context_active_texture(context, context->gl_info, 0);
5523     glGetIntegerv(info.binding, &old_binding);
5524     glBindTexture(info.bind_target, texture);
5525     if (gl_info->supported[ARB_SHADOW])
5526     {
5527         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5528         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5529     }
5530
5531     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5532             gl_info, info.tex_type, &surface->ds_current_size);
5533
5534     glBegin(GL_TRIANGLE_STRIP);
5535     glTexCoord3fv(info.coords[0]);
5536     glVertex2f(-1.0f, -1.0f);
5537     glTexCoord3fv(info.coords[1]);
5538     glVertex2f(1.0f, -1.0f);
5539     glTexCoord3fv(info.coords[2]);
5540     glVertex2f(-1.0f, 1.0f);
5541     glTexCoord3fv(info.coords[3]);
5542     glVertex2f(1.0f, 1.0f);
5543     glEnd();
5544
5545     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5546     glBindTexture(info.bind_target, old_binding);
5547
5548     glPopAttrib();
5549
5550     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5551 }
5552
5553 void surface_modify_ds_location(struct wined3d_surface *surface,
5554         DWORD location, UINT w, UINT h)
5555 {
5556     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5557
5558     if (location & ~SFLAG_DS_LOCATIONS)
5559         FIXME("Invalid location (%#x) specified.\n", location);
5560
5561     surface->ds_current_size.cx = w;
5562     surface->ds_current_size.cy = h;
5563     surface->flags &= ~SFLAG_DS_LOCATIONS;
5564     surface->flags |= location;
5565 }
5566
5567 /* Context activation is done by the caller. */
5568 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5569 {
5570     struct wined3d_device *device = surface->resource.device;
5571     GLsizei w, h;
5572
5573     TRACE("surface %p, new location %#x.\n", surface, location);
5574
5575     /* TODO: Make this work for modes other than FBO */
5576     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5577
5578     if (!(surface->flags & location))
5579     {
5580         w = surface->ds_current_size.cx;
5581         h = surface->ds_current_size.cy;
5582         surface->ds_current_size.cx = 0;
5583         surface->ds_current_size.cy = 0;
5584     }
5585     else
5586     {
5587         w = surface->resource.width;
5588         h = surface->resource.height;
5589     }
5590
5591     if (surface->ds_current_size.cx == surface->resource.width
5592             && surface->ds_current_size.cy == surface->resource.height)
5593     {
5594         TRACE("Location (%#x) is already up to date.\n", location);
5595         return;
5596     }
5597
5598     if (surface->current_renderbuffer)
5599     {
5600         FIXME("Not supported with fixed up depth stencil.\n");
5601         return;
5602     }
5603
5604     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5605     {
5606         /* This mostly happens when a depth / stencil is used without being
5607          * cleared first. In principle we could upload from sysmem, or
5608          * explicitly clear before first usage. For the moment there don't
5609          * appear to be a lot of applications depending on this, so a FIXME
5610          * should do. */
5611         FIXME("No up to date depth stencil location.\n");
5612         surface->flags |= location;
5613         surface->ds_current_size.cx = surface->resource.width;
5614         surface->ds_current_size.cy = surface->resource.height;
5615         return;
5616     }
5617
5618     if (location == SFLAG_DS_OFFSCREEN)
5619     {
5620         GLint old_binding = 0;
5621         GLenum bind_target;
5622
5623         /* The render target is allowed to be smaller than the depth/stencil
5624          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5625          * than the offscreen surface. Don't overwrite the offscreen surface
5626          * with undefined data. */
5627         w = min(w, context->swapchain->desc.backbuffer_width);
5628         h = min(h, context->swapchain->desc.backbuffer_height);
5629
5630         TRACE("Copying onscreen depth buffer to depth texture.\n");
5631
5632         ENTER_GL();
5633
5634         if (!device->depth_blt_texture)
5635         {
5636             glGenTextures(1, &device->depth_blt_texture);
5637         }
5638
5639         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5640          * directly on the FBO texture. That's because we need to flip. */
5641         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5642                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5643         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5644         {
5645             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5646             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5647         }
5648         else
5649         {
5650             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5651             bind_target = GL_TEXTURE_2D;
5652         }
5653         glBindTexture(bind_target, device->depth_blt_texture);
5654         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5655          * internal format, because the internal format might include stencil
5656          * data. In principle we should copy stencil data as well, but unless
5657          * the driver supports stencil export it's hard to do, and doesn't
5658          * seem to be needed in practice. If the hardware doesn't support
5659          * writing stencil data, the glCopyTexImage2D() call might trigger
5660          * software fallbacks. */
5661         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5662         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5663         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5664         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5665         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5666         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5667         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5668         glBindTexture(bind_target, old_binding);
5669
5670         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5671                 NULL, surface, SFLAG_INTEXTURE);
5672         context_set_draw_buffer(context, GL_NONE);
5673         glReadBuffer(GL_NONE);
5674
5675         /* Do the actual blit */
5676         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5677         checkGLcall("depth_blt");
5678
5679         context_invalidate_state(context, STATE_FRAMEBUFFER);
5680
5681         LEAVE_GL();
5682
5683         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5684     }
5685     else if (location == SFLAG_DS_ONSCREEN)
5686     {
5687         TRACE("Copying depth texture to onscreen depth buffer.\n");
5688
5689         ENTER_GL();
5690
5691         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5692                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5693         surface_depth_blt(surface, context, surface->texture_name,
5694                 0, surface->pow2Height - h, w, h, surface->texture_target);
5695         checkGLcall("depth_blt");
5696
5697         context_invalidate_state(context, STATE_FRAMEBUFFER);
5698
5699         LEAVE_GL();
5700
5701         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5702     }
5703     else
5704     {
5705         ERR("Invalid location (%#x) specified.\n", location);
5706     }
5707
5708     surface->flags |= location;
5709     surface->ds_current_size.cx = surface->resource.width;
5710     surface->ds_current_size.cy = surface->resource.height;
5711 }
5712
5713 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5714 {
5715     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5716     struct wined3d_surface *overlay;
5717
5718     TRACE("surface %p, location %s, persistent %#x.\n",
5719             surface, debug_surflocation(location), persistent);
5720
5721     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5722             && (location & SFLAG_INDRAWABLE))
5723         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5724
5725     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5726             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5727         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5728
5729     if (persistent)
5730     {
5731         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5732                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5733         {
5734             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5735             {
5736                 TRACE("Passing to container.\n");
5737                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5738             }
5739         }
5740         surface->flags &= ~SFLAG_LOCATIONS;
5741         surface->flags |= location;
5742
5743         /* Redraw emulated overlays, if any */
5744         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5745         {
5746             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5747             {
5748                 surface_draw_overlay(overlay);
5749             }
5750         }
5751     }
5752     else
5753     {
5754         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5755         {
5756             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5757             {
5758                 TRACE("Passing to container\n");
5759                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5760             }
5761         }
5762         surface->flags &= ~location;
5763     }
5764
5765     if (!(surface->flags & SFLAG_LOCATIONS))
5766     {
5767         ERR("Surface %p does not have any up to date location.\n", surface);
5768     }
5769 }
5770
5771 static DWORD resource_access_from_location(DWORD location)
5772 {
5773     switch (location)
5774     {
5775         case SFLAG_INSYSMEM:
5776             return WINED3D_RESOURCE_ACCESS_CPU;
5777
5778         case SFLAG_INDRAWABLE:
5779         case SFLAG_INSRGBTEX:
5780         case SFLAG_INTEXTURE:
5781         case SFLAG_INRB_MULTISAMPLE:
5782         case SFLAG_INRB_RESOLVED:
5783             return WINED3D_RESOURCE_ACCESS_GPU;
5784
5785         default:
5786             FIXME("Unhandled location %#x.\n", location);
5787             return 0;
5788     }
5789 }
5790
5791 static void surface_load_sysmem(struct wined3d_surface *surface,
5792         const struct wined3d_gl_info *gl_info, const RECT *rect)
5793 {
5794     surface_prepare_system_memory(surface);
5795
5796     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5797         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5798
5799     /* Download the surface to system memory. */
5800     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5801     {
5802         struct wined3d_device *device = surface->resource.device;
5803         struct wined3d_context *context;
5804
5805         /* TODO: Use already acquired context when possible. */
5806         context = context_acquire(device, NULL);
5807
5808         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5809         surface_download_data(surface, gl_info);
5810
5811         context_release(context);
5812
5813         return;
5814     }
5815
5816     if (surface->flags & SFLAG_INDRAWABLE)
5817     {
5818         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5819                 wined3d_surface_get_pitch(surface));
5820         return;
5821     }
5822
5823     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5824             surface, surface->flags & SFLAG_LOCATIONS);
5825 }
5826
5827 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5828         const struct wined3d_gl_info *gl_info, const RECT *rect)
5829 {
5830     struct wined3d_device *device = surface->resource.device;
5831     struct wined3d_format format;
5832     CONVERT_TYPES convert;
5833     UINT byte_count;
5834     BYTE *mem;
5835
5836     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5837     {
5838         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5839         return WINED3DERR_INVALIDCALL;
5840     }
5841
5842     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5843         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5844
5845     if (surface->flags & SFLAG_INTEXTURE)
5846     {
5847         RECT r;
5848
5849         surface_get_rect(surface, rect, &r);
5850         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5851
5852         return WINED3D_OK;
5853     }
5854
5855     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5856     {
5857         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5858          * path through sysmem. */
5859         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5860     }
5861
5862     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5863
5864     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5865      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5866      * called. */
5867     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5868     {
5869         struct wined3d_context *context;
5870
5871         TRACE("Removing the pbo attached to surface %p.\n", surface);
5872
5873         /* TODO: Use already acquired context when possible. */
5874         context = context_acquire(device, NULL);
5875
5876         surface_remove_pbo(surface, gl_info);
5877
5878         context_release(context);
5879     }
5880
5881     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5882     {
5883         UINT height = surface->resource.height;
5884         UINT width = surface->resource.width;
5885         UINT src_pitch, dst_pitch;
5886
5887         byte_count = format.conv_byte_count;
5888         src_pitch = wined3d_surface_get_pitch(surface);
5889
5890         /* Stick to the alignment for the converted surface too, makes it
5891          * easier to load the surface. */
5892         dst_pitch = width * byte_count;
5893         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5894
5895         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5896         {
5897             ERR("Out of memory (%u).\n", dst_pitch * height);
5898             return E_OUTOFMEMORY;
5899         }
5900
5901         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5902                 src_pitch, width, height, dst_pitch, convert, surface);
5903
5904         surface->flags |= SFLAG_CONVERTED;
5905     }
5906     else
5907     {
5908         surface->flags &= ~SFLAG_CONVERTED;
5909         mem = surface->resource.allocatedMemory;
5910         byte_count = format.byte_count;
5911     }
5912
5913     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5914
5915     /* Don't delete PBO memory. */
5916     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5917         HeapFree(GetProcessHeap(), 0, mem);
5918
5919     return WINED3D_OK;
5920 }
5921
5922 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5923         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5924 {
5925     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5926     struct wined3d_device *device = surface->resource.device;
5927     struct wined3d_context *context;
5928     UINT width, src_pitch, dst_pitch;
5929     struct wined3d_bo_address data;
5930     struct wined3d_format format;
5931     POINT dst_point = {0, 0};
5932     CONVERT_TYPES convert;
5933     BYTE *mem;
5934
5935     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5936             && surface_is_offscreen(surface)
5937             && (surface->flags & SFLAG_INDRAWABLE))
5938     {
5939         surface_load_fb_texture(surface, srgb);
5940
5941         return WINED3D_OK;
5942     }
5943
5944     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5945             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
5946             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5947                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5948                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5949     {
5950         if (srgb)
5951             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5952                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5953         else
5954             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5955                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5956
5957         return WINED3D_OK;
5958     }
5959
5960     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
5961             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
5962             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5963                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5964                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5965     {
5966         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
5967         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
5968         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
5969
5970         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
5971                 &rect, surface, dst_location, &rect);
5972
5973         return WINED3D_OK;
5974     }
5975
5976     /* Upload from system memory */
5977
5978     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5979             TRUE /* We will use textures */, &format, &convert);
5980
5981     if (srgb)
5982     {
5983         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5984         {
5985             /* Performance warning... */
5986             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
5987             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5988         }
5989     }
5990     else
5991     {
5992         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
5993         {
5994             /* Performance warning... */
5995             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
5996             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5997         }
5998     }
5999
6000     if (!(surface->flags & SFLAG_INSYSMEM))
6001     {
6002         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6003         /* Lets hope we get it from somewhere... */
6004         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6005     }
6006
6007     /* TODO: Use already acquired context when possible. */
6008     context = context_acquire(device, NULL);
6009
6010     surface_prepare_texture(surface, context, srgb);
6011     surface_bind_and_dirtify(surface, context, srgb);
6012
6013     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6014     {
6015         surface->flags |= SFLAG_GLCKEY;
6016         surface->gl_color_key = surface->src_blt_color_key;
6017     }
6018     else surface->flags &= ~SFLAG_GLCKEY;
6019
6020     width = surface->resource.width;
6021     src_pitch = wined3d_surface_get_pitch(surface);
6022
6023     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6024      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6025      * called. */
6026     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6027     {
6028         TRACE("Removing the pbo attached to surface %p.\n", surface);
6029         surface_remove_pbo(surface, gl_info);
6030     }
6031
6032     if (format.convert)
6033     {
6034         /* This code is entered for texture formats which need a fixup. */
6035         UINT height = surface->resource.height;
6036
6037         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6038         dst_pitch = width * format.conv_byte_count;
6039         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6040
6041         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6042         {
6043             ERR("Out of memory (%u).\n", dst_pitch * height);
6044             context_release(context);
6045             return E_OUTOFMEMORY;
6046         }
6047         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6048         format.byte_count = format.conv_byte_count;
6049         src_pitch = dst_pitch;
6050     }
6051     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6052     {
6053         /* This code is only entered for color keying fixups */
6054         UINT height = surface->resource.height;
6055
6056         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6057         dst_pitch = width * format.conv_byte_count;
6058         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6059
6060         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6061         {
6062             ERR("Out of memory (%u).\n", dst_pitch * height);
6063             context_release(context);
6064             return E_OUTOFMEMORY;
6065         }
6066         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6067                 width, height, dst_pitch, convert, surface);
6068         format.byte_count = format.conv_byte_count;
6069         src_pitch = dst_pitch;
6070     }
6071     else
6072     {
6073         mem = surface->resource.allocatedMemory;
6074     }
6075
6076     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6077     data.addr = mem;
6078     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6079
6080     context_release(context);
6081
6082     /* Don't delete PBO memory. */
6083     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6084         HeapFree(GetProcessHeap(), 0, mem);
6085
6086     return WINED3D_OK;
6087 }
6088
6089 static void surface_multisample_resolve(struct wined3d_surface *surface)
6090 {
6091     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6092
6093     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6094         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6095
6096     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6097             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6098 }
6099
6100 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6101 {
6102     struct wined3d_device *device = surface->resource.device;
6103     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6104     HRESULT hr;
6105
6106     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6107
6108     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6109     {
6110         if (location == SFLAG_INTEXTURE)
6111         {
6112             struct wined3d_context *context = context_acquire(device, NULL);
6113             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6114             context_release(context);
6115             return WINED3D_OK;
6116         }
6117         else
6118         {
6119             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6120             return WINED3DERR_INVALIDCALL;
6121         }
6122     }
6123
6124     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6125         location = SFLAG_INTEXTURE;
6126
6127     if (surface->flags & location)
6128     {
6129         TRACE("Location already up to date.\n");
6130
6131         if (location == SFLAG_INSYSMEM && !(surface->flags & SFLAG_PBO)
6132                 && surface_need_pbo(surface, gl_info))
6133             surface_load_pbo(surface, gl_info);
6134
6135         return WINED3D_OK;
6136     }
6137
6138     if (WARN_ON(d3d_surface))
6139     {
6140         DWORD required_access = resource_access_from_location(location);
6141         if ((surface->resource.access_flags & required_access) != required_access)
6142             WARN("Operation requires %#x access, but surface only has %#x.\n",
6143                     required_access, surface->resource.access_flags);
6144     }
6145
6146     if (!(surface->flags & SFLAG_LOCATIONS))
6147     {
6148         ERR("Surface %p does not have any up to date location.\n", surface);
6149         surface->flags |= SFLAG_LOST;
6150         return WINED3DERR_DEVICELOST;
6151     }
6152
6153     switch (location)
6154     {
6155         case SFLAG_INSYSMEM:
6156             surface_load_sysmem(surface, gl_info, rect);
6157             break;
6158
6159         case SFLAG_INDRAWABLE:
6160             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6161                 return hr;
6162             break;
6163
6164         case SFLAG_INRB_RESOLVED:
6165             surface_multisample_resolve(surface);
6166             break;
6167
6168         case SFLAG_INTEXTURE:
6169         case SFLAG_INSRGBTEX:
6170             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6171                 return hr;
6172             break;
6173
6174         default:
6175             ERR("Don't know how to handle location %#x.\n", location);
6176             break;
6177     }
6178
6179     if (!rect)
6180     {
6181         surface->flags |= location;
6182
6183         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6184             surface_evict_sysmem(surface);
6185     }
6186
6187     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6188             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6189     {
6190         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6191     }
6192
6193     return WINED3D_OK;
6194 }
6195
6196 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6197 {
6198     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6199
6200     /* Not on a swapchain - must be offscreen */
6201     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6202
6203     /* The front buffer is always onscreen */
6204     if (surface == swapchain->front_buffer) return FALSE;
6205
6206     /* If the swapchain is rendered to an FBO, the backbuffer is
6207      * offscreen, otherwise onscreen */
6208     return swapchain->render_to_fbo;
6209 }
6210
6211 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* The fixed function blitter has nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6214
6215 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6216 /* Context activation is done by the caller. */
6217 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6218 {
6219     BYTE table[256][4];
6220     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6221
6222     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6223
6224     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6225     ENTER_GL();
6226     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6227     LEAVE_GL();
6228 }
6229
6230 /* Context activation is done by the caller. */
6231 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6232 {
6233     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6234
6235     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6236      * else the surface is converted in software at upload time in LoadLocation.
6237      */
6238     if (!(surface->flags & SFLAG_CONVERTED) && fixup == COMPLEX_FIXUP_P8
6239             && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6240         ffp_blit_p8_upload_palette(surface, context->gl_info);
6241
6242     ENTER_GL();
6243     glEnable(surface->texture_target);
6244     checkGLcall("glEnable(surface->texture_target)");
6245     LEAVE_GL();
6246     return WINED3D_OK;
6247 }
6248
6249 /* Context activation is done by the caller. */
6250 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6251 {
6252     ENTER_GL();
6253     glDisable(GL_TEXTURE_2D);
6254     checkGLcall("glDisable(GL_TEXTURE_2D)");
6255     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6256     {
6257         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6258         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6259     }
6260     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6261     {
6262         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6263         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6264     }
6265     LEAVE_GL();
6266 }
6267
6268 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6269         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6270         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6271 {
6272     enum complex_fixup src_fixup;
6273
6274     switch (blit_op)
6275     {
6276         case WINED3D_BLIT_OP_COLOR_BLIT:
6277             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6278                 return FALSE;
6279
6280             src_fixup = get_complex_fixup(src_format->color_fixup);
6281             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6282             {
6283                 TRACE("Checking support for fixup:\n");
6284                 dump_color_fixup_desc(src_format->color_fixup);
6285             }
6286
6287             if (!is_identity_fixup(dst_format->color_fixup))
6288             {
6289                 TRACE("Destination fixups are not supported\n");
6290                 return FALSE;
6291             }
6292
6293             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6294             {
6295                 TRACE("P8 fixup supported\n");
6296                 return TRUE;
6297             }
6298
6299             /* We only support identity conversions. */
6300             if (is_identity_fixup(src_format->color_fixup))
6301             {
6302                 TRACE("[OK]\n");
6303                 return TRUE;
6304             }
6305
6306             TRACE("[FAILED]\n");
6307             return FALSE;
6308
6309         case WINED3D_BLIT_OP_COLOR_FILL:
6310             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6311                 return FALSE;
6312
6313             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6314             {
6315                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6316                     return FALSE;
6317             }
6318             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6319             {
6320                 TRACE("Color fill not supported\n");
6321                 return FALSE;
6322             }
6323
6324             /* FIXME: We should reject color fills on formats with fixups,
6325              * but this would break P8 color fills for example. */
6326
6327             return TRUE;
6328
6329         case WINED3D_BLIT_OP_DEPTH_FILL:
6330             return TRUE;
6331
6332         default:
6333             TRACE("Unsupported blit_op=%d\n", blit_op);
6334             return FALSE;
6335     }
6336 }
6337
6338 /* Do not call while under the GL lock. */
6339 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6340         const RECT *dst_rect, const struct wined3d_color *color)
6341 {
6342     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6343     struct wined3d_fb_state fb = {&dst_surface, NULL};
6344
6345     return device_clear_render_targets(device, 1, &fb,
6346             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6347 }
6348
6349 /* Do not call while under the GL lock. */
6350 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6351         struct wined3d_surface *surface, const RECT *rect, float depth)
6352 {
6353     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6354     struct wined3d_fb_state fb = {NULL, surface};
6355
6356     return device_clear_render_targets(device, 0, &fb,
6357             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6358 }
6359
6360 const struct blit_shader ffp_blit =  {
6361     ffp_blit_alloc,
6362     ffp_blit_free,
6363     ffp_blit_set,
6364     ffp_blit_unset,
6365     ffp_blit_supported,
6366     ffp_blit_color_fill,
6367     ffp_blit_depth_fill,
6368 };
6369
6370 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6371 {
6372     return WINED3D_OK;
6373 }
6374
/* Context activation is done by the caller.
 *
 * Intentionally empty: cpu_blit_alloc() allocates nothing. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6379
6380 /* Context activation is done by the caller. */
6381 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6382 {
6383     return WINED3D_OK;
6384 }
6385
/* Context activation is done by the caller.
 *
 * Intentionally empty: cpu_blit_set() changes no state. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6390
6391 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6392         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6393         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6394 {
6395     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6396     {
6397         return TRUE;
6398     }
6399
6400     return FALSE;
6401 }
6402
6403 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6404         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6405         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6406 {
6407     UINT row_block_count;
6408     const BYTE *src_row;
6409     BYTE *dst_row;
6410     UINT x, y;
6411
6412     src_row = src_data;
6413     dst_row = dst_data;
6414
6415     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6416
6417     if (!flags)
6418     {
6419         for (y = 0; y < update_h; y += format->block_height)
6420         {
6421             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6422             src_row += src_pitch;
6423             dst_row += dst_pitch;
6424         }
6425
6426         return WINED3D_OK;
6427     }
6428
6429     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6430     {
6431         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6432
6433         switch (format->id)
6434         {
6435             case WINED3DFMT_DXT1:
6436                 for (y = 0; y < update_h; y += format->block_height)
6437                 {
6438                     struct block
6439                     {
6440                         WORD color[2];
6441                         BYTE control_row[4];
6442                     };
6443
6444                     const struct block *s = (const struct block *)src_row;
6445                     struct block *d = (struct block *)dst_row;
6446
6447                     for (x = 0; x < row_block_count; ++x)
6448                     {
6449                         d[x].color[0] = s[x].color[0];
6450                         d[x].color[1] = s[x].color[1];
6451                         d[x].control_row[0] = s[x].control_row[3];
6452                         d[x].control_row[1] = s[x].control_row[2];
6453                         d[x].control_row[2] = s[x].control_row[1];
6454                         d[x].control_row[3] = s[x].control_row[0];
6455                     }
6456                     src_row -= src_pitch;
6457                     dst_row += dst_pitch;
6458                 }
6459                 return WINED3D_OK;
6460
6461             case WINED3DFMT_DXT3:
6462                 for (y = 0; y < update_h; y += format->block_height)
6463                 {
6464                     struct block
6465                     {
6466                         WORD alpha_row[4];
6467                         WORD color[2];
6468                         BYTE control_row[4];
6469                     };
6470
6471                     const struct block *s = (const struct block *)src_row;
6472                     struct block *d = (struct block *)dst_row;
6473
6474                     for (x = 0; x < row_block_count; ++x)
6475                     {
6476                         d[x].alpha_row[0] = s[x].alpha_row[3];
6477                         d[x].alpha_row[1] = s[x].alpha_row[2];
6478                         d[x].alpha_row[2] = s[x].alpha_row[1];
6479                         d[x].alpha_row[3] = s[x].alpha_row[0];
6480                         d[x].color[0] = s[x].color[0];
6481                         d[x].color[1] = s[x].color[1];
6482                         d[x].control_row[0] = s[x].control_row[3];
6483                         d[x].control_row[1] = s[x].control_row[2];
6484                         d[x].control_row[2] = s[x].control_row[1];
6485                         d[x].control_row[3] = s[x].control_row[0];
6486                     }
6487                     src_row -= src_pitch;
6488                     dst_row += dst_pitch;
6489                 }
6490                 return WINED3D_OK;
6491
6492             default:
6493                 FIXME("Compressed flip not implemented for format %s.\n",
6494                         debug_d3dformat(format->id));
6495                 return E_NOTIMPL;
6496         }
6497     }
6498
6499     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6500             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6501
6502     return E_NOTIMPL;
6503 }
6504
6505 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6506         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6507         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6508 {
6509     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6510     const struct wined3d_format *src_format, *dst_format;
6511     struct wined3d_surface *orig_src = src_surface;
6512     struct wined3d_mapped_rect dst_map, src_map;
6513     HRESULT hr = WINED3D_OK;
6514     const BYTE *sbuf;
6515     RECT xdst,xsrc;
6516     BYTE *dbuf;
6517     int x, y;
6518
6519     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6520             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6521             flags, fx, debug_d3dtexturefiltertype(filter));
6522
6523     xsrc = *src_rect;
6524
6525     if (!src_surface)
6526     {
6527         RECT full_rect;
6528
6529         full_rect.left = 0;
6530         full_rect.top = 0;
6531         full_rect.right = dst_surface->resource.width;
6532         full_rect.bottom = dst_surface->resource.height;
6533         IntersectRect(&xdst, &full_rect, dst_rect);
6534     }
6535     else
6536     {
6537         BOOL clip_horiz, clip_vert;
6538
6539         xdst = *dst_rect;
6540         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6541         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6542
6543         if (clip_vert || clip_horiz)
6544         {
6545             /* Now check if this is a special case or not... */
6546             if ((flags & WINEDDBLT_DDFX)
6547                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6548                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6549             {
6550                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6551                 return WINED3D_OK;
6552             }
6553
6554             if (clip_horiz)
6555             {
6556                 if (xdst.left < 0)
6557                 {
6558                     xsrc.left -= xdst.left;
6559                     xdst.left = 0;
6560                 }
6561                 if (xdst.right > dst_surface->resource.width)
6562                 {
6563                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6564                     xdst.right = (int)dst_surface->resource.width;
6565                 }
6566             }
6567
6568             if (clip_vert)
6569             {
6570                 if (xdst.top < 0)
6571                 {
6572                     xsrc.top -= xdst.top;
6573                     xdst.top = 0;
6574                 }
6575                 if (xdst.bottom > dst_surface->resource.height)
6576                 {
6577                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6578                     xdst.bottom = (int)dst_surface->resource.height;
6579                 }
6580             }
6581
6582             /* And check if after clipping something is still to be done... */
6583             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6584                     || (xdst.left >= (int)dst_surface->resource.width)
6585                     || (xdst.top >= (int)dst_surface->resource.height)
6586                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6587                     || (xsrc.left >= (int)src_surface->resource.width)
6588                     || (xsrc.top >= (int)src_surface->resource.height))
6589             {
6590                 TRACE("Nothing to be done after clipping.\n");
6591                 return WINED3D_OK;
6592             }
6593         }
6594     }
6595
6596     if (src_surface == dst_surface)
6597     {
6598         wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6599         src_map = dst_map;
6600         src_format = dst_surface->resource.format;
6601         dst_format = src_format;
6602     }
6603     else
6604     {
6605         dst_format = dst_surface->resource.format;
6606         if (src_surface)
6607         {
6608             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6609             {
6610                 src_surface = surface_convert_format(src_surface, dst_format->id);
6611                 if (!src_surface)
6612                 {
6613                     /* The conv function writes a FIXME */
6614                     WARN("Cannot convert source surface format to dest format.\n");
6615                     goto release;
6616                 }
6617             }
6618             wined3d_surface_map(src_surface, &src_map, NULL, WINED3DLOCK_READONLY);
6619             src_format = src_surface->resource.format;
6620         }
6621         else
6622         {
6623             src_format = dst_format;
6624         }
6625         if (dst_rect)
6626             wined3d_surface_map(dst_surface, &dst_map, &xdst, 0);
6627         else
6628             wined3d_surface_map(dst_surface, &dst_map, NULL, 0);
6629     }
6630
6631     bpp = dst_surface->resource.format->byte_count;
6632     srcheight = xsrc.bottom - xsrc.top;
6633     srcwidth = xsrc.right - xsrc.left;
6634     dstheight = xdst.bottom - xdst.top;
6635     dstwidth = xdst.right - xdst.left;
6636     width = (xdst.right - xdst.left) * bpp;
6637
6638     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6639     {
6640         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6641
6642         if (src_surface == dst_surface)
6643         {
6644             FIXME("Only plain blits supported on compressed surfaces.\n");
6645             hr = E_NOTIMPL;
6646             goto release;
6647         }
6648
6649         if (srcheight != dstheight || srcwidth != dstwidth)
6650         {
6651             WARN("Stretching not supported on compressed surfaces.\n");
6652             hr = WINED3DERR_INVALIDCALL;
6653             goto release;
6654         }
6655
6656         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6657         {
6658             WARN("Rectangle not block-aligned.\n");
6659             hr = WINED3DERR_INVALIDCALL;
6660             goto release;
6661         }
6662
6663         hr = surface_cpu_blt_compressed(src_map.data, dst_map.data,
6664                 src_map.row_pitch, dst_map.row_pitch, dstwidth, dstheight,
6665                 src_format, flags, fx);
6666         goto release;
6667     }
6668
6669     if (dst_rect && src_surface != dst_surface)
6670         dbuf = dst_map.data;
6671     else
6672         dbuf = (BYTE *)dst_map.data + (xdst.top * dst_map.row_pitch) + (xdst.left * bpp);
6673
6674     /* First, all the 'source-less' blits */
6675     if (flags & WINEDDBLT_COLORFILL)
6676     {
6677         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, fx->u5.dwFillColor);
6678         flags &= ~WINEDDBLT_COLORFILL;
6679     }
6680
6681     if (flags & WINEDDBLT_DEPTHFILL)
6682     {
6683         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6684     }
6685     if (flags & WINEDDBLT_ROP)
6686     {
6687         /* Catch some degenerate cases here. */
6688         switch (fx->dwROP)
6689         {
6690             case BLACKNESS:
6691                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, 0);
6692                 break;
6693             case 0xAA0029: /* No-op */
6694                 break;
6695             case WHITENESS:
6696                 hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dst_map.row_pitch, ~0U);
6697                 break;
6698             case SRCCOPY: /* Well, we do that below? */
6699                 break;
6700             default:
6701                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6702                 goto error;
6703         }
6704         flags &= ~WINEDDBLT_ROP;
6705     }
6706     if (flags & WINEDDBLT_DDROPS)
6707     {
6708         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6709     }
6710     /* Now the 'with source' blits. */
6711     if (src_surface)
6712     {
6713         const BYTE *sbase;
6714         int sx, xinc, sy, yinc;
6715
6716         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6717             goto release;
6718
6719         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6720                 && (srcwidth != dstwidth || srcheight != dstheight))
6721         {
6722             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6723             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6724         }
6725
6726         sbase = (BYTE *)src_map.data + (xsrc.top * src_map.row_pitch) + xsrc.left * bpp;
6727         xinc = (srcwidth << 16) / dstwidth;
6728         yinc = (srcheight << 16) / dstheight;
6729
6730         if (!flags)
6731         {
6732             /* No effects, we can cheat here. */
6733             if (dstwidth == srcwidth)
6734             {
6735                 if (dstheight == srcheight)
6736                 {
6737                     /* No stretching in either direction. This needs to be as
6738                      * fast as possible. */
6739                     sbuf = sbase;
6740
6741                     /* Check for overlapping surfaces. */
6742                     if (src_surface != dst_surface || xdst.top < xsrc.top
6743                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6744                     {
6745                         /* No overlap, or dst above src, so copy from top downwards. */
6746                         for (y = 0; y < dstheight; ++y)
6747                         {
6748                             memcpy(dbuf, sbuf, width);
6749                             sbuf += src_map.row_pitch;
6750                             dbuf += dst_map.row_pitch;
6751                         }
6752                     }
6753                     else if (xdst.top > xsrc.top)
6754                     {
6755                         /* Copy from bottom upwards. */
6756                         sbuf += src_map.row_pitch * dstheight;
6757                         dbuf += dst_map.row_pitch * dstheight;
6758                         for (y = 0; y < dstheight; ++y)
6759                         {
6760                             sbuf -= src_map.row_pitch;
6761                             dbuf -= dst_map.row_pitch;
6762                             memcpy(dbuf, sbuf, width);
6763                         }
6764                     }
6765                     else
6766                     {
6767                         /* Src and dst overlapping on the same line, use memmove. */
6768                         for (y = 0; y < dstheight; ++y)
6769                         {
6770                             memmove(dbuf, sbuf, width);
6771                             sbuf += src_map.row_pitch;
6772                             dbuf += dst_map.row_pitch;
6773                         }
6774                     }
6775                 }
6776                 else
6777                 {
6778                     /* Stretching in y direction only. */
6779                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6780                     {
6781                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6782                         memcpy(dbuf, sbuf, width);
6783                         dbuf += dst_map.row_pitch;
6784                     }
6785                 }
6786             }
6787             else
6788             {
6789                 /* Stretching in X direction. */
6790                 int last_sy = -1;
6791                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6792                 {
6793                     sbuf = sbase + (sy >> 16) * src_map.row_pitch;
6794
6795                     if ((sy >> 16) == (last_sy >> 16))
6796                     {
6797                         /* This source row is the same as last source row -
6798                          * Copy the already stretched row. */
6799                         memcpy(dbuf, dbuf - dst_map.row_pitch, width);
6800                     }
6801                     else
6802                     {
6803 #define STRETCH_ROW(type) \
6804 do { \
6805     const type *s = (const type *)sbuf; \
6806     type *d = (type *)dbuf; \
6807     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6808         d[x] = s[sx >> 16]; \
6809 } while(0)
6810
6811                         switch(bpp)
6812                         {
6813                             case 1:
6814                                 STRETCH_ROW(BYTE);
6815                                 break;
6816                             case 2:
6817                                 STRETCH_ROW(WORD);
6818                                 break;
6819                             case 4:
6820                                 STRETCH_ROW(DWORD);
6821                                 break;
6822                             case 3:
6823                             {
6824                                 const BYTE *s;
6825                                 BYTE *d = dbuf;
6826                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6827                                 {
6828                                     DWORD pixel;
6829
6830                                     s = sbuf + 3 * (sx >> 16);
6831                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6832                                     d[0] = (pixel      ) & 0xff;
6833                                     d[1] = (pixel >>  8) & 0xff;
6834                                     d[2] = (pixel >> 16) & 0xff;
6835                                     d += 3;
6836                                 }
6837                                 break;
6838                             }
6839                             default:
6840                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6841                                 hr = WINED3DERR_NOTAVAILABLE;
6842                                 goto error;
6843                         }
6844 #undef STRETCH_ROW
6845                     }
6846                     dbuf += dst_map.row_pitch;
6847                     last_sy = sy;
6848                 }
6849             }
6850         }
6851         else
6852         {
6853             LONG dstyinc = dst_map.row_pitch, dstxinc = bpp;
6854             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6855             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6856             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6857             {
6858                 /* The color keying flags are checked for correctness in ddraw */
6859                 if (flags & WINEDDBLT_KEYSRC)
6860                 {
6861                     keylow  = src_surface->src_blt_color_key.color_space_low_value;
6862                     keyhigh = src_surface->src_blt_color_key.color_space_high_value;
6863                 }
6864                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6865                 {
6866                     keylow = fx->ddckSrcColorkey.color_space_low_value;
6867                     keyhigh = fx->ddckSrcColorkey.color_space_high_value;
6868                 }
6869
6870                 if (flags & WINEDDBLT_KEYDEST)
6871                 {
6872                     /* Destination color keys are taken from the source surface! */
6873                     destkeylow = src_surface->dst_blt_color_key.color_space_low_value;
6874                     destkeyhigh = src_surface->dst_blt_color_key.color_space_high_value;
6875                 }
6876                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6877                 {
6878                     destkeylow = fx->ddckDestColorkey.color_space_low_value;
6879                     destkeyhigh = fx->ddckDestColorkey.color_space_high_value;
6880                 }
6881
6882                 if (bpp == 1)
6883                 {
6884                     keymask = 0xff;
6885                 }
6886                 else
6887                 {
6888                     keymask = src_format->red_mask
6889                             | src_format->green_mask
6890                             | src_format->blue_mask;
6891                 }
6892                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6893             }
6894
6895             if (flags & WINEDDBLT_DDFX)
6896             {
6897                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6898                 LONG tmpxy;
6899                 dTopLeft     = dbuf;
6900                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6901                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dst_map.row_pitch);
6902                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6903
6904                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6905                 {
6906                     /* I don't think we need to do anything about this flag */
6907                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6908                 }
6909                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6910                 {
6911                     tmp          = dTopRight;
6912                     dTopRight    = dTopLeft;
6913                     dTopLeft     = tmp;
6914                     tmp          = dBottomRight;
6915                     dBottomRight = dBottomLeft;
6916                     dBottomLeft  = tmp;
6917                     dstxinc = dstxinc * -1;
6918                 }
6919                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6920                 {
6921                     tmp          = dTopLeft;
6922                     dTopLeft     = dBottomLeft;
6923                     dBottomLeft  = tmp;
6924                     tmp          = dTopRight;
6925                     dTopRight    = dBottomRight;
6926                     dBottomRight = tmp;
6927                     dstyinc = dstyinc * -1;
6928                 }
6929                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6930                 {
6931                     /* I don't think we need to do anything about this flag */
6932                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6933                 }
6934                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6935                 {
6936                     tmp          = dBottomRight;
6937                     dBottomRight = dTopLeft;
6938                     dTopLeft     = tmp;
6939                     tmp          = dBottomLeft;
6940                     dBottomLeft  = dTopRight;
6941                     dTopRight    = tmp;
6942                     dstxinc = dstxinc * -1;
6943                     dstyinc = dstyinc * -1;
6944                 }
6945                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6946                 {
6947                     tmp          = dTopLeft;
6948                     dTopLeft     = dBottomLeft;
6949                     dBottomLeft  = dBottomRight;
6950                     dBottomRight = dTopRight;
6951                     dTopRight    = tmp;
6952                     tmpxy   = dstxinc;
6953                     dstxinc = dstyinc;
6954                     dstyinc = tmpxy;
6955                     dstxinc = dstxinc * -1;
6956                 }
6957                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6958                 {
6959                     tmp          = dTopLeft;
6960                     dTopLeft     = dTopRight;
6961                     dTopRight    = dBottomRight;
6962                     dBottomRight = dBottomLeft;
6963                     dBottomLeft  = tmp;
6964                     tmpxy   = dstxinc;
6965                     dstxinc = dstyinc;
6966                     dstyinc = tmpxy;
6967                     dstyinc = dstyinc * -1;
6968                 }
6969                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6970                 {
6971                     /* I don't think we need to do anything about this flag */
6972                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6973                 }
6974                 dbuf = dTopLeft;
6975                 flags &= ~(WINEDDBLT_DDFX);
6976             }
6977
6978 #define COPY_COLORKEY_FX(type) \
6979 do { \
6980     const type *s; \
6981     type *d = (type *)dbuf, *dx, tmp; \
6982     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6983     { \
6984         s = (const type *)(sbase + (sy >> 16) * src_map.row_pitch); \
6985         dx = d; \
6986         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6987         { \
6988             tmp = s[sx >> 16]; \
6989             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6990                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6991             { \
6992                 dx[0] = tmp; \
6993             } \
6994             dx = (type *)(((BYTE *)dx) + dstxinc); \
6995         } \
6996         d = (type *)(((BYTE *)d) + dstyinc); \
6997     } \
6998 } while(0)
6999
7000             switch (bpp)
7001             {
7002                 case 1:
7003                     COPY_COLORKEY_FX(BYTE);
7004                     break;
7005                 case 2:
7006                     COPY_COLORKEY_FX(WORD);
7007                     break;
7008                 case 4:
7009                     COPY_COLORKEY_FX(DWORD);
7010                     break;
7011                 case 3:
7012                 {
7013                     const BYTE *s;
7014                     BYTE *d = dbuf, *dx;
7015                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7016                     {
7017                         sbuf = sbase + (sy >> 16) * src_map.row_pitch;
7018                         dx = d;
7019                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7020                         {
7021                             DWORD pixel, dpixel = 0;
7022                             s = sbuf + 3 * (sx>>16);
7023                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7024                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7025                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7026                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7027                             {
7028                                 dx[0] = (pixel      ) & 0xff;
7029                                 dx[1] = (pixel >>  8) & 0xff;
7030                                 dx[2] = (pixel >> 16) & 0xff;
7031                             }
7032                             dx += dstxinc;
7033                         }
7034                         d += dstyinc;
7035                     }
7036                     break;
7037                 }
7038                 default:
7039                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7040                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7041                     hr = WINED3DERR_NOTAVAILABLE;
7042                     goto error;
7043 #undef COPY_COLORKEY_FX
7044             }
7045         }
7046     }
7047
7048 error:
7049     if (flags && FIXME_ON(d3d_surface))
7050     {
7051         FIXME("\tUnsupported flags: %#x.\n", flags);
7052     }
7053
7054 release:
7055     wined3d_surface_unmap(dst_surface);
7056     if (src_surface && src_surface != dst_surface)
7057         wined3d_surface_unmap(src_surface);
7058     /* Release the converted surface, if any. */
7059     if (src_surface && src_surface != orig_src)
7060         wined3d_surface_decref(src_surface);
7061
7062     return hr;
7063 }
7064
7065 /* Do not call while under the GL lock. */
7066 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7067         const RECT *dst_rect, const struct wined3d_color *color)
7068 {
7069     static const RECT src_rect;
7070     WINEDDBLTFX BltFx;
7071
7072     memset(&BltFx, 0, sizeof(BltFx));
7073     BltFx.dwSize = sizeof(BltFx);
7074     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7075     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7076             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7077 }
7078
7079 /* Do not call while under the GL lock. */
7080 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7081         struct wined3d_surface *surface, const RECT *rect, float depth)
7082 {
7083     FIXME("Depth filling not implemented by cpu_blit.\n");
7084     return WINED3DERR_INVALIDCALL;
7085 }
7086
7087 const struct blit_shader cpu_blit =  {
7088     cpu_blit_alloc,
7089     cpu_blit_free,
7090     cpu_blit_set,
7091     cpu_blit_unset,
7092     cpu_blit_supported,
7093     cpu_blit_color_fill,
7094     cpu_blit_depth_fill,
7095 };
7096
7097 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7098         UINT width, UINT height, UINT level, enum wined3d_multisample_type multisample_type,
7099         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7100         WINED3DPOOL pool, DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops)
7101 {
7102     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7103     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7104     BOOL lockable = flags & WINED3D_SURFACE_MAPPABLE;
7105     unsigned int resource_size;
7106     HRESULT hr;
7107
7108     if (multisample_quality > 0)
7109     {
7110         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7111         multisample_quality = 0;
7112     }
7113
7114     /* Quick lockable sanity check.
7115      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7116      * this function is too deep to need to care about things like this.
7117      * Levels need to be checked too, since they all affect what can be done. */
7118     switch (pool)
7119     {
7120         case WINED3DPOOL_SCRATCH:
7121             if (!lockable)
7122             {
7123                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7124                         "which are mutually exclusive, setting lockable to TRUE.\n");
7125                 lockable = TRUE;
7126             }
7127             break;
7128
7129         case WINED3DPOOL_SYSTEMMEM:
7130             if (!lockable)
7131                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7132             break;
7133
7134         case WINED3DPOOL_MANAGED:
7135             if (usage & WINED3DUSAGE_DYNAMIC)
7136                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7137             break;
7138
7139         case WINED3DPOOL_DEFAULT:
7140             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7141                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7142             break;
7143
7144         default:
7145             FIXME("Unknown pool %#x.\n", pool);
7146             break;
7147     };
7148
7149     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7150         FIXME("Trying to create a render target that isn't in the default pool.\n");
7151
7152     /* FIXME: Check that the format is supported by the device. */
7153
7154     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7155     if (!resource_size)
7156         return WINED3DERR_INVALIDCALL;
7157
7158     surface->surface_type = surface_type;
7159
7160     switch (surface_type)
7161     {
7162         case SURFACE_OPENGL:
7163             surface->surface_ops = &surface_ops;
7164             break;
7165
7166         case SURFACE_GDI:
7167             surface->surface_ops = &gdi_surface_ops;
7168             break;
7169
7170         default:
7171             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7172             return WINED3DERR_INVALIDCALL;
7173     }
7174
7175     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7176             multisample_type, multisample_quality, usage, pool, width, height, 1,
7177             resource_size, parent, parent_ops, &surface_resource_ops);
7178     if (FAILED(hr))
7179     {
7180         WARN("Failed to initialize resource, returning %#x.\n", hr);
7181         return hr;
7182     }
7183
7184     /* "Standalone" surface. */
7185     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7186
7187     surface->texture_level = level;
7188     list_init(&surface->overlays);
7189
7190     /* Flags */
7191     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7192     if (flags & WINED3D_SURFACE_DISCARD)
7193         surface->flags |= SFLAG_DISCARD;
7194     if (flags & WINED3D_SURFACE_PIN_SYSMEM)
7195         surface->flags |= SFLAG_PIN_SYSMEM;
7196     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7197         surface->flags |= SFLAG_LOCKABLE;
7198     /* I'm not sure if this qualifies as a hack or as an optimization. It
7199      * seems reasonable to assume that lockable render targets will get
7200      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7201      * creation. However, the other reason we want to do this is that several
7202      * ddraw applications access surface memory while the surface isn't
7203      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7204      * future locks prevents these from crashing. */
7205     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7206         surface->flags |= SFLAG_DYNLOCK;
7207
7208     /* Mark the texture as dirty so that it gets loaded first time around. */
7209     surface_add_dirty_rect(surface, NULL);
7210     list_init(&surface->renderbuffers);
7211
7212     TRACE("surface %p, memory %p, size %u\n",
7213             surface, surface->resource.allocatedMemory, surface->resource.size);
7214
7215     /* Call the private setup routine */
7216     hr = surface->surface_ops->surface_private_setup(surface);
7217     if (FAILED(hr))
7218     {
7219         ERR("Private setup failed, returning %#x\n", hr);
7220         surface_cleanup(surface);
7221         return hr;
7222     }
7223
7224     /* Similar to lockable rendertargets above, creating the DIB section
7225      * during surface initialization prevents the sysmem pointer from changing
7226      * after a wined3d_surface_getdc() call. */
7227     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7228             && SUCCEEDED(surface_create_dib_section(surface)))
7229     {
7230         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7231         surface->resource.heapMemory = NULL;
7232         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7233     }
7234
7235     return hr;
7236 }
7237
7238 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7239         enum wined3d_format_id format_id, UINT level, DWORD usage, WINED3DPOOL pool,
7240         enum wined3d_multisample_type multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7241         DWORD flags, void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7242 {
7243     struct wined3d_surface *object;
7244     HRESULT hr;
7245
7246     TRACE("device %p, width %u, height %u, format %s, level %u\n",
7247             device, width, height, debug_d3dformat(format_id), level);
7248     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7249             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7250     TRACE("surface_type %#x, flags %#x, parent %p, parent_ops %p.\n", surface_type, flags, parent, parent_ops);
7251
7252     if (surface_type == SURFACE_OPENGL && !device->adapter)
7253     {
7254         ERR("OpenGL surfaces are not available without OpenGL.\n");
7255         return WINED3DERR_NOTAVAILABLE;
7256     }
7257
7258     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7259     if (!object)
7260     {
7261         ERR("Failed to allocate surface memory.\n");
7262         return WINED3DERR_OUTOFVIDEOMEMORY;
7263     }
7264
7265     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level,
7266             multisample_type, multisample_quality, device, usage, format_id, pool, flags, parent, parent_ops);
7267     if (FAILED(hr))
7268     {
7269         WARN("Failed to initialize surface, returning %#x.\n", hr);
7270         HeapFree(GetProcessHeap(), 0, object);
7271         return hr;
7272     }
7273
7274     TRACE("Created surface %p.\n", object);
7275     *surface = object;
7276
7277     return hr;
7278 }