mshtml: Wine Gecko 1.4 release.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
/* Forward declarations of two file-local (static) blit routines. Both take a
 * destination surface and rectangle, a source surface and rectangle, blit
 * flags, an optional effects structure and a texture filter, and return an
 * HRESULT. Their definitions are not part of this section of the file. */
static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
        struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
        WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         DeleteDC(surface->hDC);
101         DeleteObject(surface->dib.DIBsection);
102         surface->dib.bitmap_data = NULL;
103         surface->resource.allocatedMemory = NULL;
104     }
105
106     if (surface->flags & SFLAG_USERPTR)
107         wined3d_surface_set_mem(surface, NULL);
108     if (surface->overlay_dest)
109         list_remove(&surface->overlay_entry);
110
111     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
112     {
113         list_remove(&overlay->overlay_entry);
114         overlay->overlay_dest = NULL;
115     }
116
117     resource_cleanup(&surface->resource);
118 }
119
120 void surface_update_draw_binding(struct wined3d_surface *surface)
121 {
122     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
123         surface->draw_binding = SFLAG_INDRAWABLE;
124     else if (surface->resource.multisample_type)
125         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
126     else
127         surface->draw_binding = SFLAG_INTEXTURE;
128 }
129
130 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
131 {
132     TRACE("surface %p, container %p.\n", surface, container);
133
134     if (!container && type != WINED3D_CONTAINER_NONE)
135         ERR("Setting NULL container of type %#x.\n", type);
136
137     if (type == WINED3D_CONTAINER_SWAPCHAIN)
138     {
139         surface->get_drawable_size = get_drawable_size_swapchain;
140     }
141     else
142     {
143         switch (wined3d_settings.offscreen_rendering_mode)
144         {
145             case ORM_FBO:
146                 surface->get_drawable_size = get_drawable_size_fbo;
147                 break;
148
149             case ORM_BACKBUFFER:
150                 surface->get_drawable_size = get_drawable_size_backbuffer;
151                 break;
152
153             default:
154                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
155                 return;
156         }
157     }
158
159     surface->container.type = type;
160     surface->container.u.base = container;
161     surface_update_draw_binding(surface);
162 }
163
164 struct blt_info
165 {
166     GLenum binding;
167     GLenum bind_target;
168     enum tex_types tex_type;
169     GLfloat coords[4][3];
170 };
171
/* A rectangle with floating point edges: left, top, right, bottom. */
struct float_rect
{
    float l, t, r, b;
};
179
180 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
181 {
182     f->l = ((r->left * 2.0f) / w) - 1.0f;
183     f->t = ((r->top * 2.0f) / h) - 1.0f;
184     f->r = ((r->right * 2.0f) / w) - 1.0f;
185     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
186 }
187
188 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
189 {
190     GLfloat (*coords)[3] = info->coords;
191     struct float_rect f;
192
193     switch (target)
194     {
195         default:
196             FIXME("Unsupported texture target %#x\n", target);
197             /* Fall back to GL_TEXTURE_2D */
198         case GL_TEXTURE_2D:
199             info->binding = GL_TEXTURE_BINDING_2D;
200             info->bind_target = GL_TEXTURE_2D;
201             info->tex_type = tex_2d;
202             coords[0][0] = (float)rect->left / w;
203             coords[0][1] = (float)rect->top / h;
204             coords[0][2] = 0.0f;
205
206             coords[1][0] = (float)rect->right / w;
207             coords[1][1] = (float)rect->top / h;
208             coords[1][2] = 0.0f;
209
210             coords[2][0] = (float)rect->left / w;
211             coords[2][1] = (float)rect->bottom / h;
212             coords[2][2] = 0.0f;
213
214             coords[3][0] = (float)rect->right / w;
215             coords[3][1] = (float)rect->bottom / h;
216             coords[3][2] = 0.0f;
217             break;
218
219         case GL_TEXTURE_RECTANGLE_ARB:
220             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
221             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
222             info->tex_type = tex_rect;
223             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
224             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
225             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
226             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
227             break;
228
229         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
230             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
231             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
232             info->tex_type = tex_cube;
233             cube_coords_float(rect, w, h, &f);
234
235             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
236             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
237             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
238             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
239             break;
240
241         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
242             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
243             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
244             info->tex_type = tex_cube;
245             cube_coords_float(rect, w, h, &f);
246
247             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
248             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
249             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
250             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
251             break;
252
253         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
254             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
255             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
256             info->tex_type = tex_cube;
257             cube_coords_float(rect, w, h, &f);
258
259             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
260             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
261             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
262             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
263             break;
264
265         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
266             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
267             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
268             info->tex_type = tex_cube;
269             cube_coords_float(rect, w, h, &f);
270
271             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
272             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
273             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
274             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
275             break;
276
277         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
278             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
279             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
280             info->tex_type = tex_cube;
281             cube_coords_float(rect, w, h, &f);
282
283             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
284             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
285             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
286             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
287             break;
288
289         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
290             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
291             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
292             info->tex_type = tex_cube;
293             cube_coords_float(rect, w, h, &f);
294
295             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
296             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
297             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
298             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
299             break;
300     }
301 }
302
303 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
304 {
305     if (rect_in)
306         *rect_out = *rect_in;
307     else
308     {
309         rect_out->left = 0;
310         rect_out->top = 0;
311         rect_out->right = surface->resource.width;
312         rect_out->bottom = surface->resource.height;
313     }
314 }
315
316 /* GL locking and context activation is done by the caller */
317 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
318         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
319 {
320     struct blt_info info;
321
322     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
323
324     glEnable(info.bind_target);
325     checkGLcall("glEnable(bind_target)");
326
327     context_bind_texture(context, info.bind_target, src_surface->texture_name);
328
329     /* Filtering for StretchRect */
330     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
331             wined3d_gl_mag_filter(magLookup, Filter));
332     checkGLcall("glTexParameteri");
333     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
334             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
337     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
338     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
339         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
340     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
341     checkGLcall("glTexEnvi");
342
343     /* Draw a quad */
344     glBegin(GL_TRIANGLE_STRIP);
345     glTexCoord3fv(info.coords[0]);
346     glVertex2i(dst_rect->left, dst_rect->top);
347
348     glTexCoord3fv(info.coords[1]);
349     glVertex2i(dst_rect->right, dst_rect->top);
350
351     glTexCoord3fv(info.coords[2]);
352     glVertex2i(dst_rect->left, dst_rect->bottom);
353
354     glTexCoord3fv(info.coords[3]);
355     glVertex2i(dst_rect->right, dst_rect->bottom);
356     glEnd();
357
358     /* Unbind the texture */
359     context_bind_texture(context, info.bind_target, 0);
360
361     /* We changed the filtering settings on the texture. Inform the
362      * container about this to get the filters reset properly next draw. */
363     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
364     {
365         struct wined3d_texture *texture = src_surface->container.u.texture;
366         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
367         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
368         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
369         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
370     }
371 }
372
373 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
374 {
375     const struct wined3d_format *format = surface->resource.format;
376     SYSTEM_INFO sysInfo;
377     BITMAPINFO *b_info;
378     int extraline = 0;
379     DWORD *masks;
380     UINT usage;
381     HDC dc;
382
383     TRACE("surface %p.\n", surface);
384
385     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
386     {
387         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
388         return WINED3DERR_INVALIDCALL;
389     }
390
391     switch (format->byte_count)
392     {
393         case 2:
394         case 4:
395             /* Allocate extra space to store the RGB bit masks. */
396             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
397             break;
398
399         case 3:
400             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
401             break;
402
403         default:
404             /* Allocate extra space for a palette. */
405             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
406                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
407             break;
408     }
409
410     if (!b_info)
411         return E_OUTOFMEMORY;
412
413     /* Some applications access the surface in via DWORDs, and do not take
414      * the necessary care at the end of the surface. So we need at least
415      * 4 extra bytes at the end of the surface. Check against the page size,
416      * if the last page used for the surface has at least 4 spare bytes we're
417      * safe, otherwise add an extra line to the DIB section. */
418     GetSystemInfo(&sysInfo);
419     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
420     {
421         extraline = 1;
422         TRACE("Adding an extra line to the DIB section.\n");
423     }
424
425     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
426     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
427     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
428     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
429     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
430             * wined3d_surface_get_pitch(surface);
431     b_info->bmiHeader.biPlanes = 1;
432     b_info->bmiHeader.biBitCount = format->byte_count * 8;
433
434     b_info->bmiHeader.biXPelsPerMeter = 0;
435     b_info->bmiHeader.biYPelsPerMeter = 0;
436     b_info->bmiHeader.biClrUsed = 0;
437     b_info->bmiHeader.biClrImportant = 0;
438
439     /* Get the bit masks */
440     masks = (DWORD *)b_info->bmiColors;
441     switch (surface->resource.format->id)
442     {
443         case WINED3DFMT_B8G8R8_UNORM:
444             usage = DIB_RGB_COLORS;
445             b_info->bmiHeader.biCompression = BI_RGB;
446             break;
447
448         case WINED3DFMT_B5G5R5X1_UNORM:
449         case WINED3DFMT_B5G5R5A1_UNORM:
450         case WINED3DFMT_B4G4R4A4_UNORM:
451         case WINED3DFMT_B4G4R4X4_UNORM:
452         case WINED3DFMT_B2G3R3_UNORM:
453         case WINED3DFMT_B2G3R3A8_UNORM:
454         case WINED3DFMT_R10G10B10A2_UNORM:
455         case WINED3DFMT_R8G8B8A8_UNORM:
456         case WINED3DFMT_R8G8B8X8_UNORM:
457         case WINED3DFMT_B10G10R10A2_UNORM:
458         case WINED3DFMT_B5G6R5_UNORM:
459         case WINED3DFMT_R16G16B16A16_UNORM:
460             usage = 0;
461             b_info->bmiHeader.biCompression = BI_BITFIELDS;
462             masks[0] = format->red_mask;
463             masks[1] = format->green_mask;
464             masks[2] = format->blue_mask;
465             break;
466
467         default:
468             /* Don't know palette */
469             b_info->bmiHeader.biCompression = BI_RGB;
470             usage = 0;
471             break;
472     }
473
474     if (!(dc = GetDC(0)))
475     {
476         HeapFree(GetProcessHeap(), 0, b_info);
477         return HRESULT_FROM_WIN32(GetLastError());
478     }
479
480     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
481             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
482             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
483     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
484     ReleaseDC(0, dc);
485
486     if (!surface->dib.DIBsection)
487     {
488         ERR("Failed to create DIB section.\n");
489         HeapFree(GetProcessHeap(), 0, b_info);
490         return HRESULT_FROM_WIN32(GetLastError());
491     }
492
493     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
494     /* Copy the existing surface to the dib section. */
495     if (surface->resource.allocatedMemory)
496     {
497         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
498                 surface->resource.height * wined3d_surface_get_pitch(surface));
499     }
500     else
501     {
502         /* This is to make maps read the GL texture although memory is allocated. */
503         surface->flags &= ~SFLAG_INSYSMEM;
504     }
505     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
506
507     HeapFree(GetProcessHeap(), 0, b_info);
508
509     /* Now allocate a DC. */
510     surface->hDC = CreateCompatibleDC(0);
511     SelectObject(surface->hDC, surface->dib.DIBsection);
512     TRACE("Using wined3d palette %p.\n", surface->palette);
513     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
514
515     surface->flags |= SFLAG_DIBSECTION;
516
517     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
518     surface->resource.heapMemory = NULL;
519
520     return WINED3D_OK;
521 }
522
523 static void surface_prepare_system_memory(struct wined3d_surface *surface)
524 {
525     struct wined3d_device *device = surface->resource.device;
526     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
527
528     TRACE("surface %p.\n", surface);
529
530     /* Performance optimization: Count how often a surface is locked, if it is
531      * locked regularly do not throw away the system memory copy. This avoids
532      * the need to download the surface from OpenGL all the time. The surface
533      * is still downloaded if the OpenGL texture is changed. */
534     if (!(surface->flags & SFLAG_DYNLOCK))
535     {
536         if (++surface->lockCount > MAXLOCKCOUNT)
537         {
538             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
539             surface->flags |= SFLAG_DYNLOCK;
540         }
541     }
542
543     /* Create a PBO for dynamically locked surfaces but don't do it for
544      * converted or NPOT surfaces. Also don't create a PBO for systemmem
545      * surfaces. */
546     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
547             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
548             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
549     {
550         struct wined3d_context *context;
551         GLenum error;
552
553         context = context_acquire(device, NULL);
554         ENTER_GL();
555
556         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
557         error = glGetError();
558         if (!surface->pbo || error != GL_NO_ERROR)
559             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
560
561         TRACE("Binding PBO %u.\n", surface->pbo);
562
563         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
564         checkGLcall("glBindBufferARB");
565
566         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
567                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
568         checkGLcall("glBufferDataARB");
569
570         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
571         checkGLcall("glBindBufferARB");
572
573         /* We don't need the system memory anymore and we can't even use it for PBOs. */
574         if (!(surface->flags & SFLAG_CLIENT))
575         {
576             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
577             surface->resource.heapMemory = NULL;
578         }
579         surface->resource.allocatedMemory = NULL;
580         surface->flags |= SFLAG_PBO;
581         LEAVE_GL();
582         context_release(context);
583     }
584     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
585     {
586         /* Whatever surface we have, make sure that there is memory allocated
587          * for the downloaded copy, or a PBO to map. */
588         if (!surface->resource.heapMemory)
589             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
590
591         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
592                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
593
594         if (surface->flags & SFLAG_INSYSMEM)
595             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
596     }
597 }
598
599 static void surface_evict_sysmem(struct wined3d_surface *surface)
600 {
601     if (surface->flags & SFLAG_DONOTFREE)
602         return;
603
604     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
605     surface->resource.allocatedMemory = NULL;
606     surface->resource.heapMemory = NULL;
607     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
608 }
609
610 /* Context activation is done by the caller. */
611 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
612         struct wined3d_context *context, BOOL srgb)
613 {
614     struct wined3d_device *device = surface->resource.device;
615     DWORD active_sampler;
616
617     /* We don't need a specific texture unit, but after binding the texture
618      * the current unit is dirty. Read the unit back instead of switching to
619      * 0, this avoids messing around with the state manager's GL states. The
620      * current texture unit should always be a valid one.
621      *
622      * To be more specific, this is tricky because we can implicitly be
623      * called from sampler() in state.c. This means we can't touch anything
624      * other than whatever happens to be the currently active texture, or we
625      * would risk marking already applied sampler states dirty again. */
626     active_sampler = device->rev_tex_unit_map[context->active_texture];
627
628     if (active_sampler != WINED3D_UNMAPPED_STAGE)
629         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
630     surface_bind(surface, context, srgb);
631 }
632
633 static void surface_force_reload(struct wined3d_surface *surface)
634 {
635     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
636 }
637
638 static void surface_release_client_storage(struct wined3d_surface *surface)
639 {
640     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
641
642     ENTER_GL();
643     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
644     if (surface->texture_name)
645     {
646         surface_bind_and_dirtify(surface, context, FALSE);
647         glTexImage2D(surface->texture_target, surface->texture_level,
648                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
649     }
650     if (surface->texture_name_srgb)
651     {
652         surface_bind_and_dirtify(surface, context, TRUE);
653         glTexImage2D(surface->texture_target, surface->texture_level,
654                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
655     }
656     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
657     LEAVE_GL();
658
659     context_release(context);
660
661     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
662     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
663     surface_force_reload(surface);
664 }
665
666 static HRESULT surface_private_setup(struct wined3d_surface *surface)
667 {
668     /* TODO: Check against the maximum texture sizes supported by the video card. */
669     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
670     unsigned int pow2Width, pow2Height;
671
672     TRACE("surface %p.\n", surface);
673
674     surface->texture_name = 0;
675     surface->texture_target = GL_TEXTURE_2D;
676
677     /* Non-power2 support */
678     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
679     {
680         pow2Width = surface->resource.width;
681         pow2Height = surface->resource.height;
682     }
683     else
684     {
685         /* Find the nearest pow2 match */
686         pow2Width = pow2Height = 1;
687         while (pow2Width < surface->resource.width)
688             pow2Width <<= 1;
689         while (pow2Height < surface->resource.height)
690             pow2Height <<= 1;
691     }
692     surface->pow2Width = pow2Width;
693     surface->pow2Height = pow2Height;
694
695     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
696     {
697         /* TODO: Add support for non power two compressed textures. */
698         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
699         {
700             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
701                   surface, surface->resource.width, surface->resource.height);
702             return WINED3DERR_NOTAVAILABLE;
703         }
704     }
705
706     if (pow2Width != surface->resource.width
707             || pow2Height != surface->resource.height)
708     {
709         surface->flags |= SFLAG_NONPOW2;
710     }
711
712     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
713             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
714     {
715         /* One of three options:
716          * 1: Do the same as we do with NPOT and scale the texture, (any
717          *    texture ops would require the texture to be scaled which is
718          *    potentially slow)
719          * 2: Set the texture to the maximum size (bad idea).
720          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
721          * 4: Create the surface, but allow it to be used only for DirectDraw
722          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
723          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
724          *    the render target. */
725         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
726         {
727             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
728             return WINED3DERR_NOTAVAILABLE;
729         }
730
731         /* We should never use this surface in combination with OpenGL! */
732         TRACE("Creating an oversized surface: %ux%u.\n",
733                 surface->pow2Width, surface->pow2Height);
734     }
735     else
736     {
737         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
738          * and EXT_PALETTED_TEXTURE is used in combination with texture
739          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
740          * EXT_PALETTED_TEXTURE doesn't work in combination with
741          * ARB_TEXTURE_RECTANGLE. */
742         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
743                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
744                 && gl_info->supported[EXT_PALETTED_TEXTURE]
745                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
746         {
747             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
748             surface->pow2Width = surface->resource.width;
749             surface->pow2Height = surface->resource.height;
750             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
751         }
752     }
753
754     switch (wined3d_settings.offscreen_rendering_mode)
755     {
756         case ORM_FBO:
757             surface->get_drawable_size = get_drawable_size_fbo;
758             break;
759
760         case ORM_BACKBUFFER:
761             surface->get_drawable_size = get_drawable_size_backbuffer;
762             break;
763
764         default:
765             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
766             return WINED3DERR_INVALIDCALL;
767     }
768
769     surface->flags |= SFLAG_INSYSMEM;
770
771     return WINED3D_OK;
772 }
773
774 static void surface_realize_palette(struct wined3d_surface *surface)
775 {
776     struct wined3d_palette *palette = surface->palette;
777
778     TRACE("surface %p.\n", surface);
779
780     if (!palette) return;
781
782     if (surface->resource.format->id == WINED3DFMT_P8_UINT
783             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
784     {
785         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
786         {
787             /* Make sure the texture is up to date. This call doesn't do
788              * anything if the texture is already up to date. */
789             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
790
791             /* We want to force a palette refresh, so mark the drawable as not being up to date */
792             if (!surface_is_offscreen(surface))
793                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
794         }
795         else
796         {
797             if (!(surface->flags & SFLAG_INSYSMEM))
798             {
799                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
800                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
801             }
802             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
803         }
804     }
805
806     if (surface->flags & SFLAG_DIBSECTION)
807     {
808         RGBQUAD col[256];
809         unsigned int i;
810
811         TRACE("Updating the DC's palette.\n");
812
813         for (i = 0; i < 256; ++i)
814         {
815             col[i].rgbRed   = palette->palents[i].peRed;
816             col[i].rgbGreen = palette->palents[i].peGreen;
817             col[i].rgbBlue  = palette->palents[i].peBlue;
818             col[i].rgbReserved = 0;
819         }
820         SetDIBColorTable(surface->hDC, 0, 256, col);
821     }
822
823     /* Propagate the changes to the drawable when we have a palette. */
824     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
825         surface_load_location(surface, surface->draw_binding, NULL);
826 }
827
828 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
829 {
830     HRESULT hr;
831
832     /* If there's no destination surface there is nothing to do. */
833     if (!surface->overlay_dest)
834         return WINED3D_OK;
835
836     /* Blt calls ModifyLocation on the dest surface, which in turn calls
837      * DrawOverlay to update the overlay. Prevent an endless recursion. */
838     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
839         return WINED3D_OK;
840
841     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
842     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
843             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
844     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
845
846     return hr;
847 }
848
849 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
850 {
851     struct wined3d_device *device = surface->resource.device;
852     const RECT *pass_rect = rect;
853
854     TRACE("surface %p, rect %s, flags %#x.\n",
855             surface, wine_dbgstr_rect(rect), flags);
856
857     if (flags & WINED3DLOCK_DISCARD)
858     {
859         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
860         surface_prepare_system_memory(surface);
861         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
862     }
863     else
864     {
865         /* surface_load_location() does not check if the rectangle specifies
866          * the full surface. Most callers don't need that, so do it here. */
867         if (rect && !rect->top && !rect->left
868                 && rect->right == surface->resource.width
869                 && rect->bottom == surface->resource.height)
870             pass_rect = NULL;
871         surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
872     }
873
874     if (surface->flags & SFLAG_PBO)
875     {
876         const struct wined3d_gl_info *gl_info;
877         struct wined3d_context *context;
878
879         context = context_acquire(device, NULL);
880         gl_info = context->gl_info;
881
882         ENTER_GL();
883         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
884         checkGLcall("glBindBufferARB");
885
886         /* This shouldn't happen but could occur if some other function
887          * didn't handle the PBO properly. */
888         if (surface->resource.allocatedMemory)
889             ERR("The surface already has PBO memory allocated.\n");
890
891         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
892         checkGLcall("glMapBufferARB");
893
894         /* Make sure the PBO isn't set anymore in order not to break non-PBO
895          * calls. */
896         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
897         checkGLcall("glBindBufferARB");
898
899         LEAVE_GL();
900         context_release(context);
901     }
902
903     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
904     {
905         if (!rect)
906             surface_add_dirty_rect(surface, NULL);
907         else
908         {
909             WINED3DBOX b;
910
911             b.Left = rect->left;
912             b.Top = rect->top;
913             b.Right = rect->right;
914             b.Bottom = rect->bottom;
915             b.Front = 0;
916             b.Back = 1;
917             surface_add_dirty_rect(surface, &b);
918         }
919     }
920 }
921
922 static void surface_unmap(struct wined3d_surface *surface)
923 {
924     struct wined3d_device *device = surface->resource.device;
925     BOOL fullsurface;
926
927     TRACE("surface %p.\n", surface);
928
929     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
930
931     if (surface->flags & SFLAG_PBO)
932     {
933         const struct wined3d_gl_info *gl_info;
934         struct wined3d_context *context;
935
936         TRACE("Freeing PBO memory.\n");
937
938         context = context_acquire(device, NULL);
939         gl_info = context->gl_info;
940
941         ENTER_GL();
942         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
943         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
944         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
945         checkGLcall("glUnmapBufferARB");
946         LEAVE_GL();
947         context_release(context);
948
949         surface->resource.allocatedMemory = NULL;
950     }
951
952     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
953
954     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
955     {
956         TRACE("Not dirtified, nothing to do.\n");
957         goto done;
958     }
959
960     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
961             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
962     {
963         if (!surface->dirtyRect.left && !surface->dirtyRect.top
964                 && surface->dirtyRect.right == surface->resource.width
965                 && surface->dirtyRect.bottom == surface->resource.height)
966         {
967             fullsurface = TRUE;
968         }
969         else
970         {
971             /* TODO: Proper partial rectangle tracking. */
972             fullsurface = FALSE;
973             surface->flags |= SFLAG_INSYSMEM;
974         }
975
976         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
977
978         /* Partial rectangle tracking is not commonly implemented, it is only
979          * done for render targets. INSYSMEM was set before to tell
980          * surface_load_location() where to read the rectangle from.
981          * Indrawable is set because all modifications from the partial
982          * sysmem copy are written back to the drawable, thus the surface is
983          * merged again in the drawable. The sysmem copy is not fully up to
984          * date because only a subrectangle was read in Map(). */
985         if (!fullsurface)
986         {
987             surface_modify_location(surface, surface->draw_binding, TRUE);
988             surface_evict_sysmem(surface);
989         }
990
991         surface->dirtyRect.left = surface->resource.width;
992         surface->dirtyRect.top = surface->resource.height;
993         surface->dirtyRect.right = 0;
994         surface->dirtyRect.bottom = 0;
995     }
996     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
997     {
998         FIXME("Depth / stencil buffer locking is not implemented.\n");
999     }
1000
1001 done:
1002     /* Overlays have to be redrawn manually after changes with the GL implementation */
1003     if (surface->overlay_dest)
1004         surface_draw_overlay(surface);
1005 }
1006
1007 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1008 {
1009     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1010         return FALSE;
1011     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1012         return FALSE;
1013     return TRUE;
1014 }
1015
1016 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1017         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1018 {
1019     const struct wined3d_gl_info *gl_info;
1020     struct wined3d_context *context;
1021     DWORD src_mask, dst_mask;
1022     GLbitfield gl_mask;
1023
1024     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1025             device, src_surface, wine_dbgstr_rect(src_rect),
1026             dst_surface, wine_dbgstr_rect(dst_rect));
1027
1028     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1029     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1030
1031     if (src_mask != dst_mask)
1032     {
1033         ERR("Incompatible formats %s and %s.\n",
1034                 debug_d3dformat(src_surface->resource.format->id),
1035                 debug_d3dformat(dst_surface->resource.format->id));
1036         return;
1037     }
1038
1039     if (!src_mask)
1040     {
1041         ERR("Not a depth / stencil format: %s.\n",
1042                 debug_d3dformat(src_surface->resource.format->id));
1043         return;
1044     }
1045
1046     gl_mask = 0;
1047     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1048         gl_mask |= GL_DEPTH_BUFFER_BIT;
1049     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1050         gl_mask |= GL_STENCIL_BUFFER_BIT;
1051
1052     /* Make sure the locations are up-to-date. Loading the destination
1053      * surface isn't required if the entire surface is overwritten. */
1054     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1055     if (!surface_is_full_rect(dst_surface, dst_rect))
1056         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1057
1058     context = context_acquire(device, NULL);
1059     if (!context->valid)
1060     {
1061         context_release(context);
1062         WARN("Invalid context, skipping blit.\n");
1063         return;
1064     }
1065
1066     gl_info = context->gl_info;
1067
1068     ENTER_GL();
1069
1070     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1071     glReadBuffer(GL_NONE);
1072     checkGLcall("glReadBuffer()");
1073     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1074
1075     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1076     context_set_draw_buffer(context, GL_NONE);
1077     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1078
1079     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1080     {
1081         glDepthMask(GL_TRUE);
1082         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1083     }
1084     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1085     {
1086         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1087         {
1088             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1089             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1090         }
1091         glStencilMask(~0U);
1092         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1093     }
1094
1095     glDisable(GL_SCISSOR_TEST);
1096     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1097
1098     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1099             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1100     checkGLcall("glBlitFramebuffer()");
1101
1102     LEAVE_GL();
1103
1104     if (wined3d_settings.strict_draw_ordering)
1105         wglFlush(); /* Flush to ensure ordering across contexts. */
1106
1107     context_release(context);
1108 }
1109
1110 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1111  * Depth / stencil is not supported. */
1112 static void surface_blt_fbo(const struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1113         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1114         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1115 {
1116     const struct wined3d_gl_info *gl_info;
1117     struct wined3d_context *context;
1118     RECT src_rect, dst_rect;
1119     GLenum gl_filter;
1120     GLenum buffer;
1121
1122     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1123     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1124             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1125     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1126             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1127
1128     src_rect = *src_rect_in;
1129     dst_rect = *dst_rect_in;
1130
1131     switch (filter)
1132     {
1133         case WINED3DTEXF_LINEAR:
1134             gl_filter = GL_LINEAR;
1135             break;
1136
1137         default:
1138             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1139         case WINED3DTEXF_NONE:
1140         case WINED3DTEXF_POINT:
1141             gl_filter = GL_NEAREST;
1142             break;
1143     }
1144
1145     /* Resolve the source surface first if needed. */
1146     if (src_location == SFLAG_INRB_MULTISAMPLE
1147             && (src_surface->resource.format->id != dst_surface->resource.format->id
1148                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1149                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1150         src_location = SFLAG_INRB_RESOLVED;
1151
1152     /* Make sure the locations are up-to-date. Loading the destination
1153      * surface isn't required if the entire surface is overwritten. (And is
1154      * in fact harmful if we're being called by surface_load_location() with
1155      * the purpose of loading the destination surface.) */
1156     surface_load_location(src_surface, src_location, NULL);
1157     if (!surface_is_full_rect(dst_surface, &dst_rect))
1158         surface_load_location(dst_surface, dst_location, NULL);
1159
1160     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1161     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1162     else context = context_acquire(device, NULL);
1163
1164     if (!context->valid)
1165     {
1166         context_release(context);
1167         WARN("Invalid context, skipping blit.\n");
1168         return;
1169     }
1170
1171     gl_info = context->gl_info;
1172
1173     if (src_location == SFLAG_INDRAWABLE)
1174     {
1175         TRACE("Source surface %p is onscreen.\n", src_surface);
1176         buffer = surface_get_gl_buffer(src_surface);
1177         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1178     }
1179     else
1180     {
1181         TRACE("Source surface %p is offscreen.\n", src_surface);
1182         buffer = GL_COLOR_ATTACHMENT0;
1183     }
1184
1185     ENTER_GL();
1186     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1187     glReadBuffer(buffer);
1188     checkGLcall("glReadBuffer()");
1189     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1190     LEAVE_GL();
1191
1192     if (dst_location == SFLAG_INDRAWABLE)
1193     {
1194         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1195         buffer = surface_get_gl_buffer(dst_surface);
1196         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1197     }
1198     else
1199     {
1200         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1201         buffer = GL_COLOR_ATTACHMENT0;
1202     }
1203
1204     ENTER_GL();
1205     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1206     context_set_draw_buffer(context, buffer);
1207     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1208     context_invalidate_state(context, STATE_FRAMEBUFFER);
1209
1210     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1211     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1212     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1213     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1214     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1215
1216     glDisable(GL_SCISSOR_TEST);
1217     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1218
1219     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1220             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1221     checkGLcall("glBlitFramebuffer()");
1222
1223     LEAVE_GL();
1224
1225     if (wined3d_settings.strict_draw_ordering
1226             || (dst_location == SFLAG_INDRAWABLE
1227             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1228         wglFlush();
1229
1230     context_release(context);
1231 }
1232
1233 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1234         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1235         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1236 {
1237     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1238         return FALSE;
1239
1240     /* Source and/or destination need to be on the GL side */
1241     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1242         return FALSE;
1243
1244     switch (blit_op)
1245     {
1246         case WINED3D_BLIT_OP_COLOR_BLIT:
1247             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1248                 return FALSE;
1249             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1250                 return FALSE;
1251             break;
1252
1253         case WINED3D_BLIT_OP_DEPTH_BLIT:
1254             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1255                 return FALSE;
1256             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1257                 return FALSE;
1258             break;
1259
1260         default:
1261             return FALSE;
1262     }
1263
1264     if (!(src_format->id == dst_format->id
1265             || (is_identity_fixup(src_format->color_fixup)
1266             && is_identity_fixup(dst_format->color_fixup))))
1267         return FALSE;
1268
1269     return TRUE;
1270 }
1271
1272 /* This function checks if the primary render target uses the 8bit paletted format. */
1273 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1274 {
1275     if (device->fb.render_targets && device->fb.render_targets[0])
1276     {
1277         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1278         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1279                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1280             return TRUE;
1281     }
1282     return FALSE;
1283 }
1284
1285 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1286         DWORD color, WINED3DCOLORVALUE *float_color)
1287 {
1288     const struct wined3d_format *format = surface->resource.format;
1289     const struct wined3d_device *device = surface->resource.device;
1290
1291     switch (format->id)
1292     {
1293         case WINED3DFMT_P8_UINT:
1294             if (surface->palette)
1295             {
1296                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1297                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1298                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1299             }
1300             else
1301             {
1302                 float_color->r = 0.0f;
1303                 float_color->g = 0.0f;
1304                 float_color->b = 0.0f;
1305             }
1306             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1307             break;
1308
1309         case WINED3DFMT_B5G6R5_UNORM:
1310             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1311             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1312             float_color->b = (color & 0x1f) / 31.0f;
1313             float_color->a = 1.0f;
1314             break;
1315
1316         case WINED3DFMT_B8G8R8_UNORM:
1317         case WINED3DFMT_B8G8R8X8_UNORM:
1318             float_color->r = D3DCOLOR_R(color);
1319             float_color->g = D3DCOLOR_G(color);
1320             float_color->b = D3DCOLOR_B(color);
1321             float_color->a = 1.0f;
1322             break;
1323
1324         case WINED3DFMT_B8G8R8A8_UNORM:
1325             float_color->r = D3DCOLOR_R(color);
1326             float_color->g = D3DCOLOR_G(color);
1327             float_color->b = D3DCOLOR_B(color);
1328             float_color->a = D3DCOLOR_A(color);
1329             break;
1330
1331         default:
1332             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1333             return FALSE;
1334     }
1335
1336     return TRUE;
1337 }
1338
1339 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1340 {
1341     const struct wined3d_format *format = surface->resource.format;
1342
1343     switch (format->id)
1344     {
1345         case WINED3DFMT_S1_UINT_D15_UNORM:
1346             *float_depth = depth / (float)0x00007fff;
1347             break;
1348
1349         case WINED3DFMT_D16_UNORM:
1350             *float_depth = depth / (float)0x0000ffff;
1351             break;
1352
1353         case WINED3DFMT_D24_UNORM_S8_UINT:
1354         case WINED3DFMT_X8D24_UNORM:
1355             *float_depth = depth / (float)0x00ffffff;
1356             break;
1357
1358         case WINED3DFMT_D32_UNORM:
1359             *float_depth = depth / (float)0xffffffff;
1360             break;
1361
1362         default:
1363             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1364             return FALSE;
1365     }
1366
1367     return TRUE;
1368 }
1369
1370 /* Do not call while under the GL lock. */
1371 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1372 {
1373     const struct wined3d_resource *resource = &surface->resource;
1374     struct wined3d_device *device = resource->device;
1375     const struct blit_shader *blitter;
1376
1377     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1378             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1379     if (!blitter)
1380     {
1381         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1382         return WINED3DERR_INVALIDCALL;
1383     }
1384
1385     return blitter->depth_fill(device, surface, rect, depth);
1386 }
1387
1388 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1389         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1390 {
1391     struct wined3d_device *device = src_surface->resource.device;
1392
1393     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1394             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1395             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1396         return WINED3DERR_INVALIDCALL;
1397
1398     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1399
1400     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1401             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1402     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1403
1404     return WINED3D_OK;
1405 }
1406
1407 /* Do not call while under the GL lock. */
1408 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1409         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1410         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1411 {
1412     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1413     struct wined3d_device *device = dst_surface->resource.device;
1414     DWORD src_ds_flags, dst_ds_flags;
1415     RECT src_rect, dst_rect;
1416     BOOL scale, convert;
1417
1418     static const DWORD simple_blit = WINEDDBLT_ASYNC
1419             | WINEDDBLT_COLORFILL
1420             | WINEDDBLT_WAIT
1421             | WINEDDBLT_DEPTHFILL
1422             | WINEDDBLT_DONOTWAIT;
1423
1424     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1425             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1426             flags, fx, debug_d3dtexturefiltertype(filter));
1427     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1428
1429     if (fx)
1430     {
1431         TRACE("dwSize %#x.\n", fx->dwSize);
1432         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1433         TRACE("dwROP %#x.\n", fx->dwROP);
1434         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1435         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1436         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1437         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1438         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1439         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1440         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1441         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1442         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1443         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1444         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1445         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1446         TRACE("dwReserved %#x.\n", fx->dwReserved);
1447         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1448         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1449         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1450         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1451         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1452         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1453                 fx->ddckDestColorkey.dwColorSpaceLowValue,
1454                 fx->ddckDestColorkey.dwColorSpaceHighValue);
1455         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1456                 fx->ddckSrcColorkey.dwColorSpaceLowValue,
1457                 fx->ddckSrcColorkey.dwColorSpaceHighValue);
1458     }
1459
1460     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1461     {
1462         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1463         return WINEDDERR_SURFACEBUSY;
1464     }
1465
1466     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1467
1468     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1469             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1470             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1471             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1472             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1473     {
1474         /* The destination rect can be out of bounds on the condition
1475          * that a clipper is set for the surface. */
1476         if (dst_surface->clipper)
1477             FIXME("Blit clipping not implemented.\n");
1478         else
1479             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1480         return WINEDDERR_INVALIDRECT;
1481     }
1482
1483     if (src_surface)
1484     {
1485         surface_get_rect(src_surface, src_rect_in, &src_rect);
1486
1487         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1488                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1489                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1490                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1491                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1492         {
1493             WARN("Application gave us bad source rectangle for Blt.\n");
1494             return WINEDDERR_INVALIDRECT;
1495         }
1496     }
1497     else
1498     {
1499         memset(&src_rect, 0, sizeof(src_rect));
1500     }
1501
1502     if (!fx || !(fx->dwDDFX))
1503         flags &= ~WINEDDBLT_DDFX;
1504
1505     if (flags & WINEDDBLT_WAIT)
1506         flags &= ~WINEDDBLT_WAIT;
1507
1508     if (flags & WINEDDBLT_ASYNC)
1509     {
1510         static unsigned int once;
1511
1512         if (!once++)
1513             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1514         flags &= ~WINEDDBLT_ASYNC;
1515     }
1516
1517     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1518     if (flags & WINEDDBLT_DONOTWAIT)
1519     {
1520         static unsigned int once;
1521
1522         if (!once++)
1523             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1524         flags &= ~WINEDDBLT_DONOTWAIT;
1525     }
1526
1527     if (!device->d3d_initialized)
1528     {
1529         WARN("D3D not initialized, using fallback.\n");
1530         goto cpu;
1531     }
1532
1533     /* We want to avoid invalidating the sysmem location for converted
1534      * surfaces, since otherwise we'd have to convert the data back when
1535      * locking them. */
1536     if (dst_surface->flags & SFLAG_CONVERTED)
1537     {
1538         WARN("Converted surface, using CPU blit.\n");
1539         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1540     }
1541
1542     if (flags & ~simple_blit)
1543     {
1544         WARN("Using fallback for complex blit (%#x).\n", flags);
1545         goto fallback;
1546     }
1547
1548     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1549         src_swapchain = src_surface->container.u.swapchain;
1550     else
1551         src_swapchain = NULL;
1552
1553     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1554         dst_swapchain = dst_surface->container.u.swapchain;
1555     else
1556         dst_swapchain = NULL;
1557
1558     /* This isn't strictly needed. FBO blits for example could deal with
1559      * cross-swapchain blits by first downloading the source to a texture
1560      * before switching to the destination context. We just have this here to
1561      * not have to deal with the issue, since cross-swapchain blits should be
1562      * rare. */
1563     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1564     {
1565         FIXME("Using fallback for cross-swapchain blit.\n");
1566         goto fallback;
1567     }
1568
1569     scale = src_surface
1570             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1571             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1572     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1573
1574     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1575     if (src_surface)
1576         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1577     else
1578         src_ds_flags = 0;
1579
1580     if (src_ds_flags || dst_ds_flags)
1581     {
1582         if (flags & WINEDDBLT_DEPTHFILL)
1583         {
1584             float depth;
1585
1586             TRACE("Depth fill.\n");
1587
1588             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1589                 return WINED3DERR_INVALIDCALL;
1590
1591             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1592                 return WINED3D_OK;
1593         }
1594         else
1595         {
1596             /* Accessing depth / stencil surfaces is supposed to fail while in
1597              * a scene, except for fills, which seem to work. */
1598             if (device->inScene)
1599             {
1600                 WARN("Rejecting depth / stencil access while in scene.\n");
1601                 return WINED3DERR_INVALIDCALL;
1602             }
1603
1604             if (src_ds_flags != dst_ds_flags)
1605             {
1606                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1607                 return WINED3DERR_INVALIDCALL;
1608             }
1609
1610             if (src_rect.top || src_rect.left
1611                     || src_rect.bottom != src_surface->resource.height
1612                     || src_rect.right != src_surface->resource.width)
1613             {
1614                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1615                         wine_dbgstr_rect(&src_rect));
1616                 return WINED3DERR_INVALIDCALL;
1617             }
1618
1619             if (dst_rect.top || dst_rect.left
1620                     || dst_rect.bottom != dst_surface->resource.height
1621                     || dst_rect.right != dst_surface->resource.width)
1622             {
1623                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1624                         wine_dbgstr_rect(&src_rect));
1625                 return WINED3DERR_INVALIDCALL;
1626             }
1627
1628             if (scale)
1629             {
1630                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1631                 return WINED3DERR_INVALIDCALL;
1632             }
1633
1634             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1635                 return WINED3D_OK;
1636         }
1637     }
1638     else
1639     {
1640         /* In principle this would apply to depth blits as well, but we don't
1641          * implement those in the CPU blitter at the moment. */
1642         if ((dst_surface->flags & SFLAG_INSYSMEM)
1643                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1644         {
1645             if (scale)
1646                 TRACE("Not doing sysmem blit because of scaling.\n");
1647             else if (convert)
1648                 TRACE("Not doing sysmem blit because of format conversion.\n");
1649             else
1650                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1651         }
1652
1653         if (flags & WINEDDBLT_COLORFILL)
1654         {
1655             WINED3DCOLORVALUE color;
1656
1657             TRACE("Color fill.\n");
1658
1659             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1660                 goto fallback;
1661
1662             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1663                 return WINED3D_OK;
1664         }
1665         else
1666         {
1667             TRACE("Color blit.\n");
1668
1669             /* Upload */
1670             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1671             {
1672                 if (scale)
1673                     TRACE("Not doing upload because of scaling.\n");
1674                 else if (convert)
1675                     TRACE("Not doing upload because of format conversion.\n");
1676                 else
1677                 {
1678                     POINT dst_point = {dst_rect.left, dst_rect.top};
1679
1680                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1681                     {
1682                         if (!surface_is_offscreen(dst_surface))
1683                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1684                         return WINED3D_OK;
1685                     }
1686                 }
1687             }
1688
1689             /* Use present for back -> front blits. The idea behind this is
1690              * that present is potentially faster than a blit, in particular
1691              * when FBO blits aren't available. Some ddraw applications like
1692              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1693              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1694              * applications can't blit directly to the frontbuffer. */
1695             if (dst_swapchain && dst_swapchain->back_buffers
1696                     && dst_surface == dst_swapchain->front_buffer
1697                     && src_surface == dst_swapchain->back_buffers[0])
1698             {
1699                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->presentParms.SwapEffect;
1700
1701                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1702
1703                 /* Set the swap effect to COPY, we don't want the backbuffer
1704                  * to become undefined. */
1705                 dst_swapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
1706                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1707                 dst_swapchain->presentParms.SwapEffect = swap_effect;
1708
1709                 return WINED3D_OK;
1710             }
1711
1712             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1713                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1714                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1715             {
1716                 TRACE("Using FBO blit.\n");
1717
1718                 surface_blt_fbo(device, filter,
1719                         src_surface, src_surface->draw_binding, &src_rect,
1720                         dst_surface, dst_surface->draw_binding, &dst_rect);
1721                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1722                 return WINED3D_OK;
1723             }
1724
1725             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1726                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1727                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1728             {
1729                 TRACE("Using arbfp blit.\n");
1730
1731                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1732                     return WINED3D_OK;
1733             }
1734         }
1735     }
1736
1737 fallback:
1738
1739     /* Special cases for render targets. */
1740     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1741             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1742     {
1743         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1744                 src_surface, &src_rect, flags, fx, filter)))
1745             return WINED3D_OK;
1746     }
1747
1748 cpu:
1749
1750     /* For the rest call the X11 surface implementation. For render targets
1751      * this should be implemented OpenGL accelerated in BltOverride, other
1752      * blits are rather rare. */
1753     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1754 }
1755
1756 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1757         struct wined3d_surface *render_target)
1758 {
1759     TRACE("surface %p, render_target %p.\n", surface, render_target);
1760
1761     /* Only render targets without a multisample type are handed to
1762      * wined3d_surface_blt(). TODO: Check surface sizes, pools, etc. */
1763     if (!render_target->resource.multisample_type)
1764         return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1765
1766     return WINED3DERR_INVALIDCALL;
1767 }
1768
1769 /* Reads the contents of the surface's PBO back into heap memory and then
      * deletes the PBO. If the surface has no heap memory yet, a block of
      * resource.size + RESOURCE_ALIGNMENT bytes is allocated and allocatedMemory
      * is set to the heap address rounded up with RESOURCE_ALIGNMENT.
      * NOTE(review): the HeapAlloc() result is not checked.
      *
      * Context activation is done by the caller. */
1770 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1771 {
1772     if (!surface->resource.heapMemory)
1773     {
1774         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1775         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1776                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1777     }
1778     /* Copy the buffer contents to allocatedMemory before the buffer is deleted. */
1779     ENTER_GL();
1780     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1781     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1782     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1783             surface->resource.size, surface->resource.allocatedMemory));
1784     checkGLcall("glGetBufferSubDataARB");
1785     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1786     checkGLcall("glDeleteBuffersARB");
1787     LEAVE_GL();
1788     /* The surface no longer has a PBO. */
1789     surface->pbo = 0;
1790     surface->flags &= ~SFLAG_PBO;
1791 }
1792
1793 /* Unload handler for surfaces, installed in surface_resource_ops.
      *
      * Default pool surfaces are reset with surface_init_sysmem() and flagged
      * SFLAG_LOST; all other surfaces are loaded into sysmem first. After that
      * the texture locations are dropped and the surface's GL objects are
      * deleted: the PBO (after reading it back), the FBO renderbuffers, the
      * multisample / resolved renderbuffers and, unless the surface is
      * contained in a texture, the texture names.
      *
      * Do not call while under the GL lock. */
1794 static void surface_unload(struct wined3d_resource *resource)
1795 {
1796     struct wined3d_surface *surface = surface_from_resource(resource);
1797     struct wined3d_renderbuffer_entry *entry, *entry2;
1798     struct wined3d_device *device = resource->device;
1799     const struct wined3d_gl_info *gl_info;
1800     struct wined3d_context *context;
1801
1802     TRACE("surface %p.\n", surface);
1803
1804     if (resource->pool == WINED3DPOOL_DEFAULT)
1805     {
1806         /* Default pool resources are supposed to be destroyed before Reset is called.
1807          * Implicit resources stay however. So this means we have an implicit render target
1808          * or depth stencil. The content may be destroyed, but we still have to tear down
1809          * OpenGL resources, so we cannot leave early.
1810          *
1811          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1812          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1813          * or the depth stencil into an FBO the texture or render buffer will be removed
1814          * and all flags get lost.
1815          */
1816         surface_init_sysmem(surface);
1817         /* We also get here when the ddraw swapchain is destroyed, for example
1818          * for a mode switch. In this case this surface won't necessarily be
1819          * an implicit surface. We have to mark it lost so that the
1820          * application can restore it after the mode switch. */
1821         surface->flags |= SFLAG_LOST;
1822     }
1823     else
1824     {
1825         /* Load the surface into system memory. */
1826         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1827         surface_modify_location(surface, surface->draw_binding, FALSE);
1828     }
1829     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1830     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1831     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1832     /* The GL objects below are deleted with a context acquired here. */
1833     context = context_acquire(device, NULL);
1834     gl_info = context->gl_info;
1835
1836     /* Destroy PBOs, but load them into real sysmem before. */
1837     if (surface->flags & SFLAG_PBO)
1838         surface_remove_pbo(surface, gl_info);
1839
1840     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1841      * all application-created targets the application has to release the surface
1842      * before calling _Reset.
1843      */
1844     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1845     {
1846         ENTER_GL();
1847         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1848         LEAVE_GL();
1849         list_remove(&entry->entry);
1850         HeapFree(GetProcessHeap(), 0, entry);
1851     }
1852     list_init(&surface->renderbuffers);
1853     surface->current_renderbuffer = NULL;
1854
1855     ENTER_GL();
1856
1857     /* If we're in a texture, the texture name belongs to the texture.
1858      * Otherwise, destroy it. */
1859     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1860     {
1861         glDeleteTextures(1, &surface->texture_name);
1862         surface->texture_name = 0;
1863         glDeleteTextures(1, &surface->texture_name_srgb);
1864         surface->texture_name_srgb = 0;
1865     }
1866     if (surface->rb_multisample)
1867     {
1868         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1869         surface->rb_multisample = 0;
1870     }
1871     if (surface->rb_resolved)
1872     {
1873         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1874         surface->rb_resolved = 0;
1875     }
1876
1877     LEAVE_GL();
1878
1879     context_release(context);
1880
1881     resource_unload(resource);
1882 }
1883 /* Resource ops for surfaces; the single entry is the unload handler. */
1884 static const struct wined3d_resource_ops surface_resource_ops =
1885 {
1886     surface_unload,
1887 };
1888 /* Surface ops table built from the surface_* implementations; gdi_surface_ops lists the GDI counterparts. */
1889 static const struct wined3d_surface_ops surface_ops =
1890 {
1891     surface_private_setup,
1892     surface_realize_palette,
1893     surface_map,
1894     surface_unmap,
1895 };
1896
1897 /*****************************************************************************
1898  * Sets up a GDI surface by creating the DIB section it renders to. When
1899  * that works, the heap memory the surface already had is released and
1900  * allocatedMemory points at the DIB section's bitmap data instead. When it
1901  * fails, the surface keeps its memory and setup still reports success.
1902  *
1903  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1904  * are set to the real sizes.
1905  *
1906  * Overlays are not supported by GDI surfaces and are rejected.
1907  * Returns:
1908  *  WINED3D_OK on success
1909  *  WINED3DERR_INVALIDCALL if the surface has WINED3DUSAGE_OVERLAY set
1910  *
1911  *****************************************************************************/
1912 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1913 {
1914     TRACE("surface %p.\n", surface);
1915
1916     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1917     {
1918         ERR("Overlays not yet supported by GDI surfaces.\n");
1919         return WINED3DERR_INVALIDCALL;
1920     }
1921
1922     /* Sysmem textures have memory already allocated. Once the DIB section
1923      * exists that allocation is redundant, so release it and use the DIB
1924      * section's bitmap data directly; this avoids an unnecessary memcpy.
1925      * If the DIB section can't be created the surface just keeps the
1926      * memory it has. */
1927     if (SUCCEEDED(surface_create_dib_section(surface)))
1928     {
1929         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1930         surface->resource.heapMemory = NULL;
1931         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1932     }
1933
1934     /* We don't mind the nonpow2 stuff in GDI. */
1935     surface->pow2Width = surface->resource.width;
1936     surface->pow2Height = surface->resource.height;
1937
1938     return WINED3D_OK;
1939 }
1940
1941 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1942 {
1943     struct wined3d_palette *pal = surface->palette;
1944     struct wined3d_swapchain *swapchain;
1945
1946     TRACE("surface %p.\n", surface);
1947
1948     if (!pal)
1949         return;
1950
1951     if (surface->flags & SFLAG_DIBSECTION)
1952     {
1953         RGBQUAD table[256];
1954         unsigned int idx;
1955
1956         TRACE("Updating the DC's palette.\n");
1957
1958         /* Mirror the palette entries into the color table of the surface's DC. */
1959         for (idx = 0; idx < 256; ++idx)
1960         {
1961             table[idx].rgbRed = pal->palents[idx].peRed;
1962             table[idx].rgbGreen = pal->palents[idx].peGreen;
1963             table[idx].rgbBlue = pal->palents[idx].peBlue;
1964             table[idx].rgbReserved = 0;
1965         }
1966         SetDIBColorTable(surface->hDC, 0, 256, table);
1967     }
1968
1969     /* Some games like e.g. Red Alert call SetEntries a lot to implement fading,
1970      * so a swapchain's front buffer is copied to the screen after a palette change. */
1971     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
1972         return;
1973
1974     swapchain = surface->container.u.swapchain;
1975     if (surface == swapchain->front_buffer)
1976         x11_copy_to_screen(swapchain, NULL);
1977 }
1978
1979 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1980 {
1981     TRACE("surface %p, rect %s, flags %#x.\n", surface, wine_dbgstr_rect(rect), flags);
1982
1983     if (surface->resource.allocatedMemory)
1984         return;
1985
1986     /* The surface has no memory. This happens on gdi surfaces if the
1987      * application set a user pointer and resets it. Recreate the DIB
1988      * section and use its bitmap data as the surface memory again. */
1989     surface_create_dib_section(surface);
1990     surface->resource.allocatedMemory = surface->dib.bitmap_data;
1991 }
1992
1993 static void gdi_surface_unmap(struct wined3d_surface *surface)
1994 {
1995     RECT *locked = &surface->lockedRect;
1996
1997     TRACE("surface %p.\n", surface);
1998
1999     /* Tell the swapchain to update the screen, but only when this surface
2000      * is the swapchain's front buffer. Just the rectangle that was locked
2001      * is passed along. */
2002     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
2003             && surface == surface->container.u.swapchain->front_buffer)
2004         x11_copy_to_screen(surface->container.u.swapchain, locked);
2005
2006     /* Reset the locked rectangle. */
2007     memset(locked, 0, sizeof(RECT));
2008 }
2009 /* Surface ops table built from the gdi_surface_* implementations above. */
2010 static const struct wined3d_surface_ops gdi_surface_ops =
2011 {
2012     gdi_surface_private_setup,
2013     gdi_surface_realize_palette,
2014     gdi_surface_map,
2015     gdi_surface_unmap,
2016 };
2017
2018 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2019 {
2020     GLuint *slot = &surface->texture_name;
2021     DWORD location = SFLAG_INTEXTURE;
2022
2023     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2024
2025     /* The sRGB texture has its own name and its own location flag. */
2026     if (srgb)
2027     {
2028         slot = &surface->texture_name_srgb;
2029         location = SFLAG_INSRGBTEX;
2030     }
2031
2032     /* The surface had no name of this kind and now gets a non-zero one:
2033      * remove the matching location through surface_modify_location(). */
2034     if (new_name && !*slot)
2035     {
2036         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2037          * surface has no texture name yet. See if we can get rid of this. */
2038         if (surface->flags & location)
2039             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(location));
2040         surface_modify_location(surface, location, FALSE);
2041     }
2042
2043     /* Record the new name, which may be 0, and call surface_force_reload()
2044      * regardless of whether the name changed. */
2045     *slot = new_name;
2046     surface_force_reload(surface);
2047 }
2048
2049 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2050 {
2051     const GLenum old_target = surface->texture_target;
2052
2053     TRACE("surface %p, target %#x.\n", surface, target);
2054
2055     /* SFLAG_NORMCOORD is cleared on a switch to a rectangle texture target
2056      * and set on a switch away from one; an unchanged target keeps it. */
2057     if (old_target != target)
2058     {
2059         if (target == GL_TEXTURE_RECTANGLE_ARB)
2060             surface->flags &= ~SFLAG_NORMCOORD;
2061         else if (old_target == GL_TEXTURE_RECTANGLE_ARB)
2062             surface->flags |= SFLAG_NORMCOORD;
2063     }
2064     surface->texture_target = target;
2065     surface_force_reload(surface);
2066 }
2067
2068 /* Binds the surface's texture, or the texture containing it. Context activation is done by the caller. */
2069 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2070 {
2071     BOOL needs_setup;
2072
2073     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2074
2075     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2076     {
2077         struct wined3d_texture *texture = surface->container.u.texture;
2078
2079         TRACE("Passing to container (%p).\n", texture);
2080         texture->texture_ops->texture_bind(texture, context, srgb);
2081         return;
2082     }
2083     /* Not contained in a texture: the surface's own texture name is bound. */
2084     if (surface->texture_level)
2085         ERR("Standalone surface %p is non-zero texture level %u.\n",
2086                 surface, surface->texture_level);
2087
2088     if (srgb)
2089         ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2090
2091     ENTER_GL();
2092     /* A surface without a texture name gets one generated on first bind. */
2093     needs_setup = !surface->texture_name;
2094     if (needs_setup)
2095     {
2096         glGenTextures(1, &surface->texture_name);
2097         checkGLcall("glGenTextures");
2098
2099         TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2100     }
2101
2102     context_bind_texture(context, surface->texture_target, surface->texture_name);
2103     /* A newly generated texture is set to clamp-to-edge wrapping and nearest filtering. */
2104     if (needs_setup)
2105     {
2106         glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2107         glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2108         glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2109         glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2110         glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2111         checkGLcall("glTexParameteri");
2112     }
2113
2114     LEAVE_GL();
2115 }
2116
2117 /* Reads the surface's texture level back from GL into its system memory (or PBO) and sets SFLAG_INSYSMEM.
2118  * This call just downloads data, the caller is responsible for binding the correct texture. */
2119 /* Context activation is done by the caller. */
2120 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2121 {
2122     const struct wined3d_format *format = surface->resource.format;
2123
2124     /* Only support read back of converted P8 surfaces. */
2125     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2126     {
2127         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2128         return;
2129     }
2130
2131     ENTER_GL();
2132
2133     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2134     {
2135         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2136                 surface, surface->texture_level, format->glFormat, format->glType,
2137                 surface->resource.allocatedMemory);
2138
2139         if (surface->flags & SFLAG_PBO)
2140         {
2141             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2142             checkGLcall("glBindBufferARB");
2143             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2144             checkGLcall("glGetCompressedTexImageARB");
2145             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2146             checkGLcall("glBindBufferARB");
2147         }
2148         else
2149         {
2150             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2151                     surface->texture_level, surface->resource.allocatedMemory));
2152             checkGLcall("glGetCompressedTexImageARB");
2153         }
2154
2155         LEAVE_GL();
2156     }
2157     else
2158     {
2159         void *mem = surface->resource.allocatedMemory;
2160         GLenum gl_format = format->glFormat;
2161         GLenum gl_type = format->glType;
2162         int src_pitch = 0, dst_pitch = 0;
2163
2164         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2165         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2166         {
2167             gl_format = GL_ALPHA;
2168             gl_type = GL_UNSIGNED_BYTE;
2169         }
2170
2171         if (surface->flags & SFLAG_NONPOW2)
2172         {
2173             unsigned char alignment = surface->resource.device->surface_alignment;
2174             src_pitch = format->byte_count * surface->pow2Width;
2175             dst_pitch = wined3d_surface_get_pitch(surface);
2176             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2177             if (!(mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height)))
2178             {
2179                 LEAVE_GL();
2180                 ERR("Out of memory.\n");
2181                 return;
2182             }
2183         }
2184
2185         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2186                 surface, surface->texture_level, gl_format, gl_type, mem);
2187         /* With the PBO bound as pack buffer the data pointer passed to GL is NULL. */
2188         if (surface->flags & SFLAG_PBO)
2189         {
2190             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2191             checkGLcall("glBindBufferARB");
2192
2193             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2194             checkGLcall("glGetTexImage");
2195
2196             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2197             checkGLcall("glBindBufferARB");
2198         }
2199         else
2200         {
2201             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2202             checkGLcall("glGetTexImage");
2203         }
2204         LEAVE_GL();
2205
2206         if (surface->flags & SFLAG_NONPOW2)
2207         {
2208             const BYTE *src_data;
2209             BYTE *dst_data;
2210             UINT y;
2211             /*
2212              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2213              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2214              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2215              *
2216              * We're doing this...
2217              *
2218              * instead of boxing the texture :
2219              * |<-texture width ->|  -->pow2width|   /\
2220              * |111111111111111111|              |   |
2221              * |222 Texture 222222| boxed empty  | texture height
2222              * |3333 Data 33333333|              |   |
2223              * |444444444444444444|              |   \/
2224              * -----------------------------------   |
2225              * |     boxed  empty | boxed empty  | pow2height
2226              * |                  |              |   \/
2227              * -----------------------------------
2228              *
2229              *
2230              * we're repacking the data to the expected texture width
2231              *
2232              * |<-texture width ->|  -->pow2width|   /\
2233              * |111111111111111111222222222222222|   |
2234              * |222333333333333333333444444444444| texture height
2235              * |444444                           |   |
2236              * |                                 |   \/
2237              * |                                 |   |
2238              * |            empty                | pow2height
2239              * |                                 |   \/
2240              * -----------------------------------
2241              *
2242              * == is the same as
2243              *
2244              * |<-texture width ->|    /\
2245              * |111111111111111111|
2246              * |222222222222222222|texture height
2247              * |333333333333333333|
2248              * |444444444444444444|    \/
2249              * --------------------
2250              *
2251              * this also means that any references to allocatedMemory should work with the data as if were a
2252              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2253              *
2254              * internally the texture is still stored in a boxed format so any references to textureName will
2255              * get a boxed texture with width pow2width and not a texture of width resource.width.
2256              *
2257              * Performance should not be an issue, because applications normally do not lock the surfaces when
2258              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2259              * and doesn't have to be re-read. */
2260             src_data = mem;
2261             dst_data = surface->resource.allocatedMemory;
2262             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2263             for (y = 0; y < surface->resource.height; ++y)
2264             {
2265                 /* mem is a separate buffer, so every row is copied, the first one included. */
2266                 memcpy(dst_data, src_data, dst_pitch);
2267                 src_data += src_pitch;
2268                 dst_data += dst_pitch;
2269             }
2270
2271             HeapFree(GetProcessHeap(), 0, mem);
2272         }
2273     }
2274
2275     /* Surface has now been downloaded */
2276     surface->flags |= SFLAG_INSYSMEM;
2277 }
2278
2279 /* This call just uploads data, the caller is responsible for binding the
2280  * correct texture. */
2281 /* Context activation is done by the caller. */
2282 static void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2283         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2284         BOOL srgb, const struct wined3d_bo_address *data)
2285 {
2286     UINT update_w = src_rect->right - src_rect->left;
2287     UINT update_h = src_rect->bottom - src_rect->top;
2288
2289     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2290             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2291             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2292
2293     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2294         update_h *= format->heightscale;
2295
2296     ENTER_GL();
2297
2298     if (data->buffer_object)
2299     {
2300         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2301         checkGLcall("glBindBufferARB");
2302     }
2303
2304     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2305     {
2306         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2307         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2308         const BYTE *addr = data->addr;
2309         GLenum internal;
2310
2311         addr += (src_rect->top / format->block_height) * src_pitch;
2312         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2313
2314         if (srgb)
2315             internal = format->glGammaInternal;
2316         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2317             internal = format->rtInternal;
2318         else
2319             internal = format->glInternal;
2320
2321         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2322                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2323                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2324
2325         if (row_length == src_pitch)
2326         {
2327             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2328                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2329         }
2330         else
2331         {
2332             UINT row, y;
2333
2334             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2335              * can't use the unpack row length like below. */
2336             for (row = 0, y = dst_point->y; row < row_count; ++row)
2337             {
2338                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2339                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2340                 y += format->block_height;
2341                 addr += src_pitch;
2342             }
2343         }
2344         checkGLcall("glCompressedTexSubImage2DARB");
2345     }
2346     else
2347     {
2348         const BYTE *addr = data->addr;
2349
2350         addr += src_rect->top * src_pitch;
2351         addr += src_rect->left * format->byte_count;
2352
2353         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2354                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2355                 update_w, update_h, format->glFormat, format->glType, addr);
2356
2357         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2358         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2359                 update_w, update_h, format->glFormat, format->glType, addr);
2360         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2361         checkGLcall("glTexSubImage2D");
2362     }
2363
2364     if (data->buffer_object)
2365     {
2366         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2367         checkGLcall("glBindBufferARB");
2368     }
2369
2370     LEAVE_GL();
2371
2372     if (wined3d_settings.strict_draw_ordering)
2373         wglFlush();
2374
2375     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2376     {
2377         struct wined3d_device *device = surface->resource.device;
2378         unsigned int i;
2379
2380         for (i = 0; i < device->context_count; ++i)
2381         {
2382             context_surface_update(device->contexts[i], surface);
2383         }
2384     }
2385 }
2386
2387 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2388         struct wined3d_surface *src_surface, const RECT *src_rect)
2389 {
2390     const struct wined3d_format *src_format;
2391     const struct wined3d_format *dst_format;
2392     const struct wined3d_gl_info *gl_info;
2393     struct wined3d_context *context;
2394     struct wined3d_bo_address data;
2395     struct wined3d_format format;
2396     UINT update_w, update_h;
2397     CONVERT_TYPES convert;
2398     UINT dst_w, dst_h;
2399     UINT src_w, src_h;
2400     UINT src_pitch;
2401     POINT p;
2402     RECT r;
2403
2404     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2405             dst_surface, wine_dbgstr_point(dst_point),
2406             src_surface, wine_dbgstr_rect(src_rect));
2407
2408     src_format = src_surface->resource.format;
2409     dst_format = dst_surface->resource.format;
2410
2411     if (src_format->id != dst_format->id)
2412     {
2413         WARN("Source and destination surfaces should have the same format.\n");
2414         return WINED3DERR_INVALIDCALL;
2415     }
2416
2417     if (!dst_point)
2418     {
2419         p.x = 0;
2420         p.y = 0;
2421         dst_point = &p;
2422     }
2423     else if (dst_point->x < 0 || dst_point->y < 0)
2424     {
2425         WARN("Invalid destination point.\n");
2426         return WINED3DERR_INVALIDCALL;
2427     }
2428
2429     if (!src_rect)
2430     {
2431         r.left = 0;
2432         r.top = 0;
2433         r.right = src_surface->resource.width;
2434         r.bottom = src_surface->resource.height;
2435         src_rect = &r;
2436     }
2437     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2438             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2439     {
2440         WARN("Invalid source rectangle.\n");
2441         return WINED3DERR_INVALIDCALL;
2442     }
2443
2444     src_w = src_surface->resource.width;
2445     src_h = src_surface->resource.height;
2446
2447     dst_w = dst_surface->resource.width;
2448     dst_h = dst_surface->resource.height;
2449
2450     update_w = src_rect->right - src_rect->left;
2451     update_h = src_rect->bottom - src_rect->top;
2452
2453     if (update_w > dst_w || dst_point->x > dst_w - update_w
2454             || update_h > dst_h || dst_point->y > dst_h - update_h)
2455     {
2456         WARN("Destination out of bounds.\n");
2457         return WINED3DERR_INVALIDCALL;
2458     }
2459
2460     /* NPOT block sizes would be silly. */
2461     if ((src_format->flags & WINED3DFMT_FLAG_BLOCKS)
2462             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2463             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2464     {
2465         WARN("Update rect not block-aligned.\n");
2466         return WINED3DERR_INVALIDCALL;
2467     }
2468
2469     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2470     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2471     if (convert != NO_CONVERSION || format.convert)
2472     {
2473         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2474         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3DTEXF_POINT);
2475     }
2476
2477     context = context_acquire(dst_surface->resource.device, NULL);
2478     gl_info = context->gl_info;
2479
2480     /* Only load the surface for partial updates. For newly allocated texture
2481      * the texture wouldn't be the current location, and we'd upload zeroes
2482      * just to overwrite them again. */
2483     if (update_w == dst_w && update_h == dst_h)
2484         surface_prepare_texture(dst_surface, context, FALSE);
2485     else
2486         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2487     surface_bind(dst_surface, context, FALSE);
2488
2489     data.buffer_object = src_surface->pbo;
2490     data.addr = src_surface->resource.allocatedMemory;
2491     src_pitch = wined3d_surface_get_pitch(src_surface);
2492
2493     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2494
2495     invalidate_active_texture(dst_surface->resource.device, context);
2496
2497     context_release(context);
2498
2499     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2500     return WINED3D_OK;
2501 }
2502
2503 /* This call just allocates the texture, the caller is responsible for binding
2504  * the correct texture. */
2505 /* Context activation is done by the caller. */
2506 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2507         const struct wined3d_format *format, BOOL srgb)
2508 {
2509     BOOL enable_client_storage = FALSE;
2510     GLsizei width = surface->pow2Width;
2511     GLsizei height = surface->pow2Height;
2512     const BYTE *mem = NULL;
2513     GLenum internal;
2514
2515     if (srgb)
2516     {
2517         internal = format->glGammaInternal;
2518     }
2519     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2520     {
2521         internal = format->rtInternal;
2522     }
2523     else
2524     {
2525         internal = format->glInternal;
2526     }
2527
2528     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2529
2530     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2531             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2532             internal, width, height, format->glFormat, format->glType);
2533
2534     ENTER_GL();
2535
2536     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2537     {
2538         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2539                 || !surface->resource.allocatedMemory)
2540         {
2541             /* In some cases we want to disable client storage.
2542              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2543              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2544              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2545              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2546              */
2547             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2548             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2549             surface->flags &= ~SFLAG_CLIENT;
2550             enable_client_storage = TRUE;
2551         }
2552         else
2553         {
2554             surface->flags |= SFLAG_CLIENT;
2555
2556             /* Point OpenGL to our allocated texture memory. Do not use
2557              * resource.allocatedMemory here because it might point into a
2558              * PBO. Instead use heapMemory, but get the alignment right. */
2559             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2560                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2561         }
2562     }
2563
2564     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2565     {
2566         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2567                 internal, width, height, 0, surface->resource.size, mem));
2568         checkGLcall("glCompressedTexImage2DARB");
2569     }
2570     else
2571     {
2572         glTexImage2D(surface->texture_target, surface->texture_level,
2573                 internal, width, height, 0, format->glFormat, format->glType, mem);
2574         checkGLcall("glTexImage2D");
2575     }
2576
2577     if(enable_client_storage) {
2578         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2579         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2580     }
2581     LEAVE_GL();
2582 }
2583
2584 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2585  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2586 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2587 /* GL locking is done by the caller */
2588 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2589 {
2590     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2591     struct wined3d_renderbuffer_entry *entry;
2592     GLuint renderbuffer = 0;
2593     unsigned int src_width, src_height;
2594     unsigned int width, height;
2595
2596     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2597     {
2598         width = rt->pow2Width;
2599         height = rt->pow2Height;
2600     }
2601     else
2602     {
2603         width = surface->pow2Width;
2604         height = surface->pow2Height;
2605     }
2606
2607     src_width = surface->pow2Width;
2608     src_height = surface->pow2Height;
2609
2610     /* A depth stencil smaller than the render target is not valid */
2611     if (width > src_width || height > src_height) return;
2612
2613     /* Remove any renderbuffer set if the sizes match */
2614     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2615             || (width == src_width && height == src_height))
2616     {
2617         surface->current_renderbuffer = NULL;
2618         return;
2619     }
2620
2621     /* Look if we've already got a renderbuffer of the correct dimensions */
2622     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2623     {
2624         if (entry->width == width && entry->height == height)
2625         {
2626             renderbuffer = entry->id;
2627             surface->current_renderbuffer = entry;
2628             break;
2629         }
2630     }
2631
2632     if (!renderbuffer)
2633     {
2634         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2635         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2636         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2637                 surface->resource.format->glInternal, width, height);
2638
2639         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2640         entry->width = width;
2641         entry->height = height;
2642         entry->id = renderbuffer;
2643         list_add_head(&surface->renderbuffers, &entry->entry);
2644
2645         surface->current_renderbuffer = entry;
2646     }
2647
2648     checkGLcall("set_compatible_renderbuffer");
2649 }
2650
2651 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2652 {
2653     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2654
2655     TRACE("surface %p.\n", surface);
2656
2657     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2658     {
2659         ERR("Surface %p is not on a swapchain.\n", surface);
2660         return GL_NONE;
2661     }
2662
2663     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2664     {
2665         if (swapchain->render_to_fbo)
2666         {
2667             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2668             return GL_COLOR_ATTACHMENT0;
2669         }
2670         TRACE("Returning GL_BACK\n");
2671         return GL_BACK;
2672     }
2673     else if (surface == swapchain->front_buffer)
2674     {
2675         TRACE("Returning GL_FRONT\n");
2676         return GL_FRONT;
2677     }
2678
2679     FIXME("Higher back buffer, returning GL_BACK\n");
2680     return GL_BACK;
2681 }
2682
2683 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2684 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2685 {
2686     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2687
2688     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2689         /* No partial locking for textures yet. */
2690         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2691
2692     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2693     if (dirty_rect)
2694     {
2695         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2696         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2697         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2698         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2699     }
2700     else
2701     {
2702         surface->dirtyRect.left = 0;
2703         surface->dirtyRect.top = 0;
2704         surface->dirtyRect.right = surface->resource.width;
2705         surface->dirtyRect.bottom = surface->resource.height;
2706     }
2707
2708     /* if the container is a texture then mark it dirty. */
2709     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2710     {
2711         TRACE("Passing to container.\n");
2712         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2713     }
2714 }
2715
2716 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2717 {
2718     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2719     BOOL ck_changed;
2720
2721     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2722
2723     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2724     {
2725         ERR("Not supported on scratch surfaces.\n");
2726         return WINED3DERR_INVALIDCALL;
2727     }
2728
2729     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2730
2731     /* Reload if either the texture and sysmem have different ideas about the
2732      * color key, or the actual key values changed. */
2733     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2734             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2735             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2736     {
2737         TRACE("Reloading because of color keying\n");
2738         /* To perform the color key conversion we need a sysmem copy of
2739          * the surface. Make sure we have it. */
2740
2741         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2742         /* Make sure the texture is reloaded because of the color key change,
2743          * this kills performance though :( */
2744         /* TODO: This is not necessarily needed with hw palettized texture support. */
2745         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2746         /* Switching color keying on / off may change the internal format. */
2747         if (ck_changed)
2748             surface_force_reload(surface);
2749     }
2750     else if (!(surface->flags & flag))
2751     {
2752         TRACE("Reloading because surface is dirty.\n");
2753     }
2754     else
2755     {
2756         TRACE("surface is already in texture\n");
2757         return WINED3D_OK;
2758     }
2759
2760     /* No partial locking for textures yet. */
2761     surface_load_location(surface, flag, NULL);
2762     surface_evict_sysmem(surface);
2763
2764     return WINED3D_OK;
2765 }
2766
/* Convert a 32-bit float to the bit pattern of a 16-bit half float
 * (1 sign bit, 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, infinities and
 * values too large for a half yield +/-INF, values too small for a
 * normalized half are stored non-normalized or flushed to zero. Rounding is
 * to nearest, ties away from zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The absolute value is scaled into [2^10, 2^11), so that its integer
     * part is the 11 bit mantissa including the implicit leading one. */
    const float mantissa_lo = 1024.0f; /* 2^10 */
    const float mantissa_hi = 2048.0f; /* 2^11 */
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    if (tmp < mantissa_lo)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < mantissa_lo);
    }
    else if (tmp >= mantissa_hi)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= mantissa_hi);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit mantissa (2047.5 -> 2048).
     * Renormalize in that case; otherwise masking with 0x3ff below would
     * drop the carry and the result would be too small by a factor of two. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2829
2830 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2831 {
2832     ULONG refcount;
2833
2834     TRACE("Surface %p, container %p of type %#x.\n",
2835             surface, surface->container.u.base, surface->container.type);
2836
2837     switch (surface->container.type)
2838     {
2839         case WINED3D_CONTAINER_TEXTURE:
2840             return wined3d_texture_incref(surface->container.u.texture);
2841
2842         case WINED3D_CONTAINER_SWAPCHAIN:
2843             return wined3d_swapchain_incref(surface->container.u.swapchain);
2844
2845         default:
2846             ERR("Unhandled container type %#x.\n", surface->container.type);
2847         case WINED3D_CONTAINER_NONE:
2848             break;
2849     }
2850
2851     refcount = InterlockedIncrement(&surface->resource.ref);
2852     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2853
2854     return refcount;
2855 }
2856
2857 /* Do not call while under the GL lock. */
2858 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2859 {
2860     ULONG refcount;
2861
2862     TRACE("Surface %p, container %p of type %#x.\n",
2863             surface, surface->container.u.base, surface->container.type);
2864
2865     switch (surface->container.type)
2866     {
2867         case WINED3D_CONTAINER_TEXTURE:
2868             return wined3d_texture_decref(surface->container.u.texture);
2869
2870         case WINED3D_CONTAINER_SWAPCHAIN:
2871             return wined3d_swapchain_decref(surface->container.u.swapchain);
2872
2873         default:
2874             ERR("Unhandled container type %#x.\n", surface->container.type);
2875         case WINED3D_CONTAINER_NONE:
2876             break;
2877     }
2878
2879     refcount = InterlockedDecrement(&surface->resource.ref);
2880     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2881
2882     if (!refcount)
2883     {
2884         surface_cleanup(surface);
2885         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2886
2887         TRACE("Destroyed surface %p.\n", surface);
2888         HeapFree(GetProcessHeap(), 0, surface);
2889     }
2890
2891     return refcount;
2892 }
2893
2894 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2895 {
2896     return resource_set_priority(&surface->resource, priority);
2897 }
2898
2899 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2900 {
2901     return resource_get_priority(&surface->resource);
2902 }
2903
2904 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2905 {
2906     TRACE("surface %p.\n", surface);
2907
2908     if (!surface->resource.device->d3d_initialized)
2909     {
2910         ERR("D3D not initialized.\n");
2911         return;
2912     }
2913
2914     surface_internal_preload(surface, SRGB_ANY);
2915 }
2916
2917 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2918 {
2919     TRACE("surface %p.\n", surface);
2920
2921     return surface->resource.parent;
2922 }
2923
2924 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2925 {
2926     TRACE("surface %p.\n", surface);
2927
2928     return &surface->resource;
2929 }
2930
2931 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2932 {
2933     TRACE("surface %p, flags %#x.\n", surface, flags);
2934
2935     switch (flags)
2936     {
2937         case WINEDDGBS_CANBLT:
2938         case WINEDDGBS_ISBLTDONE:
2939             return WINED3D_OK;
2940
2941         default:
2942             return WINED3DERR_INVALIDCALL;
2943     }
2944 }
2945
2946 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2947 {
2948     TRACE("surface %p, flags %#x.\n", surface, flags);
2949
2950     /* XXX: DDERR_INVALIDSURFACETYPE */
2951
2952     switch (flags)
2953     {
2954         case WINEDDGFS_CANFLIP:
2955         case WINEDDGFS_ISFLIPDONE:
2956             return WINED3D_OK;
2957
2958         default:
2959             return WINED3DERR_INVALIDCALL;
2960     }
2961 }
2962
2963 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2964 {
2965     TRACE("surface %p.\n", surface);
2966
2967     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2968     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2969 }
2970
2971 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2972 {
2973     TRACE("surface %p.\n", surface);
2974
2975     surface->flags &= ~SFLAG_LOST;
2976     return WINED3D_OK;
2977 }
2978
2979 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2980 {
2981     TRACE("surface %p, palette %p.\n", surface, palette);
2982
2983     if (surface->palette == palette)
2984     {
2985         TRACE("Nop palette change.\n");
2986         return WINED3D_OK;
2987     }
2988
2989     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
2990         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
2991
2992     surface->palette = palette;
2993
2994     if (palette)
2995     {
2996         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2997             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
2998
2999         surface->surface_ops->surface_realize_palette(surface);
3000     }
3001
3002     return WINED3D_OK;
3003 }
3004
3005 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3006         DWORD flags, const WINEDDCOLORKEY *color_key)
3007 {
3008     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3009
3010     if (flags & WINEDDCKEY_COLORSPACE)
3011     {
3012         FIXME(" colorkey value not supported (%08x) !\n", flags);
3013         return WINED3DERR_INVALIDCALL;
3014     }
3015
3016     /* Dirtify the surface, but only if a key was changed. */
3017     if (color_key)
3018     {
3019         switch (flags & ~WINEDDCKEY_COLORSPACE)
3020         {
3021             case WINEDDCKEY_DESTBLT:
3022                 surface->DestBltCKey = *color_key;
3023                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3024                 break;
3025
3026             case WINEDDCKEY_DESTOVERLAY:
3027                 surface->DestOverlayCKey = *color_key;
3028                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3029                 break;
3030
3031             case WINEDDCKEY_SRCOVERLAY:
3032                 surface->SrcOverlayCKey = *color_key;
3033                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3034                 break;
3035
3036             case WINEDDCKEY_SRCBLT:
3037                 surface->SrcBltCKey = *color_key;
3038                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3039                 break;
3040         }
3041     }
3042     else
3043     {
3044         switch (flags & ~WINEDDCKEY_COLORSPACE)
3045         {
3046             case WINEDDCKEY_DESTBLT:
3047                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3048                 break;
3049
3050             case WINEDDCKEY_DESTOVERLAY:
3051                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3052                 break;
3053
3054             case WINEDDCKEY_SRCOVERLAY:
3055                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3056                 break;
3057
3058             case WINEDDCKEY_SRCBLT:
3059                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3060                 break;
3061         }
3062     }
3063
3064     return WINED3D_OK;
3065 }
3066
3067 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3068 {
3069     TRACE("surface %p.\n", surface);
3070
3071     return surface->palette;
3072 }
3073
3074 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3075 {
3076     const struct wined3d_format *format = surface->resource.format;
3077     DWORD pitch;
3078
3079     TRACE("surface %p.\n", surface);
3080
3081     if (format->flags & WINED3DFMT_FLAG_BLOCKS)
3082     {
3083         /* Since compressed formats are block based, pitch means the amount of
3084          * bytes to the next row of block rather than the next row of pixels. */
3085         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3086         pitch = row_block_count * format->block_byte_count;
3087     }
3088     else
3089     {
3090         unsigned char alignment = surface->resource.device->surface_alignment;
3091         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3092         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3093     }
3094
3095     TRACE("Returning %u.\n", pitch);
3096
3097     return pitch;
3098 }
3099
3100 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3101 {
3102     TRACE("surface %p, mem %p.\n", surface, mem);
3103
3104     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3105     {
3106         WARN("Surface is locked or the DC is in use.\n");
3107         return WINED3DERR_INVALIDCALL;
3108     }
3109
3110     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3111     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3112     {
3113         ERR("Not supported on render targets.\n");
3114         return WINED3DERR_INVALIDCALL;
3115     }
3116
3117     if (mem && mem != surface->resource.allocatedMemory)
3118     {
3119         void *release = NULL;
3120
3121         /* Do I have to copy the old surface content? */
3122         if (surface->flags & SFLAG_DIBSECTION)
3123         {
3124             DeleteDC(surface->hDC);
3125             DeleteObject(surface->dib.DIBsection);
3126             surface->dib.bitmap_data = NULL;
3127             surface->resource.allocatedMemory = NULL;
3128             surface->hDC = NULL;
3129             surface->flags &= ~SFLAG_DIBSECTION;
3130         }
3131         else if (!(surface->flags & SFLAG_USERPTR))
3132         {
3133             release = surface->resource.heapMemory;
3134             surface->resource.heapMemory = NULL;
3135         }
3136         surface->resource.allocatedMemory = mem;
3137         surface->flags |= SFLAG_USERPTR;
3138
3139         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3140         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3141
3142         /* For client textures OpenGL has to be notified. */
3143         if (surface->flags & SFLAG_CLIENT)
3144             surface_release_client_storage(surface);
3145
3146         /* Now free the old memory if any. */
3147         HeapFree(GetProcessHeap(), 0, release);
3148     }
3149     else if (surface->flags & SFLAG_USERPTR)
3150     {
3151         /* HeapMemory should be NULL already. */
3152         if (surface->resource.heapMemory)
3153             ERR("User pointer surface has heap memory allocated.\n");
3154
3155         if (!mem)
3156         {
3157             surface->resource.allocatedMemory = NULL;
3158             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3159
3160             if (surface->flags & SFLAG_CLIENT)
3161                 surface_release_client_storage(surface);
3162
3163             surface_prepare_system_memory(surface);
3164         }
3165
3166         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3167     }
3168
3169     return WINED3D_OK;
3170 }
3171
3172 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3173 {
3174     LONG w, h;
3175
3176     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3177
3178     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3179     {
3180         WARN("Not an overlay surface.\n");
3181         return WINEDDERR_NOTAOVERLAYSURFACE;
3182     }
3183
3184     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3185     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3186     surface->overlay_destrect.left = x;
3187     surface->overlay_destrect.top = y;
3188     surface->overlay_destrect.right = x + w;
3189     surface->overlay_destrect.bottom = y + h;
3190
3191     surface_draw_overlay(surface);
3192
3193     return WINED3D_OK;
3194 }
3195
3196 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3197 {
3198     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3199
3200     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3201     {
3202         TRACE("Not an overlay surface.\n");
3203         return WINEDDERR_NOTAOVERLAYSURFACE;
3204     }
3205
3206     if (!surface->overlay_dest)
3207     {
3208         TRACE("Overlay not visible.\n");
3209         *x = 0;
3210         *y = 0;
3211         return WINEDDERR_OVERLAYNOTVISIBLE;
3212     }
3213
3214     *x = surface->overlay_destrect.left;
3215     *y = surface->overlay_destrect.top;
3216
3217     TRACE("Returning position %d, %d.\n", *x, *y);
3218
3219     return WINED3D_OK;
3220 }
3221
3222 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3223         DWORD flags, struct wined3d_surface *ref)
3224 {
3225     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3226
3227     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3228     {
3229         TRACE("Not an overlay surface.\n");
3230         return WINEDDERR_NOTAOVERLAYSURFACE;
3231     }
3232
3233     return WINED3D_OK;
3234 }
3235
3236 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3237         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3238 {
3239     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3240             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3241
3242     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3243     {
3244         WARN("Not an overlay surface.\n");
3245         return WINEDDERR_NOTAOVERLAYSURFACE;
3246     }
3247     else if (!dst_surface)
3248     {
3249         WARN("Dest surface is NULL.\n");
3250         return WINED3DERR_INVALIDCALL;
3251     }
3252
3253     if (src_rect)
3254     {
3255         surface->overlay_srcrect = *src_rect;
3256     }
3257     else
3258     {
3259         surface->overlay_srcrect.left = 0;
3260         surface->overlay_srcrect.top = 0;
3261         surface->overlay_srcrect.right = surface->resource.width;
3262         surface->overlay_srcrect.bottom = surface->resource.height;
3263     }
3264
3265     if (dst_rect)
3266     {
3267         surface->overlay_destrect = *dst_rect;
3268     }
3269     else
3270     {
3271         surface->overlay_destrect.left = 0;
3272         surface->overlay_destrect.top = 0;
3273         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3274         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3275     }
3276
3277     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3278     {
3279         surface->overlay_dest = NULL;
3280         list_remove(&surface->overlay_entry);
3281     }
3282
3283     if (flags & WINEDDOVER_SHOW)
3284     {
3285         if (surface->overlay_dest != dst_surface)
3286         {
3287             surface->overlay_dest = dst_surface;
3288             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3289         }
3290     }
3291     else if (flags & WINEDDOVER_HIDE)
3292     {
3293         /* tests show that the rectangles are erased on hide */
3294         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3295         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3296         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3297         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3298         surface->overlay_dest = NULL;
3299     }
3300
3301     surface_draw_overlay(surface);
3302
3303     return WINED3D_OK;
3304 }
3305
3306 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3307 {
3308     TRACE("surface %p, clipper %p.\n", surface, clipper);
3309
3310     surface->clipper = clipper;
3311
3312     return WINED3D_OK;
3313 }
3314
3315 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3316 {
3317     TRACE("surface %p.\n", surface);
3318
3319     return surface->clipper;
3320 }
3321
3322 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3323 {
3324     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3325
3326     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3327
3328     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3329     {
3330         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3331         return WINED3DERR_INVALIDCALL;
3332     }
3333
3334     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3335             surface->pow2Width, surface->pow2Height);
3336     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3337     surface->resource.format = format;
3338
3339     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3340     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3341             format->glFormat, format->glInternal, format->glType);
3342
3343     return WINED3D_OK;
3344 }
3345
3346 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3347         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3348 {
3349     unsigned short *dst_s;
3350     const float *src_f;
3351     unsigned int x, y;
3352
3353     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3354
3355     for (y = 0; y < h; ++y)
3356     {
3357         src_f = (const float *)(src + y * pitch_in);
3358         dst_s = (unsigned short *) (dst + y * pitch_out);
3359         for (x = 0; x < w; ++x)
3360         {
3361             dst_s[x] = float_32_to_16(src_f + x);
3362         }
3363     }
3364 }
3365
3366 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3367         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3368 {
3369     static const unsigned char convert_5to8[] =
3370     {
3371         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3372         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3373         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3374         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3375     };
3376     static const unsigned char convert_6to8[] =
3377     {
3378         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3379         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3380         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3381         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3382         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3383         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3384         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3385         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3386     };
3387     unsigned int x, y;
3388
3389     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3390
3391     for (y = 0; y < h; ++y)
3392     {
3393         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3394         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3395         for (x = 0; x < w; ++x)
3396         {
3397             WORD pixel = src_line[x];
3398             dst_line[x] = 0xff000000
3399                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3400                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3401                     | convert_5to8[(pixel & 0x001f)];
3402         }
3403     }
3404 }
3405
3406 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3407  * in both cases we're just setting the X / Alpha channel to 0xff. */
3408 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3409         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3410 {
3411     unsigned int x, y;
3412
3413     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3414
3415     for (y = 0; y < h; ++y)
3416     {
3417         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3418         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3419
3420         for (x = 0; x < w; ++x)
3421         {
3422             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3423         }
3424     }
3425 }
3426
3427 static inline BYTE cliptobyte(int x)
3428 {
3429     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3430 }
3431
3432 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3433         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3434 {
3435     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3436     unsigned int x, y;
3437
3438     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3439
3440     for (y = 0; y < h; ++y)
3441     {
3442         const BYTE *src_line = src + y * pitch_in;
3443         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3444         for (x = 0; x < w; ++x)
3445         {
3446             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3447              *     C = Y - 16; D = U - 128; E = V - 128;
3448              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3449              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3450              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3451              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3452              * U and V are shared between the pixels. */
3453             if (!(x & 1)) /* For every even pixel, read new U and V. */
3454             {
3455                 d = (int) src_line[1] - 128;
3456                 e = (int) src_line[3] - 128;
3457                 r2 = 409 * e + 128;
3458                 g2 = - 100 * d - 208 * e + 128;
3459                 b2 = 516 * d + 128;
3460             }
3461             c2 = 298 * ((int) src_line[0] - 16);
3462             dst_line[x] = 0xff000000
3463                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3464                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3465                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3466                 /* Scale RGB values to 0..255 range,
3467                  * then clip them if still not in range (may be negative),
3468                  * then shift them within DWORD if necessary. */
3469             src_line += 2;
3470         }
3471     }
3472 }
3473
3474 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3475         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3476 {
3477     unsigned int x, y;
3478     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3479
3480     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3481
3482     for (y = 0; y < h; ++y)
3483     {
3484         const BYTE *src_line = src + y * pitch_in;
3485         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3486         for (x = 0; x < w; ++x)
3487         {
3488             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3489              *     C = Y - 16; D = U - 128; E = V - 128;
3490              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3491              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3492              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3493              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3494              * U and V are shared between the pixels. */
3495             if (!(x & 1)) /* For every even pixel, read new U and V. */
3496             {
3497                 d = (int) src_line[1] - 128;
3498                 e = (int) src_line[3] - 128;
3499                 r2 = 409 * e + 128;
3500                 g2 = - 100 * d - 208 * e + 128;
3501                 b2 = 516 * d + 128;
3502             }
3503             c2 = 298 * ((int) src_line[0] - 16);
3504             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3505                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3506                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3507                 /* Scale RGB values to 0..255 range,
3508                  * then clip them if still not in range (may be negative),
3509                  * then shift them within DWORD if necessary. */
3510             src_line += 2;
3511         }
3512     }
3513 }
3514
3515 struct d3dfmt_convertor_desc
3516 {
3517     enum wined3d_format_id from, to;
3518     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3519 };
3520
3521 static const struct d3dfmt_convertor_desc convertors[] =
3522 {
3523     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3524     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3525     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3526     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3527     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3528     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3529 };
3530
3531 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3532         enum wined3d_format_id to)
3533 {
3534     unsigned int i;
3535
3536     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3537     {
3538         if (convertors[i].from == from && convertors[i].to == to)
3539             return &convertors[i];
3540     }
3541
3542     return NULL;
3543 }
3544
3545 /*****************************************************************************
3546  * surface_convert_format
3547  *
3548  * Creates a duplicate of a surface in a different format. Is used by Blt to
3549  * blit between surfaces with different formats.
3550  *
3551  * Parameters
3552  *  source: Source surface
3553  *  fmt: Requested destination format
3554  *
3555  *****************************************************************************/
3556 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3557 {
3558     const struct d3dfmt_convertor_desc *conv;
3559     WINED3DLOCKED_RECT lock_src, lock_dst;
3560     struct wined3d_surface *ret = NULL;
3561     HRESULT hr;
3562
3563     conv = find_convertor(source->resource.format->id, to_fmt);
3564     if (!conv)
3565     {
3566         FIXME("Cannot find a conversion function from format %s to %s.\n",
3567                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3568         return NULL;
3569     }
3570
3571     wined3d_surface_create(source->resource.device, source->resource.width,
3572             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3573             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3574             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3575     if (!ret)
3576     {
3577         ERR("Failed to create a destination surface for conversion.\n");
3578         return NULL;
3579     }
3580
3581     memset(&lock_src, 0, sizeof(lock_src));
3582     memset(&lock_dst, 0, sizeof(lock_dst));
3583
3584     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3585     if (FAILED(hr))
3586     {
3587         ERR("Failed to lock the source surface.\n");
3588         wined3d_surface_decref(ret);
3589         return NULL;
3590     }
3591     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3592     if (FAILED(hr))
3593     {
3594         ERR("Failed to lock the destination surface.\n");
3595         wined3d_surface_unmap(source);
3596         wined3d_surface_decref(ret);
3597         return NULL;
3598     }
3599
3600     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3601             source->resource.width, source->resource.height);
3602
3603     wined3d_surface_unmap(ret);
3604     wined3d_surface_unmap(source);
3605
3606     return ret;
3607 }
3608
3609 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3610         unsigned int bpp, UINT pitch, DWORD color)
3611 {
3612     BYTE *first;
3613     int x, y;
3614
3615     /* Do first row */
3616
3617 #define COLORFILL_ROW(type) \
3618 do { \
3619     type *d = (type *)buf; \
3620     for (x = 0; x < width; ++x) \
3621         d[x] = (type)color; \
3622 } while(0)
3623
3624     switch (bpp)
3625     {
3626         case 1:
3627             COLORFILL_ROW(BYTE);
3628             break;
3629
3630         case 2:
3631             COLORFILL_ROW(WORD);
3632             break;
3633
3634         case 3:
3635         {
3636             BYTE *d = buf;
3637             for (x = 0; x < width; ++x, d += 3)
3638             {
3639                 d[0] = (color      ) & 0xFF;
3640                 d[1] = (color >>  8) & 0xFF;
3641                 d[2] = (color >> 16) & 0xFF;
3642             }
3643             break;
3644         }
3645         case 4:
3646             COLORFILL_ROW(DWORD);
3647             break;
3648
3649         default:
3650             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3651             return WINED3DERR_NOTAVAILABLE;
3652     }
3653
3654 #undef COLORFILL_ROW
3655
3656     /* Now copy first row. */
3657     first = buf;
3658     for (y = 1; y < height; ++y)
3659     {
3660         buf += pitch;
3661         memcpy(buf, first, width * bpp);
3662     }
3663
3664     return WINED3D_OK;
3665 }
3666
3667 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3668 {
3669     TRACE("surface %p.\n", surface);
3670
3671     if (!(surface->flags & SFLAG_LOCKED))
3672     {
3673         WARN("Trying to unmap unmapped surface.\n");
3674         return WINEDDERR_NOTLOCKED;
3675     }
3676     surface->flags &= ~SFLAG_LOCKED;
3677
3678     surface->surface_ops->surface_unmap(surface);
3679
3680     return WINED3D_OK;
3681 }
3682
3683 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3684         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3685 {
3686     const struct wined3d_format *format = surface->resource.format;
3687
3688     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3689             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3690
3691     if (surface->flags & SFLAG_LOCKED)
3692     {
3693         WARN("Surface is already mapped.\n");
3694         return WINED3DERR_INVALIDCALL;
3695     }
3696     if ((format->flags & WINED3DFMT_FLAG_BLOCKS)
3697             && rect && (rect->left || rect->top
3698             || rect->right != surface->resource.width
3699             || rect->bottom != surface->resource.height))
3700     {
3701         UINT width_mask = format->block_width - 1;
3702         UINT height_mask = format->block_height - 1;
3703
3704         if ((rect->left & width_mask) || (rect->right & width_mask)
3705                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3706         {
3707             WARN("Map rect %s is misaligned for %ux%u blocks.\n",
3708                     wine_dbgstr_rect(rect), format->block_width, format->block_height);
3709
3710             if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3711                 return WINED3DERR_INVALIDCALL;
3712         }
3713     }
3714
3715     surface->flags |= SFLAG_LOCKED;
3716
3717     if (!(surface->flags & SFLAG_LOCKABLE))
3718         WARN("Trying to lock unlockable surface.\n");
3719
3720     surface->surface_ops->surface_map(surface, rect, flags);
3721
3722     if (format->flags & WINED3DFMT_FLAG_BROKEN_PITCH)
3723         locked_rect->Pitch = surface->resource.width * format->byte_count;
3724     else
3725         locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3726
3727     if (!rect)
3728     {
3729         locked_rect->pBits = surface->resource.allocatedMemory;
3730         surface->lockedRect.left = 0;
3731         surface->lockedRect.top = 0;
3732         surface->lockedRect.right = surface->resource.width;
3733         surface->lockedRect.bottom = surface->resource.height;
3734     }
3735     else
3736     {
3737         if ((format->flags & (WINED3DFMT_FLAG_BLOCKS | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_BLOCKS)
3738         {
3739             /* Compressed textures are block based, so calculate the offset of
3740              * the block that contains the top-left pixel of the locked rectangle. */
3741             locked_rect->pBits = surface->resource.allocatedMemory
3742                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3743                     + ((rect->left / format->block_width) * format->block_byte_count);
3744         }
3745         else
3746         {
3747             locked_rect->pBits = surface->resource.allocatedMemory
3748                     + (locked_rect->Pitch * rect->top)
3749                     + (rect->left * format->byte_count);
3750         }
3751         surface->lockedRect.left = rect->left;
3752         surface->lockedRect.top = rect->top;
3753         surface->lockedRect.right = rect->right;
3754         surface->lockedRect.bottom = rect->bottom;
3755     }
3756
3757     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3758     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3759
3760     return WINED3D_OK;
3761 }
3762
3763 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3764 {
3765     WINED3DLOCKED_RECT lock;
3766     HRESULT hr;
3767
3768     TRACE("surface %p, dc %p.\n", surface, dc);
3769
3770     if (surface->flags & SFLAG_USERPTR)
3771     {
3772         ERR("Not supported on surfaces with application-provided memory.\n");
3773         return WINEDDERR_NODC;
3774     }
3775
3776     /* Give more detailed info for ddraw. */
3777     if (surface->flags & SFLAG_DCINUSE)
3778         return WINEDDERR_DCALREADYCREATED;
3779
3780     /* Can't GetDC if the surface is locked. */
3781     if (surface->flags & SFLAG_LOCKED)
3782         return WINED3DERR_INVALIDCALL;
3783
3784     /* Create a DIB section if there isn't a dc yet. */
3785     if (!surface->hDC)
3786     {
3787         if (surface->flags & SFLAG_CLIENT)
3788         {
3789             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3790             surface_release_client_storage(surface);
3791         }
3792         hr = surface_create_dib_section(surface);
3793         if (FAILED(hr))
3794             return WINED3DERR_INVALIDCALL;
3795
3796         /* Use the DIB section from now on if we are not using a PBO. */
3797         if (!(surface->flags & SFLAG_PBO))
3798             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3799     }
3800
3801     /* Map the surface. */
3802     hr = wined3d_surface_map(surface, &lock, NULL, 0);
3803     if (FAILED(hr))
3804     {
3805         ERR("Map failed, hr %#x.\n", hr);
3806         return hr;
3807     }
3808
3809     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3810      * activates the allocatedMemory. */
3811     if (surface->flags & SFLAG_PBO)
3812         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3813
3814     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3815             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3816     {
3817         /* GetDC on palettized formats is unsupported in D3D9, and the method
3818          * is missing in D3D8, so this should only be used for DX <=7
3819          * surfaces (with non-device palettes). */
3820         const PALETTEENTRY *pal = NULL;
3821
3822         if (surface->palette)
3823         {
3824             pal = surface->palette->palents;
3825         }
3826         else
3827         {
3828             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3829             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3830
3831             if (dds_primary && dds_primary->palette)
3832                 pal = dds_primary->palette->palents;
3833         }
3834
3835         if (pal)
3836         {
3837             RGBQUAD col[256];
3838             unsigned int i;
3839
3840             for (i = 0; i < 256; ++i)
3841             {
3842                 col[i].rgbRed = pal[i].peRed;
3843                 col[i].rgbGreen = pal[i].peGreen;
3844                 col[i].rgbBlue = pal[i].peBlue;
3845                 col[i].rgbReserved = 0;
3846             }
3847             SetDIBColorTable(surface->hDC, 0, 256, col);
3848         }
3849     }
3850
3851     surface->flags |= SFLAG_DCINUSE;
3852
3853     *dc = surface->hDC;
3854     TRACE("Returning dc %p.\n", *dc);
3855
3856     return WINED3D_OK;
3857 }
3858
3859 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3860 {
3861     TRACE("surface %p, dc %p.\n", surface, dc);
3862
3863     if (!(surface->flags & SFLAG_DCINUSE))
3864         return WINEDDERR_NODC;
3865
3866     if (surface->hDC != dc)
3867     {
3868         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3869                 dc, surface->hDC);
3870         return WINEDDERR_NODC;
3871     }
3872
3873     /* Copy the contents of the DIB over to the PBO. */
3874     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3875         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3876
3877     /* We locked first, so unlock now. */
3878     wined3d_surface_unmap(surface);
3879
3880     surface->flags &= ~SFLAG_DCINUSE;
3881
3882     return WINED3D_OK;
3883 }
3884
3885 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3886 {
3887     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3888
3889     if (flags)
3890     {
3891         static UINT once;
3892         if (!once++)
3893             FIXME("Ignoring flags %#x.\n", flags);
3894         else
3895             WARN("Ignoring flags %#x.\n", flags);
3896     }
3897
3898     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3899     {
3900         ERR("Not supported on swapchain surfaces.\n");
3901         return WINEDDERR_NOTFLIPPABLE;
3902     }
3903
3904     /* Flipping is only supported on render targets and overlays. */
3905     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3906     {
3907         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3908         return WINEDDERR_NOTFLIPPABLE;
3909     }
3910
3911     flip_surface(surface, override);
3912
3913     /* Update overlays if they're visible. */
3914     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3915         return surface_draw_overlay(surface);
3916
3917     return WINED3D_OK;
3918 }
3919
3920 /* Do not call while under the GL lock. */
3921 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3922 {
3923     struct wined3d_device *device = surface->resource.device;
3924
3925     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3926
3927     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3928     {
3929         struct wined3d_texture *texture = surface->container.u.texture;
3930
3931         TRACE("Passing to container (%p).\n", texture);
3932         texture->texture_ops->texture_preload(texture, srgb);
3933     }
3934     else
3935     {
3936         struct wined3d_context *context;
3937
3938         TRACE("(%p) : About to load surface\n", surface);
3939
3940         /* TODO: Use already acquired context when possible. */
3941         context = context_acquire(device, NULL);
3942
3943         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3944
3945         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3946         {
3947             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3948             GLclampf tmp;
3949             tmp = 0.9f;
3950             ENTER_GL();
3951             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3952             LEAVE_GL();
3953         }
3954
3955         context_release(context);
3956     }
3957 }
3958
3959 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3960 {
3961     if (!surface->resource.allocatedMemory)
3962     {
3963         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3964                 surface->resource.size + RESOURCE_ALIGNMENT);
3965         if (!surface->resource.heapMemory)
3966         {
3967             ERR("Out of memory\n");
3968             return FALSE;
3969         }
3970         surface->resource.allocatedMemory =
3971             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3972     }
3973     else
3974     {
3975         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3976     }
3977
3978     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3979
3980     return TRUE;
3981 }
3982
3983 /* Read the framebuffer back into the surface */
3984 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3985 {
3986     struct wined3d_device *device = surface->resource.device;
3987     const struct wined3d_gl_info *gl_info;
3988     struct wined3d_context *context;
3989     BYTE *mem;
3990     GLint fmt;
3991     GLint type;
3992     BYTE *row, *top, *bottom;
3993     int i;
3994     BOOL bpp;
3995     RECT local_rect;
3996     BOOL srcIsUpsideDown;
3997     GLint rowLen = 0;
3998     GLint skipPix = 0;
3999     GLint skipRow = 0;
4000
4001     context = context_acquire(device, surface);
4002     context_apply_blit_state(context, device);
4003     gl_info = context->gl_info;
4004
4005     ENTER_GL();
4006
4007     /* Select the correct read buffer, and give some debug output.
4008      * There is no need to keep track of the current read buffer or reset it, every part of the code
4009      * that reads sets the read buffer as desired.
4010      */
4011     if (surface_is_offscreen(surface))
4012     {
4013         /* Mapping the primary render target which is not on a swapchain.
4014          * Read from the back buffer. */
4015         TRACE("Mapping offscreen render target.\n");
4016         glReadBuffer(device->offscreenBuffer);
4017         srcIsUpsideDown = TRUE;
4018     }
4019     else
4020     {
4021         /* Onscreen surfaces are always part of a swapchain */
4022         GLenum buffer = surface_get_gl_buffer(surface);
4023         TRACE("Mapping %#x buffer.\n", buffer);
4024         glReadBuffer(buffer);
4025         checkGLcall("glReadBuffer");
4026         srcIsUpsideDown = FALSE;
4027     }
4028
4029     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4030     if (!rect)
4031     {
4032         local_rect.left = 0;
4033         local_rect.top = 0;
4034         local_rect.right = surface->resource.width;
4035         local_rect.bottom = surface->resource.height;
4036     }
4037     else
4038     {
4039         local_rect = *rect;
4040     }
4041     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4042
4043     switch (surface->resource.format->id)
4044     {
4045         case WINED3DFMT_P8_UINT:
4046         {
4047             if (primary_render_target_is_p8(device))
4048             {
4049                 /* In case of P8 render targets the index is stored in the alpha component */
4050                 fmt = GL_ALPHA;
4051                 type = GL_UNSIGNED_BYTE;
4052                 mem = dest;
4053                 bpp = surface->resource.format->byte_count;
4054             }
4055             else
4056             {
4057                 /* GL can't return palettized data, so read ARGB pixels into a
4058                  * separate block of memory and convert them into palettized format
4059                  * in software. Slow, but if the app means to use palettized render
4060                  * targets and locks it...
4061                  *
4062                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4063                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4064                  * for the color channels when palettizing the colors.
4065                  */
4066                 fmt = GL_RGB;
4067                 type = GL_UNSIGNED_BYTE;
4068                 pitch *= 3;
4069                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4070                 if (!mem)
4071                 {
4072                     ERR("Out of memory\n");
4073                     LEAVE_GL();
4074                     return;
4075                 }
4076                 bpp = surface->resource.format->byte_count * 3;
4077             }
4078         }
4079         break;
4080
4081         default:
4082             mem = dest;
4083             fmt = surface->resource.format->glFormat;
4084             type = surface->resource.format->glType;
4085             bpp = surface->resource.format->byte_count;
4086     }
4087
4088     if (surface->flags & SFLAG_PBO)
4089     {
4090         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4091         checkGLcall("glBindBufferARB");
4092         if (mem)
4093         {
4094             ERR("mem not null for pbo -- unexpected\n");
4095             mem = NULL;
4096         }
4097     }
4098
4099     /* Save old pixel store pack state */
4100     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4101     checkGLcall("glGetIntegerv");
4102     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4103     checkGLcall("glGetIntegerv");
4104     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4105     checkGLcall("glGetIntegerv");
4106
4107     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4108     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4109     checkGLcall("glPixelStorei");
4110     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4111     checkGLcall("glPixelStorei");
4112     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4113     checkGLcall("glPixelStorei");
4114
4115     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4116             local_rect.right - local_rect.left,
4117             local_rect.bottom - local_rect.top,
4118             fmt, type, mem);
4119     checkGLcall("glReadPixels");
4120
4121     /* Reset previous pixel store pack state */
4122     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4123     checkGLcall("glPixelStorei");
4124     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4125     checkGLcall("glPixelStorei");
4126     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4127     checkGLcall("glPixelStorei");
4128
4129     if (surface->flags & SFLAG_PBO)
4130     {
4131         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4132         checkGLcall("glBindBufferARB");
4133
4134         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4135          * to get a pointer to it and perform the flipping in software. This is a lot
4136          * faster than calling glReadPixels for each line. In case we want more speed
4137          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4138         if (!srcIsUpsideDown)
4139         {
4140             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4141             checkGLcall("glBindBufferARB");
4142
4143             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4144             checkGLcall("glMapBufferARB");
4145         }
4146     }
4147
4148     /* TODO: Merge this with the palettization loop below for P8 targets */
4149     if(!srcIsUpsideDown) {
4150         UINT len, off;
4151         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4152             Flip the lines in software */
4153         len = (local_rect.right - local_rect.left) * bpp;
4154         off = local_rect.left * bpp;
4155
4156         row = HeapAlloc(GetProcessHeap(), 0, len);
4157         if(!row) {
4158             ERR("Out of memory\n");
4159             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4160                 HeapFree(GetProcessHeap(), 0, mem);
4161             LEAVE_GL();
4162             return;
4163         }
4164
4165         top = mem + pitch * local_rect.top;
4166         bottom = mem + pitch * (local_rect.bottom - 1);
4167         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4168             memcpy(row, top + off, len);
4169             memcpy(top + off, bottom + off, len);
4170             memcpy(bottom + off, row, len);
4171             top += pitch;
4172             bottom -= pitch;
4173         }
4174         HeapFree(GetProcessHeap(), 0, row);
4175
4176         /* Unmap the temp PBO buffer */
4177         if (surface->flags & SFLAG_PBO)
4178         {
4179             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4180             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4181         }
4182     }
4183
4184     LEAVE_GL();
4185     context_release(context);
4186
4187     /* For P8 textures we need to perform an inverse palette lookup. This is
4188      * done by searching for a palette index which matches the RGB value.
4189      * Note this isn't guaranteed to work when there are multiple entries for
4190      * the same color but we have no choice. In case of P8 render targets,
4191      * the index is stored in the alpha component so no conversion is needed. */
4192     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4193     {
4194         const PALETTEENTRY *pal = NULL;
4195         DWORD width = pitch / 3;
4196         int x, y, c;
4197
4198         if (surface->palette)
4199         {
4200             pal = surface->palette->palents;
4201         }
4202         else
4203         {
4204             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4205             HeapFree(GetProcessHeap(), 0, mem);
4206             return;
4207         }
4208
4209         for(y = local_rect.top; y < local_rect.bottom; y++) {
4210             for(x = local_rect.left; x < local_rect.right; x++) {
4211                 /*                      start              lines            pixels      */
4212                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4213                 const BYTE *green = blue  + 1;
4214                 const BYTE *red = green + 1;
4215
4216                 for(c = 0; c < 256; c++) {
4217                     if(*red   == pal[c].peRed   &&
4218                        *green == pal[c].peGreen &&
4219                        *blue  == pal[c].peBlue)
4220                     {
4221                         *((BYTE *) dest + y * width + x) = c;
4222                         break;
4223                     }
4224                 }
4225             }
4226         }
4227         HeapFree(GetProcessHeap(), 0, mem);
4228     }
4229 }
4230
4231 /* Read the framebuffer contents into a texture. Note that this function
4232  * doesn't do any kind of flipping. Using this on an onscreen surface will
4233  * result in a flipped D3D texture. */
4234 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4235 {
4236     struct wined3d_device *device = surface->resource.device;
4237     struct wined3d_context *context;
4238
4239     context = context_acquire(device, surface);
4240     device_invalidate_state(device, STATE_FRAMEBUFFER);
4241
4242     surface_prepare_texture(surface, context, srgb);
4243     surface_bind_and_dirtify(surface, context, srgb);
4244
4245     TRACE("Reading back offscreen render target %p.\n", surface);
4246
4247     ENTER_GL();
4248
4249     if (surface_is_offscreen(surface))
4250         glReadBuffer(device->offscreenBuffer);
4251     else
4252         glReadBuffer(surface_get_gl_buffer(surface));
4253     checkGLcall("glReadBuffer");
4254
4255     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4256             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4257     checkGLcall("glCopyTexSubImage2D");
4258
4259     LEAVE_GL();
4260
4261     context_release(context);
4262 }
4263
4264 /* Context activation is done by the caller. */
4265 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4266         struct wined3d_context *context, BOOL srgb)
4267 {
4268     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4269     CONVERT_TYPES convert;
4270     struct wined3d_format format;
4271
4272     if (surface->flags & alloc_flag) return;
4273
4274     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4275     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4276     else surface->flags &= ~SFLAG_CONVERTED;
4277
4278     surface_bind_and_dirtify(surface, context, srgb);
4279     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4280     surface->flags |= alloc_flag;
4281 }
4282
4283 /* Context activation is done by the caller. */
4284 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4285 {
4286     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4287     {
4288         struct wined3d_texture *texture = surface->container.u.texture;
4289         UINT sub_count = texture->level_count * texture->layer_count;
4290         UINT i;
4291
4292         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4293
4294         for (i = 0; i < sub_count; ++i)
4295         {
4296             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4297             surface_prepare_texture_internal(s, context, srgb);
4298         }
4299
4300         return;
4301     }
4302
4303     surface_prepare_texture_internal(surface, context, srgb);
4304 }
4305
4306 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4307 {
4308     if (multisample)
4309     {
4310         if (surface->rb_multisample)
4311             return;
4312
4313         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4314         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4315         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4316                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4317         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4318     }
4319     else
4320     {
4321         if (surface->rb_resolved)
4322             return;
4323
4324         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4325         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4326         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4327                 surface->pow2Width, surface->pow2Height);
4328         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4329     }
4330 }
4331
4332 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4333         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4334 {
4335     struct wined3d_device *device = surface->resource.device;
4336     UINT pitch = wined3d_surface_get_pitch(surface);
4337     const struct wined3d_gl_info *gl_info;
4338     struct wined3d_context *context;
4339     RECT local_rect;
4340     UINT w, h;
4341
4342     surface_get_rect(surface, rect, &local_rect);
4343
4344     mem += local_rect.top * pitch + local_rect.left * bpp;
4345     w = local_rect.right - local_rect.left;
4346     h = local_rect.bottom - local_rect.top;
4347
4348     /* Activate the correct context for the render target */
4349     context = context_acquire(device, surface);
4350     context_apply_blit_state(context, device);
4351     gl_info = context->gl_info;
4352
4353     ENTER_GL();
4354
4355     if (!surface_is_offscreen(surface))
4356     {
4357         GLenum buffer = surface_get_gl_buffer(surface);
4358         TRACE("Unlocking %#x buffer.\n", buffer);
4359         context_set_draw_buffer(context, buffer);
4360
4361         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4362         glPixelZoom(1.0f, -1.0f);
4363     }
4364     else
4365     {
4366         /* Primary offscreen render target */
4367         TRACE("Offscreen render target.\n");
4368         context_set_draw_buffer(context, device->offscreenBuffer);
4369
4370         glPixelZoom(1.0f, 1.0f);
4371     }
4372
4373     glRasterPos3i(local_rect.left, local_rect.top, 1);
4374     checkGLcall("glRasterPos3i");
4375
4376     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4377     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4378
4379     if (surface->flags & SFLAG_PBO)
4380     {
4381         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4382         checkGLcall("glBindBufferARB");
4383     }
4384
4385     glDrawPixels(w, h, fmt, type, mem);
4386     checkGLcall("glDrawPixels");
4387
4388     if (surface->flags & SFLAG_PBO)
4389     {
4390         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4391         checkGLcall("glBindBufferARB");
4392     }
4393
4394     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4395     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4396
4397     LEAVE_GL();
4398
4399     if (wined3d_settings.strict_draw_ordering
4400             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4401             && surface->container.u.swapchain->front_buffer == surface))
4402         wglFlush();
4403
4404     context_release(context);
4405 }
4406
4407 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4408         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4409 {
4410     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4411     const struct wined3d_device *device = surface->resource.device;
4412     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4413     BOOL blit_supported = FALSE;
4414
4415     /* Copy the default values from the surface. Below we might perform fixups */
4416     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4417     *format = *surface->resource.format;
4418     *convert = NO_CONVERSION;
4419
4420     /* Ok, now look if we have to do any conversion */
4421     switch (surface->resource.format->id)
4422     {
4423         case WINED3DFMT_P8_UINT:
4424             /* Below the call to blit_supported is disabled for Wine 1.2
4425              * because the function isn't operating correctly yet. At the
4426              * moment 8-bit blits are handled in software and if certain GL
4427              * extensions are around, surface conversion is performed at
4428              * upload time. The blit_supported call recognizes it as a
4429              * destination fixup. This type of upload 'fixup' and 8-bit to
4430              * 8-bit blits need to be handled by the blit_shader.
4431              * TODO: get rid of this #if 0. */
4432 #if 0
4433             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4434                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4435                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4436 #endif
4437             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4438
4439             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4440              * texturing. Further also use conversion in case of color keying.
4441              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4442              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4443              * conflicts with this.
4444              */
4445             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4446                     || colorkey_active || !use_texturing)
4447             {
4448                 format->glFormat = GL_RGBA;
4449                 format->glInternal = GL_RGBA;
4450                 format->glType = GL_UNSIGNED_BYTE;
4451                 format->conv_byte_count = 4;
4452                 if (colorkey_active)
4453                     *convert = CONVERT_PALETTED_CK;
4454                 else
4455                     *convert = CONVERT_PALETTED;
4456             }
4457             break;
4458
4459         case WINED3DFMT_B2G3R3_UNORM:
4460             /* **********************
4461                 GL_UNSIGNED_BYTE_3_3_2
4462                 ********************** */
4463             if (colorkey_active) {
4464                 /* This texture format will never be used.. So do not care about color keying
4465                     up until the point in time it will be needed :-) */
4466                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4467             }
4468             break;
4469
4470         case WINED3DFMT_B5G6R5_UNORM:
4471             if (colorkey_active)
4472             {
4473                 *convert = CONVERT_CK_565;
4474                 format->glFormat = GL_RGBA;
4475                 format->glInternal = GL_RGB5_A1;
4476                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4477                 format->conv_byte_count = 2;
4478             }
4479             break;
4480
4481         case WINED3DFMT_B5G5R5X1_UNORM:
4482             if (colorkey_active)
4483             {
4484                 *convert = CONVERT_CK_5551;
4485                 format->glFormat = GL_BGRA;
4486                 format->glInternal = GL_RGB5_A1;
4487                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4488                 format->conv_byte_count = 2;
4489             }
4490             break;
4491
4492         case WINED3DFMT_B8G8R8_UNORM:
4493             if (colorkey_active)
4494             {
4495                 *convert = CONVERT_CK_RGB24;
4496                 format->glFormat = GL_RGBA;
4497                 format->glInternal = GL_RGBA8;
4498                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4499                 format->conv_byte_count = 4;
4500             }
4501             break;
4502
4503         case WINED3DFMT_B8G8R8X8_UNORM:
4504             if (colorkey_active)
4505             {
4506                 *convert = CONVERT_RGB32_888;
4507                 format->glFormat = GL_RGBA;
4508                 format->glInternal = GL_RGBA8;
4509                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4510                 format->conv_byte_count = 4;
4511             }
4512             break;
4513
4514         default:
4515             break;
4516     }
4517
4518     return WINED3D_OK;
4519 }
4520
4521 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4522 {
4523     const struct wined3d_device *device = surface->resource.device;
4524     const struct wined3d_palette *pal = surface->palette;
4525     BOOL index_in_alpha = FALSE;
4526     unsigned int i;
4527
4528     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4529      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4530      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4531      * duplicate entries. Store the color key in the unused alpha component to speed the
4532      * download up and to make conversion unneeded. */
4533     index_in_alpha = primary_render_target_is_p8(device);
4534
4535     if (!pal)
4536     {
4537         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4538         if (index_in_alpha)
4539         {
4540             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4541              * there's no palette at this time. */
4542             for (i = 0; i < 256; i++) table[i][3] = i;
4543         }
4544     }
4545     else
4546     {
4547         TRACE("Using surface palette %p\n", pal);
4548         /* Get the surface's palette */
4549         for (i = 0; i < 256; ++i)
4550         {
4551             table[i][0] = pal->palents[i].peRed;
4552             table[i][1] = pal->palents[i].peGreen;
4553             table[i][2] = pal->palents[i].peBlue;
4554
4555             /* When index_in_alpha is set the palette index is stored in the
4556              * alpha component. In case of a readback we can then read
4557              * GL_ALPHA. Color keying is handled in BltOverride using a
4558              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4559              * color key itself is passed to glAlphaFunc in other cases the
4560              * alpha component of pixels that should be masked away is set to 0. */
4561             if (index_in_alpha)
4562             {
4563                 table[i][3] = i;
4564             }
4565             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4566                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4567             {
4568                 table[i][3] = 0x00;
4569             }
4570             else if (pal->flags & WINEDDPCAPS_ALPHA)
4571             {
4572                 table[i][3] = pal->palents[i].peFlags;
4573             }
4574             else
4575             {
4576                 table[i][3] = 0xFF;
4577             }
4578         }
4579     }
4580 }
4581
4582 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4583         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4584 {
4585     const BYTE *source;
4586     BYTE *dest;
4587     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4588
4589     switch (convert) {
4590         case NO_CONVERSION:
4591         {
4592             memcpy(dst, src, pitch * height);
4593             break;
4594         }
4595         case CONVERT_PALETTED:
4596         case CONVERT_PALETTED_CK:
4597         {
4598             BYTE table[256][4];
4599             unsigned int x, y;
4600
4601             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4602
4603             for (y = 0; y < height; y++)
4604             {
4605                 source = src + pitch * y;
4606                 dest = dst + outpitch * y;
4607                 /* This is an 1 bpp format, using the width here is fine */
4608                 for (x = 0; x < width; x++) {
4609                     BYTE color = *source++;
4610                     *dest++ = table[color][0];
4611                     *dest++ = table[color][1];
4612                     *dest++ = table[color][2];
4613                     *dest++ = table[color][3];
4614                 }
4615             }
4616         }
4617         break;
4618
4619         case CONVERT_CK_565:
4620         {
4621             /* Converting the 565 format in 5551 packed to emulate color-keying.
4622
4623               Note : in all these conversion, it would be best to average the averaging
4624                       pixels to get the color of the pixel that will be color-keyed to
4625                       prevent 'color bleeding'. This will be done later on if ever it is
4626                       too visible.
4627
4628               Note2: Nvidia documents say that their driver does not support alpha + color keying
4629                      on the same surface and disables color keying in such a case
4630             */
4631             unsigned int x, y;
4632             const WORD *Source;
4633             WORD *Dest;
4634
4635             TRACE("Color keyed 565\n");
4636
4637             for (y = 0; y < height; y++) {
4638                 Source = (const WORD *)(src + y * pitch);
4639                 Dest = (WORD *) (dst + y * outpitch);
4640                 for (x = 0; x < width; x++ ) {
4641                     WORD color = *Source++;
4642                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4643                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4644                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4645                         *Dest |= 0x0001;
4646                     Dest++;
4647                 }
4648             }
4649         }
4650         break;
4651
4652         case CONVERT_CK_5551:
4653         {
4654             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4655             unsigned int x, y;
4656             const WORD *Source;
4657             WORD *Dest;
4658             TRACE("Color keyed 5551\n");
4659             for (y = 0; y < height; y++) {
4660                 Source = (const WORD *)(src + y * pitch);
4661                 Dest = (WORD *) (dst + y * outpitch);
4662                 for (x = 0; x < width; x++ ) {
4663                     WORD color = *Source++;
4664                     *Dest = color;
4665                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4666                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4667                         *Dest |= (1 << 15);
4668                     else
4669                         *Dest &= ~(1 << 15);
4670                     Dest++;
4671                 }
4672             }
4673         }
4674         break;
4675
4676         case CONVERT_CK_RGB24:
4677         {
4678             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4679             unsigned int x, y;
4680             for (y = 0; y < height; y++)
4681             {
4682                 source = src + pitch * y;
4683                 dest = dst + outpitch * y;
4684                 for (x = 0; x < width; x++) {
4685                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4686                     DWORD dstcolor = color << 8;
4687                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4688                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4689                         dstcolor |= 0xff;
4690                     *(DWORD*)dest = dstcolor;
4691                     source += 3;
4692                     dest += 4;
4693                 }
4694             }
4695         }
4696         break;
4697
4698         case CONVERT_RGB32_888:
4699         {
4700             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4701             unsigned int x, y;
4702             for (y = 0; y < height; y++)
4703             {
4704                 source = src + pitch * y;
4705                 dest = dst + outpitch * y;
4706                 for (x = 0; x < width; x++) {
4707                     DWORD color = 0xffffff & *(const DWORD*)source;
4708                     DWORD dstcolor = color << 8;
4709                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4710                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4711                         dstcolor |= 0xff;
4712                     *(DWORD*)dest = dstcolor;
4713                     source += 4;
4714                     dest += 4;
4715                 }
4716             }
4717         }
4718         break;
4719
4720         default:
4721             ERR("Unsupported conversion type %#x.\n", convert);
4722     }
4723     return WINED3D_OK;
4724 }
4725
4726 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4727 {
4728     /* Flip the surface contents */
4729     /* Flip the DC */
4730     {
4731         HDC tmp;
4732         tmp = front->hDC;
4733         front->hDC = back->hDC;
4734         back->hDC = tmp;
4735     }
4736
4737     /* Flip the DIBsection */
4738     {
4739         HBITMAP tmp;
4740         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4741         tmp = front->dib.DIBsection;
4742         front->dib.DIBsection = back->dib.DIBsection;
4743         back->dib.DIBsection = tmp;
4744
4745         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4746         else front->flags &= ~SFLAG_DIBSECTION;
4747         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4748         else back->flags &= ~SFLAG_DIBSECTION;
4749     }
4750
4751     /* Flip the surface data */
4752     {
4753         void* tmp;
4754
4755         tmp = front->dib.bitmap_data;
4756         front->dib.bitmap_data = back->dib.bitmap_data;
4757         back->dib.bitmap_data = tmp;
4758
4759         tmp = front->resource.allocatedMemory;
4760         front->resource.allocatedMemory = back->resource.allocatedMemory;
4761         back->resource.allocatedMemory = tmp;
4762
4763         tmp = front->resource.heapMemory;
4764         front->resource.heapMemory = back->resource.heapMemory;
4765         back->resource.heapMemory = tmp;
4766     }
4767
4768     /* Flip the PBO */
4769     {
4770         GLuint tmp_pbo = front->pbo;
4771         front->pbo = back->pbo;
4772         back->pbo = tmp_pbo;
4773     }
4774
4775     /* Flip the opengl texture */
4776     {
4777         GLuint tmp;
4778
4779         tmp = back->texture_name;
4780         back->texture_name = front->texture_name;
4781         front->texture_name = tmp;
4782
4783         tmp = back->texture_name_srgb;
4784         back->texture_name_srgb = front->texture_name_srgb;
4785         front->texture_name_srgb = tmp;
4786
4787         tmp = back->rb_multisample;
4788         back->rb_multisample = front->rb_multisample;
4789         front->rb_multisample = tmp;
4790
4791         tmp = back->rb_resolved;
4792         back->rb_resolved = front->rb_resolved;
4793         front->rb_resolved = tmp;
4794
4795         resource_unload(&back->resource);
4796         resource_unload(&front->resource);
4797     }
4798
4799     {
4800         DWORD tmp_flags = back->flags;
4801         back->flags = front->flags;
4802         front->flags = tmp_flags;
4803     }
4804 }
4805
4806 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4807  * pixel copy calls. */
4808 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4809         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4810 {
4811     struct wined3d_device *device = dst_surface->resource.device;
4812     float xrel, yrel;
4813     UINT row;
4814     struct wined3d_context *context;
4815     BOOL upsidedown = FALSE;
4816     RECT dst_rect = *dst_rect_in;
4817
4818     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4819      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4820      */
4821     if(dst_rect.top > dst_rect.bottom) {
4822         UINT tmp = dst_rect.bottom;
4823         dst_rect.bottom = dst_rect.top;
4824         dst_rect.top = tmp;
4825         upsidedown = TRUE;
4826     }
4827
4828     context = context_acquire(device, src_surface);
4829     context_apply_blit_state(context, device);
4830     surface_internal_preload(dst_surface, SRGB_RGB);
4831     ENTER_GL();
4832
4833     /* Bind the target texture */
4834     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4835     if (surface_is_offscreen(src_surface))
4836     {
4837         TRACE("Reading from an offscreen target\n");
4838         upsidedown = !upsidedown;
4839         glReadBuffer(device->offscreenBuffer);
4840     }
4841     else
4842     {
4843         glReadBuffer(surface_get_gl_buffer(src_surface));
4844     }
4845     checkGLcall("glReadBuffer");
4846
4847     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4848     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4849
4850     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4851     {
4852         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4853
4854         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4855             ERR("Texture filtering not supported in direct blit\n");
4856         }
4857     }
4858     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4859             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4860     {
4861         ERR("Texture filtering not supported in direct blit\n");
4862     }
4863
4864     if (upsidedown
4865             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4866             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4867     {
4868         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4869
4870         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4871                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4872                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4873                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4874     }
4875     else
4876     {
4877         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4878         /* I have to process this row by row to swap the image,
4879          * otherwise it would be upside down, so stretching in y direction
4880          * doesn't cost extra time
4881          *
4882          * However, stretching in x direction can be avoided if not necessary
4883          */
4884         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4885             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4886             {
4887                 /* Well, that stuff works, but it's very slow.
4888                  * find a better way instead
4889                  */
4890                 UINT col;
4891
4892                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4893                 {
4894                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4895                             dst_rect.left + col /* x offset */, row /* y offset */,
4896                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4897                 }
4898             }
4899             else
4900             {
4901                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4902                         dst_rect.left /* x offset */, row /* y offset */,
4903                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4904             }
4905         }
4906     }
4907     checkGLcall("glCopyTexSubImage2D");
4908
4909     LEAVE_GL();
4910     context_release(context);
4911
4912     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4913      * path is never entered
4914      */
4915     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4916 }
4917
4918 /* Uses the hardware to stretch and flip the image */
4919 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4920         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4921 {
4922     struct wined3d_device *device = dst_surface->resource.device;
4923     struct wined3d_swapchain *src_swapchain = NULL;
4924     GLuint src, backup = 0;
4925     float left, right, top, bottom; /* Texture coordinates */
4926     UINT fbwidth = src_surface->resource.width;
4927     UINT fbheight = src_surface->resource.height;
4928     struct wined3d_context *context;
4929     GLenum drawBuffer = GL_BACK;
4930     GLenum texture_target;
4931     BOOL noBackBufferBackup;
4932     BOOL src_offscreen;
4933     BOOL upsidedown = FALSE;
4934     RECT dst_rect = *dst_rect_in;
4935
4936     TRACE("Using hwstretch blit\n");
4937     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4938     context = context_acquire(device, src_surface);
4939     context_apply_blit_state(context, device);
4940     surface_internal_preload(dst_surface, SRGB_RGB);
4941
4942     src_offscreen = surface_is_offscreen(src_surface);
4943     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4944     if (!noBackBufferBackup && !src_surface->texture_name)
4945     {
4946         /* Get it a description */
4947         surface_internal_preload(src_surface, SRGB_RGB);
4948     }
4949     ENTER_GL();
4950
4951     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4952      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4953      */
4954     if (context->aux_buffers >= 2)
4955     {
4956         /* Got more than one aux buffer? Use the 2nd aux buffer */
4957         drawBuffer = GL_AUX1;
4958     }
4959     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4960     {
4961         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4962         drawBuffer = GL_AUX0;
4963     }
4964
4965     if(noBackBufferBackup) {
4966         glGenTextures(1, &backup);
4967         checkGLcall("glGenTextures");
4968         context_bind_texture(context, GL_TEXTURE_2D, backup);
4969         texture_target = GL_TEXTURE_2D;
4970     } else {
4971         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4972          * we are reading from the back buffer, the backup can be used as source texture
4973          */
4974         texture_target = src_surface->texture_target;
4975         context_bind_texture(context, texture_target, src_surface->texture_name);
4976         glEnable(texture_target);
4977         checkGLcall("glEnable(texture_target)");
4978
4979         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4980         src_surface->flags &= ~SFLAG_INTEXTURE;
4981     }
4982
4983     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4984      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4985      */
4986     if(dst_rect.top > dst_rect.bottom) {
4987         UINT tmp = dst_rect.bottom;
4988         dst_rect.bottom = dst_rect.top;
4989         dst_rect.top = tmp;
4990         upsidedown = TRUE;
4991     }
4992
4993     if (src_offscreen)
4994     {
4995         TRACE("Reading from an offscreen target\n");
4996         upsidedown = !upsidedown;
4997         glReadBuffer(device->offscreenBuffer);
4998     }
4999     else
5000     {
5001         glReadBuffer(surface_get_gl_buffer(src_surface));
5002     }
5003
5004     /* TODO: Only back up the part that will be overwritten */
5005     glCopyTexSubImage2D(texture_target, 0,
5006                         0, 0 /* read offsets */,
5007                         0, 0,
5008                         fbwidth,
5009                         fbheight);
5010
5011     checkGLcall("glCopyTexSubImage2D");
5012
5013     /* No issue with overriding these - the sampler is dirty due to blit usage */
5014     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5015             wined3d_gl_mag_filter(magLookup, Filter));
5016     checkGLcall("glTexParameteri");
5017     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5018             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5019     checkGLcall("glTexParameteri");
5020
5021     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5022         src_swapchain = src_surface->container.u.swapchain;
5023     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5024     {
5025         src = backup ? backup : src_surface->texture_name;
5026     }
5027     else
5028     {
5029         glReadBuffer(GL_FRONT);
5030         checkGLcall("glReadBuffer(GL_FRONT)");
5031
5032         glGenTextures(1, &src);
5033         checkGLcall("glGenTextures(1, &src)");
5034         context_bind_texture(context, GL_TEXTURE_2D, src);
5035
5036         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5037          * out for power of 2 sizes
5038          */
5039         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5040                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5041         checkGLcall("glTexImage2D");
5042         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5043                             0, 0 /* read offsets */,
5044                             0, 0,
5045                             fbwidth,
5046                             fbheight);
5047
5048         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5049         checkGLcall("glTexParameteri");
5050         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5051         checkGLcall("glTexParameteri");
5052
5053         glReadBuffer(GL_BACK);
5054         checkGLcall("glReadBuffer(GL_BACK)");
5055
5056         if(texture_target != GL_TEXTURE_2D) {
5057             glDisable(texture_target);
5058             glEnable(GL_TEXTURE_2D);
5059             texture_target = GL_TEXTURE_2D;
5060         }
5061     }
5062     checkGLcall("glEnd and previous");
5063
5064     left = src_rect->left;
5065     right = src_rect->right;
5066
5067     if (!upsidedown)
5068     {
5069         top = src_surface->resource.height - src_rect->top;
5070         bottom = src_surface->resource.height - src_rect->bottom;
5071     }
5072     else
5073     {
5074         top = src_surface->resource.height - src_rect->bottom;
5075         bottom = src_surface->resource.height - src_rect->top;
5076     }
5077
5078     if (src_surface->flags & SFLAG_NORMCOORD)
5079     {
5080         left /= src_surface->pow2Width;
5081         right /= src_surface->pow2Width;
5082         top /= src_surface->pow2Height;
5083         bottom /= src_surface->pow2Height;
5084     }
5085
5086     /* draw the source texture stretched and upside down. The correct surface is bound already */
5087     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5088     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5089
5090     context_set_draw_buffer(context, drawBuffer);
5091     glReadBuffer(drawBuffer);
5092
5093     glBegin(GL_QUADS);
5094         /* bottom left */
5095         glTexCoord2f(left, bottom);
5096         glVertex2i(0, 0);
5097
5098         /* top left */
5099         glTexCoord2f(left, top);
5100         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5101
5102         /* top right */
5103         glTexCoord2f(right, top);
5104         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5105
5106         /* bottom right */
5107         glTexCoord2f(right, bottom);
5108         glVertex2i(dst_rect.right - dst_rect.left, 0);
5109     glEnd();
5110     checkGLcall("glEnd and previous");
5111
5112     if (texture_target != dst_surface->texture_target)
5113     {
5114         glDisable(texture_target);
5115         glEnable(dst_surface->texture_target);
5116         texture_target = dst_surface->texture_target;
5117     }
5118
5119     /* Now read the stretched and upside down image into the destination texture */
5120     context_bind_texture(context, texture_target, dst_surface->texture_name);
5121     glCopyTexSubImage2D(texture_target,
5122                         0,
5123                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5124                         0, 0, /* We blitted the image to the origin */
5125                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5126     checkGLcall("glCopyTexSubImage2D");
5127
5128     if(drawBuffer == GL_BACK) {
5129         /* Write the back buffer backup back */
5130         if(backup) {
5131             if(texture_target != GL_TEXTURE_2D) {
5132                 glDisable(texture_target);
5133                 glEnable(GL_TEXTURE_2D);
5134                 texture_target = GL_TEXTURE_2D;
5135             }
5136             context_bind_texture(context, GL_TEXTURE_2D, backup);
5137         }
5138         else
5139         {
5140             if (texture_target != src_surface->texture_target)
5141             {
5142                 glDisable(texture_target);
5143                 glEnable(src_surface->texture_target);
5144                 texture_target = src_surface->texture_target;
5145             }
5146             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5147         }
5148
5149         glBegin(GL_QUADS);
5150             /* top left */
5151             glTexCoord2f(0.0f, 0.0f);
5152             glVertex2i(0, fbheight);
5153
5154             /* bottom left */
5155             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5156             glVertex2i(0, 0);
5157
5158             /* bottom right */
5159             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5160                     (float)fbheight / (float)src_surface->pow2Height);
5161             glVertex2i(fbwidth, 0);
5162
5163             /* top right */
5164             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5165             glVertex2i(fbwidth, fbheight);
5166         glEnd();
5167     }
5168     glDisable(texture_target);
5169     checkGLcall("glDisable(texture_target)");
5170
5171     /* Cleanup */
5172     if (src != src_surface->texture_name && src != backup)
5173     {
5174         glDeleteTextures(1, &src);
5175         checkGLcall("glDeleteTextures(1, &src)");
5176     }
5177     if(backup) {
5178         glDeleteTextures(1, &backup);
5179         checkGLcall("glDeleteTextures(1, &backup)");
5180     }
5181
5182     LEAVE_GL();
5183
5184     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5185
5186     context_release(context);
5187
5188     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5189      * path is never entered
5190      */
5191     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5192 }
5193
5194 /* Front buffer coordinates are always full screen coordinates, but our GL
5195  * drawable is limited to the window's client area. The sysmem and texture
5196  * copies do have the full screen size. Note that GL has a bottom-left
5197  * origin, while D3D has a top-left origin. */
5198 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5199 {
5200     UINT drawable_height;
5201
5202     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5203             && surface == surface->container.u.swapchain->front_buffer)
5204     {
5205         POINT offset = {0, 0};
5206         RECT windowsize;
5207
5208         ScreenToClient(window, &offset);
5209         OffsetRect(rect, offset.x, offset.y);
5210
5211         GetClientRect(window, &windowsize);
5212         drawable_height = windowsize.bottom - windowsize.top;
5213     }
5214     else
5215     {
5216         drawable_height = surface->resource.height;
5217     }
5218
5219     rect->top = drawable_height - rect->top;
5220     rect->bottom = drawable_height - rect->bottom;
5221 }
5222
5223 static void surface_blt_to_drawable(const struct wined3d_device *device,
5224         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5225         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5226         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5227 {
5228     struct wined3d_context *context;
5229     RECT src_rect, dst_rect;
5230
5231     src_rect = *src_rect_in;
5232     dst_rect = *dst_rect_in;
5233
5234     /* Make sure the surface is up-to-date. This should probably use
5235      * surface_load_location() and worry about the destination surface too,
5236      * unless we're overwriting it completely. */
5237     surface_internal_preload(src_surface, SRGB_RGB);
5238
5239     /* Activate the destination context, set it up for blitting */
5240     context = context_acquire(device, dst_surface);
5241     context_apply_blit_state(context, device);
5242
5243     if (!surface_is_offscreen(dst_surface))
5244         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5245
5246     device->blitter->set_shader(device->blit_priv, context, src_surface);
5247
5248     ENTER_GL();
5249
5250     if (color_key)
5251     {
5252         glEnable(GL_ALPHA_TEST);
5253         checkGLcall("glEnable(GL_ALPHA_TEST)");
5254
5255         /* When the primary render target uses P8, the alpha component
5256          * contains the palette index. Which means that the colorkey is one of
5257          * the palette entries. In other cases pixels that should be masked
5258          * away have alpha set to 0. */
5259         if (primary_render_target_is_p8(device))
5260             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5261         else
5262             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5263         checkGLcall("glAlphaFunc");
5264     }
5265     else
5266     {
5267         glDisable(GL_ALPHA_TEST);
5268         checkGLcall("glDisable(GL_ALPHA_TEST)");
5269     }
5270
5271     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5272
5273     if (color_key)
5274     {
5275         glDisable(GL_ALPHA_TEST);
5276         checkGLcall("glDisable(GL_ALPHA_TEST)");
5277     }
5278
5279     LEAVE_GL();
5280
5281     /* Leave the opengl state valid for blitting */
5282     device->blitter->unset_shader(context->gl_info);
5283
5284     if (wined3d_settings.strict_draw_ordering
5285             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5286             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5287         wglFlush(); /* Flush to ensure ordering across contexts. */
5288
5289     context_release(context);
5290 }
5291
5292 /* Do not call while under the GL lock. */
5293 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5294 {
5295     struct wined3d_device *device = s->resource.device;
5296     const struct blit_shader *blitter;
5297
5298     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5299             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5300     if (!blitter)
5301     {
5302         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5303         return WINED3DERR_INVALIDCALL;
5304     }
5305
5306     return blitter->color_fill(device, s, rect, color);
5307 }
5308
5309 /* Do not call while under the GL lock. */
5310 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5311         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5312         WINED3DTEXTUREFILTERTYPE Filter)
5313 {
5314     struct wined3d_device *device = dst_surface->resource.device;
5315     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5316     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5317
5318     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5319             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5320             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5321
5322     /* Get the swapchain. One of the surfaces has to be a primary surface */
5323     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5324     {
5325         WARN("Destination is in sysmem, rejecting gl blt\n");
5326         return WINED3DERR_INVALIDCALL;
5327     }
5328
5329     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5330         dstSwapchain = dst_surface->container.u.swapchain;
5331
5332     if (src_surface)
5333     {
5334         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5335         {
5336             WARN("Src is in sysmem, rejecting gl blt\n");
5337             return WINED3DERR_INVALIDCALL;
5338         }
5339
5340         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5341             srcSwapchain = src_surface->container.u.swapchain;
5342     }
5343
5344     /* Early sort out of cases where no render target is used */
5345     if (!dstSwapchain && !srcSwapchain
5346             && src_surface != device->fb.render_targets[0]
5347             && dst_surface != device->fb.render_targets[0])
5348     {
5349         TRACE("No surface is render target, not using hardware blit.\n");
5350         return WINED3DERR_INVALIDCALL;
5351     }
5352
5353     /* No destination color keying supported */
5354     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5355     {
5356         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5357         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5358         return WINED3DERR_INVALIDCALL;
5359     }
5360
5361     if (dstSwapchain && dstSwapchain == srcSwapchain)
5362     {
5363         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5364         return WINED3DERR_INVALIDCALL;
5365     }
5366
5367     if (dstSwapchain && srcSwapchain)
5368     {
5369         FIXME("Implement hardware blit between two different swapchains\n");
5370         return WINED3DERR_INVALIDCALL;
5371     }
5372
5373     if (dstSwapchain)
5374     {
5375         /* Handled with regular texture -> swapchain blit */
5376         if (src_surface == device->fb.render_targets[0])
5377             TRACE("Blit from active render target to a swapchain\n");
5378     }
5379     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5380     {
5381         FIXME("Implement blit from a swapchain to the active render target\n");
5382         return WINED3DERR_INVALIDCALL;
5383     }
5384
5385     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5386     {
5387         /* Blit from render target to texture */
5388         BOOL stretchx;
5389
5390         /* P8 read back is not implemented */
5391         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5392                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5393         {
5394             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5395             return WINED3DERR_INVALIDCALL;
5396         }
5397
5398         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5399         {
5400             TRACE("Color keying not supported by frame buffer to texture blit\n");
5401             return WINED3DERR_INVALIDCALL;
5402             /* Destination color key is checked above */
5403         }
5404
5405         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5406             stretchx = TRUE;
5407         else
5408             stretchx = FALSE;
5409
5410         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5411          * flip the image nor scale it.
5412          *
5413          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5414          * -> If the app wants a image width an unscaled width, copy it line per line
5415          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5416          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5417          *    back buffer. This is slower than reading line per line, thus not used for flipping
5418          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5419          *    pixel by pixel. */
5420         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5421                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5422         {
5423             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5424             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5425         } else {
5426             TRACE("Using hardware stretching to flip / stretch the texture\n");
5427             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5428         }
5429
5430         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5431         {
5432             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5433             dst_surface->resource.allocatedMemory = NULL;
5434             dst_surface->resource.heapMemory = NULL;
5435         }
5436         else
5437         {
5438             dst_surface->flags &= ~SFLAG_INSYSMEM;
5439         }
5440
5441         return WINED3D_OK;
5442     }
5443     else if (src_surface)
5444     {
5445         /* Blit from offscreen surface to render target */
5446         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5447         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5448
5449         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5450
5451         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5452                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5453                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5454         {
5455             FIXME("Unsupported blit operation falling back to software\n");
5456             return WINED3DERR_INVALIDCALL;
5457         }
5458
5459         /* Color keying: Check if we have to do a color keyed blt,
5460          * and if not check if a color key is activated.
5461          *
5462          * Just modify the color keying parameters in the surface and restore them afterwards
5463          * The surface keeps track of the color key last used to load the opengl surface.
5464          * PreLoad will catch the change to the flags and color key and reload if necessary.
5465          */
5466         if (flags & WINEDDBLT_KEYSRC)
5467         {
5468             /* Use color key from surface */
5469         }
5470         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5471         {
5472             /* Use color key from DDBltFx */
5473             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5474             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5475         }
5476         else
5477         {
5478             /* Do not use color key */
5479             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5480         }
5481
5482         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5483                 src_surface, src_rect, dst_surface, dst_rect);
5484
5485         /* Restore the color key parameters */
5486         src_surface->CKeyFlags = oldCKeyFlags;
5487         src_surface->SrcBltCKey = oldBltCKey;
5488
5489         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5490
5491         return WINED3D_OK;
5492     }
5493
5494     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5495     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5496     return WINED3DERR_INVALIDCALL;
5497 }
5498
5499 /* GL locking is done by the caller */
5500 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5501         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5502 {
5503     struct wined3d_device *device = surface->resource.device;
5504     const struct wined3d_gl_info *gl_info = context->gl_info;
5505     GLint compare_mode = GL_NONE;
5506     struct blt_info info;
5507     GLint old_binding = 0;
5508     RECT rect;
5509
5510     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5511
5512     glDisable(GL_CULL_FACE);
5513     glDisable(GL_BLEND);
5514     glDisable(GL_ALPHA_TEST);
5515     glDisable(GL_SCISSOR_TEST);
5516     glDisable(GL_STENCIL_TEST);
5517     glEnable(GL_DEPTH_TEST);
5518     glDepthFunc(GL_ALWAYS);
5519     glDepthMask(GL_TRUE);
5520     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5521     glViewport(x, y, w, h);
5522
5523     SetRect(&rect, 0, h, w, 0);
5524     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5525     context_active_texture(context, context->gl_info, 0);
5526     glGetIntegerv(info.binding, &old_binding);
5527     glBindTexture(info.bind_target, texture);
5528     if (gl_info->supported[ARB_SHADOW])
5529     {
5530         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5531         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5532     }
5533
5534     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5535             gl_info, info.tex_type, &surface->ds_current_size);
5536
5537     glBegin(GL_TRIANGLE_STRIP);
5538     glTexCoord3fv(info.coords[0]);
5539     glVertex2f(-1.0f, -1.0f);
5540     glTexCoord3fv(info.coords[1]);
5541     glVertex2f(1.0f, -1.0f);
5542     glTexCoord3fv(info.coords[2]);
5543     glVertex2f(-1.0f, 1.0f);
5544     glTexCoord3fv(info.coords[3]);
5545     glVertex2f(1.0f, 1.0f);
5546     glEnd();
5547
5548     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5549     glBindTexture(info.bind_target, old_binding);
5550
5551     glPopAttrib();
5552
5553     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5554 }
5555
5556 void surface_modify_ds_location(struct wined3d_surface *surface,
5557         DWORD location, UINT w, UINT h)
5558 {
5559     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5560
5561     if (location & ~SFLAG_DS_LOCATIONS)
5562         FIXME("Invalid location (%#x) specified.\n", location);
5563
5564     surface->ds_current_size.cx = w;
5565     surface->ds_current_size.cy = h;
5566     surface->flags &= ~SFLAG_DS_LOCATIONS;
5567     surface->flags |= location;
5568 }
5569
5570 /* Context activation is done by the caller. */
5571 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5572 {
5573     struct wined3d_device *device = surface->resource.device;
5574     GLsizei w, h;
5575
5576     TRACE("surface %p, new location %#x.\n", surface, location);
5577
5578     /* TODO: Make this work for modes other than FBO */
5579     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5580
5581     if (!(surface->flags & location))
5582     {
5583         w = surface->ds_current_size.cx;
5584         h = surface->ds_current_size.cy;
5585         surface->ds_current_size.cx = 0;
5586         surface->ds_current_size.cy = 0;
5587     }
5588     else
5589     {
5590         w = surface->resource.width;
5591         h = surface->resource.height;
5592     }
5593
5594     if (surface->ds_current_size.cx == surface->resource.width
5595             && surface->ds_current_size.cy == surface->resource.height)
5596     {
5597         TRACE("Location (%#x) is already up to date.\n", location);
5598         return;
5599     }
5600
5601     if (surface->current_renderbuffer)
5602     {
5603         FIXME("Not supported with fixed up depth stencil.\n");
5604         return;
5605     }
5606
5607     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5608     {
5609         /* This mostly happens when a depth / stencil is used without being
5610          * cleared first. In principle we could upload from sysmem, or
5611          * explicitly clear before first usage. For the moment there don't
5612          * appear to be a lot of applications depending on this, so a FIXME
5613          * should do. */
5614         FIXME("No up to date depth stencil location.\n");
5615         surface->flags |= location;
5616         surface->ds_current_size.cx = surface->resource.width;
5617         surface->ds_current_size.cy = surface->resource.height;
5618         return;
5619     }
5620
5621     if (location == SFLAG_DS_OFFSCREEN)
5622     {
5623         GLint old_binding = 0;
5624         GLenum bind_target;
5625
5626         /* The render target is allowed to be smaller than the depth/stencil
5627          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5628          * than the offscreen surface. Don't overwrite the offscreen surface
5629          * with undefined data. */
5630         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5631         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5632
5633         TRACE("Copying onscreen depth buffer to depth texture.\n");
5634
5635         ENTER_GL();
5636
5637         if (!device->depth_blt_texture)
5638         {
5639             glGenTextures(1, &device->depth_blt_texture);
5640         }
5641
5642         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5643          * directly on the FBO texture. That's because we need to flip. */
5644         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5645                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5646         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5647         {
5648             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5649             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5650         }
5651         else
5652         {
5653             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5654             bind_target = GL_TEXTURE_2D;
5655         }
5656         glBindTexture(bind_target, device->depth_blt_texture);
5657         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5658          * internal format, because the internal format might include stencil
5659          * data. In principle we should copy stencil data as well, but unless
5660          * the driver supports stencil export it's hard to do, and doesn't
5661          * seem to be needed in practice. If the hardware doesn't support
5662          * writing stencil data, the glCopyTexImage2D() call might trigger
5663          * software fallbacks. */
5664         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5665         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5666         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5667         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5668         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5669         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5670         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5671         glBindTexture(bind_target, old_binding);
5672
5673         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5674                 NULL, surface, SFLAG_INTEXTURE);
5675         context_set_draw_buffer(context, GL_NONE);
5676         glReadBuffer(GL_NONE);
5677
5678         /* Do the actual blit */
5679         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5680         checkGLcall("depth_blt");
5681
5682         context_invalidate_state(context, STATE_FRAMEBUFFER);
5683
5684         LEAVE_GL();
5685
5686         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5687     }
5688     else if (location == SFLAG_DS_ONSCREEN)
5689     {
5690         TRACE("Copying depth texture to onscreen depth buffer.\n");
5691
5692         ENTER_GL();
5693
5694         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5695                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5696         surface_depth_blt(surface, context, surface->texture_name,
5697                 0, surface->pow2Height - h, w, h, surface->texture_target);
5698         checkGLcall("depth_blt");
5699
5700         context_invalidate_state(context, STATE_FRAMEBUFFER);
5701
5702         LEAVE_GL();
5703
5704         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5705     }
5706     else
5707     {
5708         ERR("Invalid location (%#x) specified.\n", location);
5709     }
5710
5711     surface->flags |= location;
5712     surface->ds_current_size.cx = surface->resource.width;
5713     surface->ds_current_size.cy = surface->resource.height;
5714 }
5715
5716 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5717 {
5718     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5719     struct wined3d_surface *overlay;
5720
5721     TRACE("surface %p, location %s, persistent %#x.\n",
5722             surface, debug_surflocation(location), persistent);
5723
5724     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5725             && (location & SFLAG_INDRAWABLE))
5726         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5727
5728     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5729             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5730         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5731
5732     if (persistent)
5733     {
5734         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5735                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5736         {
5737             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5738             {
5739                 TRACE("Passing to container.\n");
5740                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5741             }
5742         }
5743         surface->flags &= ~SFLAG_LOCATIONS;
5744         surface->flags |= location;
5745
5746         /* Redraw emulated overlays, if any */
5747         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5748         {
5749             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5750             {
5751                 surface_draw_overlay(overlay);
5752             }
5753         }
5754     }
5755     else
5756     {
5757         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5758         {
5759             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5760             {
5761                 TRACE("Passing to container\n");
5762                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5763             }
5764         }
5765         surface->flags &= ~location;
5766     }
5767
5768     if (!(surface->flags & SFLAG_LOCATIONS))
5769     {
5770         ERR("Surface %p does not have any up to date location.\n", surface);
5771     }
5772 }
5773
5774 static DWORD resource_access_from_location(DWORD location)
5775 {
5776     switch (location)
5777     {
5778         case SFLAG_INSYSMEM:
5779             return WINED3D_RESOURCE_ACCESS_CPU;
5780
5781         case SFLAG_INDRAWABLE:
5782         case SFLAG_INSRGBTEX:
5783         case SFLAG_INTEXTURE:
5784         case SFLAG_INRB_MULTISAMPLE:
5785         case SFLAG_INRB_RESOLVED:
5786             return WINED3D_RESOURCE_ACCESS_GPU;
5787
5788         default:
5789             FIXME("Unhandled location %#x.\n", location);
5790             return 0;
5791     }
5792 }
5793
5794 static void surface_load_sysmem(struct wined3d_surface *surface,
5795         const struct wined3d_gl_info *gl_info, const RECT *rect)
5796 {
5797     surface_prepare_system_memory(surface);
5798
5799     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5800         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5801
5802     /* Download the surface to system memory. */
5803     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5804     {
5805         struct wined3d_device *device = surface->resource.device;
5806         struct wined3d_context *context;
5807
5808         /* TODO: Use already acquired context when possible. */
5809         context = context_acquire(device, NULL);
5810
5811         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5812         surface_download_data(surface, gl_info);
5813
5814         context_release(context);
5815
5816         return;
5817     }
5818
5819     if (surface->flags & SFLAG_INDRAWABLE)
5820     {
5821         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5822                 wined3d_surface_get_pitch(surface));
5823         return;
5824     }
5825
5826     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5827             surface, surface->flags & SFLAG_LOCATIONS);
5828 }
5829
5830 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5831         const struct wined3d_gl_info *gl_info, const RECT *rect)
5832 {
5833     struct wined3d_device *device = surface->resource.device;
5834     struct wined3d_format format;
5835     CONVERT_TYPES convert;
5836     UINT byte_count;
5837     BYTE *mem;
5838
5839     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5840     {
5841         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5842         return WINED3DERR_INVALIDCALL;
5843     }
5844
5845     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5846         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5847
5848     if (surface->flags & SFLAG_INTEXTURE)
5849     {
5850         RECT r;
5851
5852         surface_get_rect(surface, rect, &r);
5853         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5854
5855         return WINED3D_OK;
5856     }
5857
5858     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5859     {
5860         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5861          * path through sysmem. */
5862         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5863     }
5864
5865     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5866
5867     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5868      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5869      * called. */
5870     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5871     {
5872         struct wined3d_context *context;
5873
5874         TRACE("Removing the pbo attached to surface %p.\n", surface);
5875
5876         /* TODO: Use already acquired context when possible. */
5877         context = context_acquire(device, NULL);
5878
5879         surface_remove_pbo(surface, gl_info);
5880
5881         context_release(context);
5882     }
5883
5884     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5885     {
5886         UINT height = surface->resource.height;
5887         UINT width = surface->resource.width;
5888         UINT src_pitch, dst_pitch;
5889
5890         byte_count = format.conv_byte_count;
5891         src_pitch = wined3d_surface_get_pitch(surface);
5892
5893         /* Stick to the alignment for the converted surface too, makes it
5894          * easier to load the surface. */
5895         dst_pitch = width * byte_count;
5896         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5897
5898         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5899         {
5900             ERR("Out of memory (%u).\n", dst_pitch * height);
5901             return E_OUTOFMEMORY;
5902         }
5903
5904         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5905                 src_pitch, width, height, dst_pitch, convert, surface);
5906
5907         surface->flags |= SFLAG_CONVERTED;
5908     }
5909     else
5910     {
5911         surface->flags &= ~SFLAG_CONVERTED;
5912         mem = surface->resource.allocatedMemory;
5913         byte_count = format.byte_count;
5914     }
5915
5916     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5917
5918     /* Don't delete PBO memory. */
5919     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5920         HeapFree(GetProcessHeap(), 0, mem);
5921
5922     return WINED3D_OK;
5923 }
5924
5925 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5926         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5927 {
5928     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5929     struct wined3d_device *device = surface->resource.device;
5930     struct wined3d_context *context;
5931     UINT width, src_pitch, dst_pitch;
5932     struct wined3d_bo_address data;
5933     struct wined3d_format format;
5934     POINT dst_point = {0, 0};
5935     CONVERT_TYPES convert;
5936     BYTE *mem;
5937
5938     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5939             && surface_is_offscreen(surface)
5940             && (surface->flags & SFLAG_INDRAWABLE))
5941     {
5942         surface_load_fb_texture(surface, srgb);
5943
5944         return WINED3D_OK;
5945     }
5946
5947     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5948             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
5949             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5950                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5951                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5952     {
5953         if (srgb)
5954             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5955                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5956         else
5957             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5958                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5959
5960         return WINED3D_OK;
5961     }
5962
5963     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
5964             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
5965             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5966                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5967                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5968     {
5969         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
5970         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
5971         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
5972
5973         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
5974                 &rect, surface, dst_location, &rect);
5975
5976         return WINED3D_OK;
5977     }
5978
5979     /* Upload from system memory */
5980
5981     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
5982             TRUE /* We will use textures */, &format, &convert);
5983
5984     if (srgb)
5985     {
5986         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
5987         {
5988             /* Performance warning... */
5989             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
5990             surface_load_location(surface, SFLAG_INSYSMEM, rect);
5991         }
5992     }
5993     else
5994     {
5995         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
5996         {
5997             /* Performance warning... */
5998             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
5999             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6000         }
6001     }
6002
6003     if (!(surface->flags & SFLAG_INSYSMEM))
6004     {
6005         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6006         /* Lets hope we get it from somewhere... */
6007         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6008     }
6009
6010     /* TODO: Use already acquired context when possible. */
6011     context = context_acquire(device, NULL);
6012
6013     surface_prepare_texture(surface, context, srgb);
6014     surface_bind_and_dirtify(surface, context, srgb);
6015
6016     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6017     {
6018         surface->flags |= SFLAG_GLCKEY;
6019         surface->glCKey = surface->SrcBltCKey;
6020     }
6021     else surface->flags &= ~SFLAG_GLCKEY;
6022
6023     width = surface->resource.width;
6024     src_pitch = wined3d_surface_get_pitch(surface);
6025
6026     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6027      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6028      * called. */
6029     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6030     {
6031         TRACE("Removing the pbo attached to surface %p.\n", surface);
6032         surface_remove_pbo(surface, gl_info);
6033     }
6034
6035     if (format.convert)
6036     {
6037         /* This code is entered for texture formats which need a fixup. */
6038         UINT height = surface->resource.height;
6039
6040         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6041         dst_pitch = width * format.conv_byte_count;
6042         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6043
6044         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6045         {
6046             ERR("Out of memory (%u).\n", dst_pitch * height);
6047             context_release(context);
6048             return E_OUTOFMEMORY;
6049         }
6050         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6051     }
6052     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6053     {
6054         /* This code is only entered for color keying fixups */
6055         UINT height = surface->resource.height;
6056
6057         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6058         dst_pitch = width * format.conv_byte_count;
6059         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6060
6061         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6062         {
6063             ERR("Out of memory (%u).\n", dst_pitch * height);
6064             context_release(context);
6065             return E_OUTOFMEMORY;
6066         }
6067         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6068                 width, height, dst_pitch, convert, surface);
6069     }
6070     else
6071     {
6072         mem = surface->resource.allocatedMemory;
6073     }
6074
6075     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6076     data.addr = mem;
6077     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6078
6079     context_release(context);
6080
6081     /* Don't delete PBO memory. */
6082     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6083         HeapFree(GetProcessHeap(), 0, mem);
6084
6085     return WINED3D_OK;
6086 }
6087
6088 static void surface_multisample_resolve(struct wined3d_surface *surface)
6089 {
6090     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6091
6092     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6093         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6094
6095     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6096             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6097 }
6098
6099 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6100 {
6101     struct wined3d_device *device = surface->resource.device;
6102     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6103     HRESULT hr;
6104
6105     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6106
6107     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6108     {
6109         if (location == SFLAG_INTEXTURE)
6110         {
6111             struct wined3d_context *context = context_acquire(device, NULL);
6112             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6113             context_release(context);
6114             return WINED3D_OK;
6115         }
6116         else
6117         {
6118             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6119             return WINED3DERR_INVALIDCALL;
6120         }
6121     }
6122
6123     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6124         location = SFLAG_INTEXTURE;
6125
6126     if (surface->flags & location)
6127     {
6128         TRACE("Location already up to date.\n");
6129         return WINED3D_OK;
6130     }
6131
6132     if (WARN_ON(d3d_surface))
6133     {
6134         DWORD required_access = resource_access_from_location(location);
6135         if ((surface->resource.access_flags & required_access) != required_access)
6136             WARN("Operation requires %#x access, but surface only has %#x.\n",
6137                     required_access, surface->resource.access_flags);
6138     }
6139
6140     if (!(surface->flags & SFLAG_LOCATIONS))
6141     {
6142         ERR("Surface %p does not have any up to date location.\n", surface);
6143         surface->flags |= SFLAG_LOST;
6144         return WINED3DERR_DEVICELOST;
6145     }
6146
6147     switch (location)
6148     {
6149         case SFLAG_INSYSMEM:
6150             surface_load_sysmem(surface, gl_info, rect);
6151             break;
6152
6153         case SFLAG_INDRAWABLE:
6154             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6155                 return hr;
6156             break;
6157
6158         case SFLAG_INRB_RESOLVED:
6159             surface_multisample_resolve(surface);
6160             break;
6161
6162         case SFLAG_INTEXTURE:
6163         case SFLAG_INSRGBTEX:
6164             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6165                 return hr;
6166             break;
6167
6168         default:
6169             ERR("Don't know how to handle location %#x.\n", location);
6170             break;
6171     }
6172
6173     if (!rect)
6174     {
6175         surface->flags |= location;
6176
6177         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6178             surface_evict_sysmem(surface);
6179     }
6180
6181     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6182             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6183     {
6184         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6185     }
6186
6187     return WINED3D_OK;
6188 }
6189
6190 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6191 {
6192     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6193
6194     /* Not on a swapchain - must be offscreen */
6195     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6196
6197     /* The front buffer is always onscreen */
6198     if (surface == swapchain->front_buffer) return FALSE;
6199
6200     /* If the swapchain is rendered to an FBO, the backbuffer is
6201      * offscreen, otherwise onscreen */
6202     return swapchain->render_to_fbo;
6203 }
6204
6205 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Blitter "free" hook of the fixed function blitter; intentionally empty.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6208
6209 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6210 /* Context activation is done by the caller. */
6211 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6212 {
6213     BYTE table[256][4];
6214     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6215
6216     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6217
6218     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6219     ENTER_GL();
6220     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6221     LEAVE_GL();
6222 }
6223
6224 /* Context activation is done by the caller. */
6225 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6226 {
6227     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6228
6229     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6230      * else the surface is converted in software at upload time in LoadLocation.
6231      */
6232     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6233         ffp_blit_p8_upload_palette(surface, context->gl_info);
6234
6235     ENTER_GL();
6236     glEnable(surface->texture_target);
6237     checkGLcall("glEnable(surface->texture_target)");
6238     LEAVE_GL();
6239     return WINED3D_OK;
6240 }
6241
6242 /* Context activation is done by the caller. */
6243 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6244 {
6245     ENTER_GL();
6246     glDisable(GL_TEXTURE_2D);
6247     checkGLcall("glDisable(GL_TEXTURE_2D)");
6248     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6249     {
6250         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6251         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6252     }
6253     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6254     {
6255         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6256         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6257     }
6258     LEAVE_GL();
6259 }
6260
6261 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6262         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6263         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6264 {
6265     enum complex_fixup src_fixup;
6266
6267     switch (blit_op)
6268     {
6269         case WINED3D_BLIT_OP_COLOR_BLIT:
6270             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6271                 return FALSE;
6272
6273             src_fixup = get_complex_fixup(src_format->color_fixup);
6274             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6275             {
6276                 TRACE("Checking support for fixup:\n");
6277                 dump_color_fixup_desc(src_format->color_fixup);
6278             }
6279
6280             if (!is_identity_fixup(dst_format->color_fixup))
6281             {
6282                 TRACE("Destination fixups are not supported\n");
6283                 return FALSE;
6284             }
6285
6286             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6287             {
6288                 TRACE("P8 fixup supported\n");
6289                 return TRUE;
6290             }
6291
6292             /* We only support identity conversions. */
6293             if (is_identity_fixup(src_format->color_fixup))
6294             {
6295                 TRACE("[OK]\n");
6296                 return TRUE;
6297             }
6298
6299             TRACE("[FAILED]\n");
6300             return FALSE;
6301
6302         case WINED3D_BLIT_OP_COLOR_FILL:
6303             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6304                 return FALSE;
6305
6306             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6307             {
6308                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6309                     return FALSE;
6310             }
6311             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6312             {
6313                 TRACE("Color fill not supported\n");
6314                 return FALSE;
6315             }
6316
6317             /* FIXME: We should reject color fills on formats with fixups,
6318              * but this would break P8 color fills for example. */
6319
6320             return TRUE;
6321
6322         case WINED3D_BLIT_OP_DEPTH_FILL:
6323             return TRUE;
6324
6325         default:
6326             TRACE("Unsupported blit_op=%d\n", blit_op);
6327             return FALSE;
6328     }
6329 }
6330
6331 /* Do not call while under the GL lock. */
6332 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6333         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6334 {
6335     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6336     struct wined3d_fb_state fb = {&dst_surface, NULL};
6337
6338     return device_clear_render_targets(device, 1, &fb,
6339             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6340 }
6341
6342 /* Do not call while under the GL lock. */
6343 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6344         struct wined3d_surface *surface, const RECT *rect, float depth)
6345 {
6346     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6347     struct wined3d_fb_state fb = {NULL, surface};
6348
6349     return device_clear_render_targets(device, 0, &fb,
6350             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6351 }
6352
6353 const struct blit_shader ffp_blit =  {
6354     ffp_blit_alloc,
6355     ffp_blit_free,
6356     ffp_blit_set,
6357     ffp_blit_unset,
6358     ffp_blit_supported,
6359     ffp_blit_color_fill,
6360     ffp_blit_depth_fill,
6361 };
6362
6363 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6364 {
6365     return WINED3D_OK;
6366 }
6367
/* Counterpart of cpu_blit_alloc(); intentionally empty because nothing was
 * allocated.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6372
6373 /* Context activation is done by the caller. */
6374 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6375 {
6376     return WINED3D_OK;
6377 }
6378
/* Counterpart of cpu_blit_set(); intentionally empty because no GL state was
 * changed.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6383
6384 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6385         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6386         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6387 {
6388     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6389     {
6390         return TRUE;
6391     }
6392
6393     return FALSE;
6394 }
6395
6396 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6397         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6398         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6399 {
6400     UINT row_block_count;
6401     const BYTE *src_row;
6402     BYTE *dst_row;
6403     UINT x, y;
6404
6405     src_row = src_data;
6406     dst_row = dst_data;
6407
6408     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6409
6410     if (!flags)
6411     {
6412         for (y = 0; y < update_h; y += format->block_height)
6413         {
6414             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6415             src_row += src_pitch;
6416             dst_row += dst_pitch;
6417         }
6418
6419         return WINED3D_OK;
6420     }
6421
6422     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6423     {
6424         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6425
6426         switch (format->id)
6427         {
6428             case WINED3DFMT_DXT1:
6429                 for (y = 0; y < update_h; y += format->block_height)
6430                 {
6431                     struct block
6432                     {
6433                         WORD color[2];
6434                         BYTE control_row[4];
6435                     };
6436
6437                     const struct block *s = (const struct block *)src_row;
6438                     struct block *d = (struct block *)dst_row;
6439
6440                     for (x = 0; x < row_block_count; ++x)
6441                     {
6442                         d[x].color[0] = s[x].color[0];
6443                         d[x].color[1] = s[x].color[1];
6444                         d[x].control_row[0] = s[x].control_row[3];
6445                         d[x].control_row[1] = s[x].control_row[2];
6446                         d[x].control_row[2] = s[x].control_row[1];
6447                         d[x].control_row[3] = s[x].control_row[0];
6448                     }
6449                     src_row -= src_pitch;
6450                     dst_row += dst_pitch;
6451                 }
6452                 return WINED3D_OK;
6453
6454             case WINED3DFMT_DXT3:
6455                 for (y = 0; y < update_h; y += format->block_height)
6456                 {
6457                     struct block
6458                     {
6459                         WORD alpha_row[4];
6460                         WORD color[2];
6461                         BYTE control_row[4];
6462                     };
6463
6464                     const struct block *s = (const struct block *)src_row;
6465                     struct block *d = (struct block *)dst_row;
6466
6467                     for (x = 0; x < row_block_count; ++x)
6468                     {
6469                         d[x].alpha_row[0] = s[x].alpha_row[3];
6470                         d[x].alpha_row[1] = s[x].alpha_row[2];
6471                         d[x].alpha_row[2] = s[x].alpha_row[1];
6472                         d[x].alpha_row[3] = s[x].alpha_row[0];
6473                         d[x].color[0] = s[x].color[0];
6474                         d[x].color[1] = s[x].color[1];
6475                         d[x].control_row[0] = s[x].control_row[3];
6476                         d[x].control_row[1] = s[x].control_row[2];
6477                         d[x].control_row[2] = s[x].control_row[1];
6478                         d[x].control_row[3] = s[x].control_row[0];
6479                     }
6480                     src_row -= src_pitch;
6481                     dst_row += dst_pitch;
6482                 }
6483                 return WINED3D_OK;
6484
6485             default:
6486                 FIXME("Compressed flip not implemented for format %s.\n",
6487                         debug_d3dformat(format->id));
6488                 return E_NOTIMPL;
6489         }
6490     }
6491
6492     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6493             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6494
6495     return E_NOTIMPL;
6496 }
6497
6498 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6499         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6500         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6501 {
6502     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6503     const struct wined3d_format *src_format, *dst_format;
6504     struct wined3d_surface *orig_src = src_surface;
6505     WINED3DLOCKED_RECT dlock, slock;
6506     HRESULT hr = WINED3D_OK;
6507     const BYTE *sbuf;
6508     RECT xdst,xsrc;
6509     BYTE *dbuf;
6510     int x, y;
6511
6512     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6513             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6514             flags, fx, debug_d3dtexturefiltertype(filter));
6515
6516     xsrc = *src_rect;
6517
6518     if (!src_surface)
6519     {
6520         RECT full_rect;
6521
6522         full_rect.left = 0;
6523         full_rect.top = 0;
6524         full_rect.right = dst_surface->resource.width;
6525         full_rect.bottom = dst_surface->resource.height;
6526         IntersectRect(&xdst, &full_rect, dst_rect);
6527     }
6528     else
6529     {
6530         BOOL clip_horiz, clip_vert;
6531
6532         xdst = *dst_rect;
6533         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6534         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6535
6536         if (clip_vert || clip_horiz)
6537         {
6538             /* Now check if this is a special case or not... */
6539             if ((flags & WINEDDBLT_DDFX)
6540                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6541                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6542             {
6543                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6544                 return WINED3D_OK;
6545             }
6546
6547             if (clip_horiz)
6548             {
6549                 if (xdst.left < 0)
6550                 {
6551                     xsrc.left -= xdst.left;
6552                     xdst.left = 0;
6553                 }
6554                 if (xdst.right > dst_surface->resource.width)
6555                 {
6556                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6557                     xdst.right = (int)dst_surface->resource.width;
6558                 }
6559             }
6560
6561             if (clip_vert)
6562             {
6563                 if (xdst.top < 0)
6564                 {
6565                     xsrc.top -= xdst.top;
6566                     xdst.top = 0;
6567                 }
6568                 if (xdst.bottom > dst_surface->resource.height)
6569                 {
6570                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6571                     xdst.bottom = (int)dst_surface->resource.height;
6572                 }
6573             }
6574
6575             /* And check if after clipping something is still to be done... */
6576             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6577                     || (xdst.left >= (int)dst_surface->resource.width)
6578                     || (xdst.top >= (int)dst_surface->resource.height)
6579                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6580                     || (xsrc.left >= (int)src_surface->resource.width)
6581                     || (xsrc.top >= (int)src_surface->resource.height))
6582             {
6583                 TRACE("Nothing to be done after clipping.\n");
6584                 return WINED3D_OK;
6585             }
6586         }
6587     }
6588
6589     if (src_surface == dst_surface)
6590     {
6591         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6592         slock = dlock;
6593         src_format = dst_surface->resource.format;
6594         dst_format = src_format;
6595     }
6596     else
6597     {
6598         dst_format = dst_surface->resource.format;
6599         if (src_surface)
6600         {
6601             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6602             {
6603                 src_surface = surface_convert_format(src_surface, dst_format->id);
6604                 if (!src_surface)
6605                 {
6606                     /* The conv function writes a FIXME */
6607                     WARN("Cannot convert source surface format to dest format.\n");
6608                     goto release;
6609                 }
6610             }
6611             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6612             src_format = src_surface->resource.format;
6613         }
6614         else
6615         {
6616             src_format = dst_format;
6617         }
6618         if (dst_rect)
6619             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6620         else
6621             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6622     }
6623
6624     bpp = dst_surface->resource.format->byte_count;
6625     srcheight = xsrc.bottom - xsrc.top;
6626     srcwidth = xsrc.right - xsrc.left;
6627     dstheight = xdst.bottom - xdst.top;
6628     dstwidth = xdst.right - xdst.left;
6629     width = (xdst.right - xdst.left) * bpp;
6630
6631     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_BLOCKS)
6632     {
6633         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6634
6635         if (src_surface == dst_surface)
6636         {
6637             FIXME("Only plain blits supported on compressed surfaces.\n");
6638             hr = E_NOTIMPL;
6639             goto release;
6640         }
6641
6642         if (srcheight != dstheight || srcwidth != dstwidth)
6643         {
6644             WARN("Stretching not supported on compressed surfaces.\n");
6645             hr = WINED3DERR_INVALIDCALL;
6646             goto release;
6647         }
6648
6649         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6650         {
6651             WARN("Rectangle not block-aligned.\n");
6652             hr = WINED3DERR_INVALIDCALL;
6653             goto release;
6654         }
6655
6656         hr = surface_cpu_blt_compressed(slock.pBits, dlock.pBits,
6657                 slock.Pitch, dlock.Pitch, dstwidth, dstheight,
6658                 src_format, flags, fx);
6659         goto release;
6660     }
6661
6662     if (dst_rect && src_surface != dst_surface)
6663         dbuf = dlock.pBits;
6664     else
6665         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6666
6667     /* First, all the 'source-less' blits */
6668     if (flags & WINEDDBLT_COLORFILL)
6669     {
6670         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6671         flags &= ~WINEDDBLT_COLORFILL;
6672     }
6673
6674     if (flags & WINEDDBLT_DEPTHFILL)
6675     {
6676         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6677     }
6678     if (flags & WINEDDBLT_ROP)
6679     {
6680         /* Catch some degenerate cases here. */
6681         switch (fx->dwROP)
6682         {
6683             case BLACKNESS:
6684                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6685                 break;
6686             case 0xAA0029: /* No-op */
6687                 break;
6688             case WHITENESS:
6689                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6690                 break;
6691             case SRCCOPY: /* Well, we do that below? */
6692                 break;
6693             default:
6694                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6695                 goto error;
6696         }
6697         flags &= ~WINEDDBLT_ROP;
6698     }
6699     if (flags & WINEDDBLT_DDROPS)
6700     {
6701         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6702     }
6703     /* Now the 'with source' blits. */
6704     if (src_surface)
6705     {
6706         const BYTE *sbase;
6707         int sx, xinc, sy, yinc;
6708
6709         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6710             goto release;
6711
6712         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6713                 && (srcwidth != dstwidth || srcheight != dstheight))
6714         {
6715             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6716             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6717         }
6718
6719         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6720         xinc = (srcwidth << 16) / dstwidth;
6721         yinc = (srcheight << 16) / dstheight;
6722
6723         if (!flags)
6724         {
6725             /* No effects, we can cheat here. */
6726             if (dstwidth == srcwidth)
6727             {
6728                 if (dstheight == srcheight)
6729                 {
6730                     /* No stretching in either direction. This needs to be as
6731                      * fast as possible. */
6732                     sbuf = sbase;
6733
6734                     /* Check for overlapping surfaces. */
6735                     if (src_surface != dst_surface || xdst.top < xsrc.top
6736                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6737                     {
6738                         /* No overlap, or dst above src, so copy from top downwards. */
6739                         for (y = 0; y < dstheight; ++y)
6740                         {
6741                             memcpy(dbuf, sbuf, width);
6742                             sbuf += slock.Pitch;
6743                             dbuf += dlock.Pitch;
6744                         }
6745                     }
6746                     else if (xdst.top > xsrc.top)
6747                     {
6748                         /* Copy from bottom upwards. */
6749                         sbuf += (slock.Pitch*dstheight);
6750                         dbuf += (dlock.Pitch*dstheight);
6751                         for (y = 0; y < dstheight; ++y)
6752                         {
6753                             sbuf -= slock.Pitch;
6754                             dbuf -= dlock.Pitch;
6755                             memcpy(dbuf, sbuf, width);
6756                         }
6757                     }
6758                     else
6759                     {
6760                         /* Src and dst overlapping on the same line, use memmove. */
6761                         for (y = 0; y < dstheight; ++y)
6762                         {
6763                             memmove(dbuf, sbuf, width);
6764                             sbuf += slock.Pitch;
6765                             dbuf += dlock.Pitch;
6766                         }
6767                     }
6768                 }
6769                 else
6770                 {
6771                     /* Stretching in y direction only. */
6772                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6773                     {
6774                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6775                         memcpy(dbuf, sbuf, width);
6776                         dbuf += dlock.Pitch;
6777                     }
6778                 }
6779             }
6780             else
6781             {
6782                 /* Stretching in X direction. */
6783                 int last_sy = -1;
6784                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6785                 {
6786                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6787
6788                     if ((sy >> 16) == (last_sy >> 16))
6789                     {
6790                         /* This source row is the same as last source row -
6791                          * Copy the already stretched row. */
6792                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6793                     }
6794                     else
6795                     {
6796 #define STRETCH_ROW(type) \
6797 do { \
6798     const type *s = (const type *)sbuf; \
6799     type *d = (type *)dbuf; \
6800     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6801         d[x] = s[sx >> 16]; \
6802 } while(0)
6803
6804                         switch(bpp)
6805                         {
6806                             case 1:
6807                                 STRETCH_ROW(BYTE);
6808                                 break;
6809                             case 2:
6810                                 STRETCH_ROW(WORD);
6811                                 break;
6812                             case 4:
6813                                 STRETCH_ROW(DWORD);
6814                                 break;
6815                             case 3:
6816                             {
6817                                 const BYTE *s;
6818                                 BYTE *d = dbuf;
6819                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6820                                 {
6821                                     DWORD pixel;
6822
6823                                     s = sbuf + 3 * (sx >> 16);
6824                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6825                                     d[0] = (pixel      ) & 0xff;
6826                                     d[1] = (pixel >>  8) & 0xff;
6827                                     d[2] = (pixel >> 16) & 0xff;
6828                                     d += 3;
6829                                 }
6830                                 break;
6831                             }
6832                             default:
6833                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6834                                 hr = WINED3DERR_NOTAVAILABLE;
6835                                 goto error;
6836                         }
6837 #undef STRETCH_ROW
6838                     }
6839                     dbuf += dlock.Pitch;
6840                     last_sy = sy;
6841                 }
6842             }
6843         }
6844         else
6845         {
6846             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6847             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6848             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6849             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6850             {
6851                 /* The color keying flags are checked for correctness in ddraw */
6852                 if (flags & WINEDDBLT_KEYSRC)
6853                 {
6854                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6855                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6856                 }
6857                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6858                 {
6859                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6860                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6861                 }
6862
6863                 if (flags & WINEDDBLT_KEYDEST)
6864                 {
6865                     /* Destination color keys are taken from the source surface! */
6866                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6867                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6868                 }
6869                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6870                 {
6871                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6872                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6873                 }
6874
6875                 if (bpp == 1)
6876                 {
6877                     keymask = 0xff;
6878                 }
6879                 else
6880                 {
6881                     keymask = src_format->red_mask
6882                             | src_format->green_mask
6883                             | src_format->blue_mask;
6884                 }
6885                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6886             }
6887
6888             if (flags & WINEDDBLT_DDFX)
6889             {
6890                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6891                 LONG tmpxy;
6892                 dTopLeft     = dbuf;
6893                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6894                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6895                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6896
6897                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6898                 {
6899                     /* I don't think we need to do anything about this flag */
6900                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6901                 }
6902                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6903                 {
6904                     tmp          = dTopRight;
6905                     dTopRight    = dTopLeft;
6906                     dTopLeft     = tmp;
6907                     tmp          = dBottomRight;
6908                     dBottomRight = dBottomLeft;
6909                     dBottomLeft  = tmp;
6910                     dstxinc = dstxinc * -1;
6911                 }
6912                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6913                 {
6914                     tmp          = dTopLeft;
6915                     dTopLeft     = dBottomLeft;
6916                     dBottomLeft  = tmp;
6917                     tmp          = dTopRight;
6918                     dTopRight    = dBottomRight;
6919                     dBottomRight = tmp;
6920                     dstyinc = dstyinc * -1;
6921                 }
6922                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6923                 {
6924                     /* I don't think we need to do anything about this flag */
6925                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6926                 }
6927                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6928                 {
6929                     tmp          = dBottomRight;
6930                     dBottomRight = dTopLeft;
6931                     dTopLeft     = tmp;
6932                     tmp          = dBottomLeft;
6933                     dBottomLeft  = dTopRight;
6934                     dTopRight    = tmp;
6935                     dstxinc = dstxinc * -1;
6936                     dstyinc = dstyinc * -1;
6937                 }
6938                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6939                 {
6940                     tmp          = dTopLeft;
6941                     dTopLeft     = dBottomLeft;
6942                     dBottomLeft  = dBottomRight;
6943                     dBottomRight = dTopRight;
6944                     dTopRight    = tmp;
6945                     tmpxy   = dstxinc;
6946                     dstxinc = dstyinc;
6947                     dstyinc = tmpxy;
6948                     dstxinc = dstxinc * -1;
6949                 }
6950                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6951                 {
6952                     tmp          = dTopLeft;
6953                     dTopLeft     = dTopRight;
6954                     dTopRight    = dBottomRight;
6955                     dBottomRight = dBottomLeft;
6956                     dBottomLeft  = tmp;
6957                     tmpxy   = dstxinc;
6958                     dstxinc = dstyinc;
6959                     dstyinc = tmpxy;
6960                     dstyinc = dstyinc * -1;
6961                 }
6962                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6963                 {
6964                     /* I don't think we need to do anything about this flag */
6965                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6966                 }
6967                 dbuf = dTopLeft;
6968                 flags &= ~(WINEDDBLT_DDFX);
6969             }
6970
6971 #define COPY_COLORKEY_FX(type) \
6972 do { \
6973     const type *s; \
6974     type *d = (type *)dbuf, *dx, tmp; \
6975     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6976     { \
6977         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6978         dx = d; \
6979         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6980         { \
6981             tmp = s[sx >> 16]; \
6982             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6983                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6984             { \
6985                 dx[0] = tmp; \
6986             } \
6987             dx = (type *)(((BYTE *)dx) + dstxinc); \
6988         } \
6989         d = (type *)(((BYTE *)d) + dstyinc); \
6990     } \
6991 } while(0)
6992
6993             switch (bpp)
6994             {
6995                 case 1:
6996                     COPY_COLORKEY_FX(BYTE);
6997                     break;
6998                 case 2:
6999                     COPY_COLORKEY_FX(WORD);
7000                     break;
7001                 case 4:
7002                     COPY_COLORKEY_FX(DWORD);
7003                     break;
7004                 case 3:
7005                 {
7006                     const BYTE *s;
7007                     BYTE *d = dbuf, *dx;
7008                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7009                     {
7010                         sbuf = sbase + (sy >> 16) * slock.Pitch;
7011                         dx = d;
7012                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7013                         {
7014                             DWORD pixel, dpixel = 0;
7015                             s = sbuf + 3 * (sx>>16);
7016                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7017                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7018                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7019                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7020                             {
7021                                 dx[0] = (pixel      ) & 0xff;
7022                                 dx[1] = (pixel >>  8) & 0xff;
7023                                 dx[2] = (pixel >> 16) & 0xff;
7024                             }
7025                             dx += dstxinc;
7026                         }
7027                         d += dstyinc;
7028                     }
7029                     break;
7030                 }
7031                 default:
7032                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7033                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7034                     hr = WINED3DERR_NOTAVAILABLE;
7035                     goto error;
7036 #undef COPY_COLORKEY_FX
7037             }
7038         }
7039     }
7040
7041 error:
7042     if (flags && FIXME_ON(d3d_surface))
7043     {
7044         FIXME("\tUnsupported flags: %#x.\n", flags);
7045     }
7046
7047 release:
7048     wined3d_surface_unmap(dst_surface);
7049     if (src_surface && src_surface != dst_surface)
7050         wined3d_surface_unmap(src_surface);
7051     /* Release the converted surface, if any. */
7052     if (src_surface && src_surface != orig_src)
7053         wined3d_surface_decref(src_surface);
7054
7055     return hr;
7056 }
7057
7058 /* Do not call while under the GL lock. */
7059 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7060         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7061 {
7062     static const RECT src_rect;
7063     WINEDDBLTFX BltFx;
7064
7065     memset(&BltFx, 0, sizeof(BltFx));
7066     BltFx.dwSize = sizeof(BltFx);
7067     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7068     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7069             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7070 }
7071
7072 /* Do not call while under the GL lock. */
7073 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7074         struct wined3d_surface *surface, const RECT *rect, float depth)
7075 {
7076     FIXME("Depth filling not implemented by cpu_blit.\n");
7077     return WINED3DERR_INVALIDCALL;
7078 }
7079
7080 const struct blit_shader cpu_blit =  {
7081     cpu_blit_alloc,
7082     cpu_blit_free,
7083     cpu_blit_set,
7084     cpu_blit_unset,
7085     cpu_blit_supported,
7086     cpu_blit_color_fill,
7087     cpu_blit_depth_fill,
7088 };
7089
7090 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7091         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7092         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7093         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7094 {
7095     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7096     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7097     unsigned int resource_size;
7098     HRESULT hr;
7099
7100     if (multisample_quality > 0)
7101     {
7102         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7103         multisample_quality = 0;
7104     }
7105
7106     /* Quick lockable sanity check.
7107      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7108      * this function is too deep to need to care about things like this.
7109      * Levels need to be checked too, since they all affect what can be done. */
7110     switch (pool)
7111     {
7112         case WINED3DPOOL_SCRATCH:
7113             if (!lockable)
7114             {
7115                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7116                         "which are mutually exclusive, setting lockable to TRUE.\n");
7117                 lockable = TRUE;
7118             }
7119             break;
7120
7121         case WINED3DPOOL_SYSTEMMEM:
7122             if (!lockable)
7123                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7124             break;
7125
7126         case WINED3DPOOL_MANAGED:
7127             if (usage & WINED3DUSAGE_DYNAMIC)
7128                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7129             break;
7130
7131         case WINED3DPOOL_DEFAULT:
7132             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7133                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7134             break;
7135
7136         default:
7137             FIXME("Unknown pool %#x.\n", pool);
7138             break;
7139     };
7140
7141     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7142         FIXME("Trying to create a render target that isn't in the default pool.\n");
7143
7144     /* FIXME: Check that the format is supported by the device. */
7145
7146     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7147     if (!resource_size)
7148         return WINED3DERR_INVALIDCALL;
7149
7150     surface->surface_type = surface_type;
7151
7152     switch (surface_type)
7153     {
7154         case SURFACE_OPENGL:
7155             surface->surface_ops = &surface_ops;
7156             break;
7157
7158         case SURFACE_GDI:
7159             surface->surface_ops = &gdi_surface_ops;
7160             break;
7161
7162         default:
7163             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7164             return WINED3DERR_INVALIDCALL;
7165     }
7166
7167     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7168             multisample_type, multisample_quality, usage, pool, width, height, 1,
7169             resource_size, parent, parent_ops, &surface_resource_ops);
7170     if (FAILED(hr))
7171     {
7172         WARN("Failed to initialize resource, returning %#x.\n", hr);
7173         return hr;
7174     }
7175
7176     /* "Standalone" surface. */
7177     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7178
7179     surface->texture_level = level;
7180     list_init(&surface->overlays);
7181
7182     /* Flags */
7183     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7184     if (discard)
7185         surface->flags |= SFLAG_DISCARD;
7186     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7187         surface->flags |= SFLAG_LOCKABLE;
7188     /* I'm not sure if this qualifies as a hack or as an optimization. It
7189      * seems reasonable to assume that lockable render targets will get
7190      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7191      * creation. However, the other reason we want to do this is that several
7192      * ddraw applications access surface memory while the surface isn't
7193      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7194      * future locks prevents these from crashing. */
7195     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7196         surface->flags |= SFLAG_DYNLOCK;
7197
7198     /* Mark the texture as dirty so that it gets loaded first time around. */
7199     surface_add_dirty_rect(surface, NULL);
7200     list_init(&surface->renderbuffers);
7201
7202     TRACE("surface %p, memory %p, size %u\n",
7203             surface, surface->resource.allocatedMemory, surface->resource.size);
7204
7205     /* Call the private setup routine */
7206     hr = surface->surface_ops->surface_private_setup(surface);
7207     if (FAILED(hr))
7208     {
7209         ERR("Private setup failed, returning %#x\n", hr);
7210         surface_cleanup(surface);
7211         return hr;
7212     }
7213
7214     /* Similar to lockable rendertargets above, creating the DIB section
7215      * during surface initialization prevents the sysmem pointer from changing
7216      * after a wined3d_surface_getdc() call. */
7217     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7218             && SUCCEEDED(surface_create_dib_section(surface)))
7219     {
7220         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7221         surface->resource.heapMemory = NULL;
7222         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7223     }
7224
7225     return hr;
7226 }
7227
7228 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7229         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7230         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7231         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7232 {
7233     struct wined3d_surface *object;
7234     HRESULT hr;
7235
7236     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7237             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7238     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7239             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7240     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7241
7242     if (surface_type == SURFACE_OPENGL && !device->adapter)
7243     {
7244         ERR("OpenGL surfaces are not available without OpenGL.\n");
7245         return WINED3DERR_NOTAVAILABLE;
7246     }
7247
7248     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7249     if (!object)
7250     {
7251         ERR("Failed to allocate surface memory.\n");
7252         return WINED3DERR_OUTOFVIDEOMEMORY;
7253     }
7254
7255     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7256             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7257     if (FAILED(hr))
7258     {
7259         WARN("Failed to initialize surface, returning %#x.\n", hr);
7260         HeapFree(GetProcessHeap(), 0, object);
7261         return hr;
7262     }
7263
7264     TRACE("Created surface %p.\n", object);
7265     *surface = object;
7266
7267     return hr;
7268 }