wined3d: Make some parameters to get_glsl_program_entry() const.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2008 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     TRACE("surface %p.\n", surface);
46
47     if (surface->texture_name || (surface->flags & SFLAG_PBO) || !list_empty(&surface->renderbuffers))
48     {
49         struct wined3d_renderbuffer_entry *entry, *entry2;
50         const struct wined3d_gl_info *gl_info;
51         struct wined3d_context *context;
52
53         context = context_acquire(surface->resource.device, NULL);
54         gl_info = context->gl_info;
55
56         ENTER_GL();
57
58         if (surface->texture_name)
59         {
60             TRACE("Deleting texture %u.\n", surface->texture_name);
61             glDeleteTextures(1, &surface->texture_name);
62         }
63
64         if (surface->flags & SFLAG_PBO)
65         {
66             TRACE("Deleting PBO %u.\n", surface->pbo);
67             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
68         }
69
70         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
71         {
72             TRACE("Deleting renderbuffer %u.\n", entry->id);
73             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
74             HeapFree(GetProcessHeap(), 0, entry);
75         }
76
77         LEAVE_GL();
78
79         context_release(context);
80     }
81
82     if (surface->flags & SFLAG_DIBSECTION)
83     {
84         /* Release the DC. */
85         SelectObject(surface->hDC, surface->dib.holdbitmap);
86         DeleteDC(surface->hDC);
87         /* Release the DIB section. */
88         DeleteObject(surface->dib.DIBsection);
89         surface->dib.bitmap_data = NULL;
90         surface->resource.allocatedMemory = NULL;
91     }
92
93     if (surface->flags & SFLAG_USERPTR)
94         wined3d_surface_set_mem(surface, NULL);
95     if (surface->overlay_dest)
96         list_remove(&surface->overlay_entry);
97
98     HeapFree(GetProcessHeap(), 0, surface->palette9);
99
100     resource_cleanup(&surface->resource);
101 }
102
103 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
104 {
105     TRACE("surface %p, container %p.\n", surface, container);
106
107     if (!container && type != WINED3D_CONTAINER_NONE)
108         ERR("Setting NULL container of type %#x.\n", type);
109
110     if (type == WINED3D_CONTAINER_SWAPCHAIN)
111     {
112         surface->get_drawable_size = get_drawable_size_swapchain;
113     }
114     else
115     {
116         switch (wined3d_settings.offscreen_rendering_mode)
117         {
118             case ORM_FBO:
119                 surface->get_drawable_size = get_drawable_size_fbo;
120                 break;
121
122             case ORM_BACKBUFFER:
123                 surface->get_drawable_size = get_drawable_size_backbuffer;
124                 break;
125
126             default:
127                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
128                 return;
129         }
130     }
131
132     surface->container.type = type;
133     surface->container.u.base = container;
134 }
135
136 struct blt_info
137 {
138     GLenum binding;
139     GLenum bind_target;
140     enum tex_types tex_type;
141     GLfloat coords[4][3];
142 };
143
/* Floating point rectangle: left, top, right and bottom edges. */
struct float_rect
{
    float l, t, r, b;
};
151
152 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
153 {
154     f->l = ((r->left * 2.0f) / w) - 1.0f;
155     f->t = ((r->top * 2.0f) / h) - 1.0f;
156     f->r = ((r->right * 2.0f) / w) - 1.0f;
157     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
158 }
159
160 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
161 {
162     GLfloat (*coords)[3] = info->coords;
163     struct float_rect f;
164
165     switch (target)
166     {
167         default:
168             FIXME("Unsupported texture target %#x\n", target);
169             /* Fall back to GL_TEXTURE_2D */
170         case GL_TEXTURE_2D:
171             info->binding = GL_TEXTURE_BINDING_2D;
172             info->bind_target = GL_TEXTURE_2D;
173             info->tex_type = tex_2d;
174             coords[0][0] = (float)rect->left / w;
175             coords[0][1] = (float)rect->top / h;
176             coords[0][2] = 0.0f;
177
178             coords[1][0] = (float)rect->right / w;
179             coords[1][1] = (float)rect->top / h;
180             coords[1][2] = 0.0f;
181
182             coords[2][0] = (float)rect->left / w;
183             coords[2][1] = (float)rect->bottom / h;
184             coords[2][2] = 0.0f;
185
186             coords[3][0] = (float)rect->right / w;
187             coords[3][1] = (float)rect->bottom / h;
188             coords[3][2] = 0.0f;
189             break;
190
191         case GL_TEXTURE_RECTANGLE_ARB:
192             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
193             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
194             info->tex_type = tex_rect;
195             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
196             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
197             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
198             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
199             break;
200
201         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
202             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
203             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
204             info->tex_type = tex_cube;
205             cube_coords_float(rect, w, h, &f);
206
207             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
208             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
209             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
210             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
211             break;
212
213         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
214             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
215             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
216             info->tex_type = tex_cube;
217             cube_coords_float(rect, w, h, &f);
218
219             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
220             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
221             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
222             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
223             break;
224
225         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
226             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
227             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
228             info->tex_type = tex_cube;
229             cube_coords_float(rect, w, h, &f);
230
231             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
232             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
233             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
234             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
235             break;
236
237         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
238             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
239             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
240             info->tex_type = tex_cube;
241             cube_coords_float(rect, w, h, &f);
242
243             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
244             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
245             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
246             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
247             break;
248
249         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
250             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
251             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
252             info->tex_type = tex_cube;
253             cube_coords_float(rect, w, h, &f);
254
255             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
256             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
257             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
258             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
259             break;
260
261         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
262             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
263             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
264             info->tex_type = tex_cube;
265             cube_coords_float(rect, w, h, &f);
266
267             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
268             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
269             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
270             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
271             break;
272     }
273 }
274
275 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
276 {
277     if (rect_in)
278         *rect_out = *rect_in;
279     else
280     {
281         rect_out->left = 0;
282         rect_out->top = 0;
283         rect_out->right = surface->resource.width;
284         rect_out->bottom = surface->resource.height;
285     }
286 }
287
288 /* GL locking and context activation is done by the caller */
289 void draw_textured_quad(const struct wined3d_surface *src_surface, const RECT *src_rect,
290         const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
291 {
292     struct blt_info info;
293
294     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
295
296     glEnable(info.bind_target);
297     checkGLcall("glEnable(bind_target)");
298
299     /* Bind the texture */
300     glBindTexture(info.bind_target, src_surface->texture_name);
301     checkGLcall("glBindTexture");
302
303     /* Filtering for StretchRect */
304     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
305             wined3d_gl_mag_filter(magLookup, Filter));
306     checkGLcall("glTexParameteri");
307     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
308             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
309     checkGLcall("glTexParameteri");
310     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
311     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
312     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
313     checkGLcall("glTexEnvi");
314
315     /* Draw a quad */
316     glBegin(GL_TRIANGLE_STRIP);
317     glTexCoord3fv(info.coords[0]);
318     glVertex2i(dst_rect->left, dst_rect->top);
319
320     glTexCoord3fv(info.coords[1]);
321     glVertex2i(dst_rect->right, dst_rect->top);
322
323     glTexCoord3fv(info.coords[2]);
324     glVertex2i(dst_rect->left, dst_rect->bottom);
325
326     glTexCoord3fv(info.coords[3]);
327     glVertex2i(dst_rect->right, dst_rect->bottom);
328     glEnd();
329
330     /* Unbind the texture */
331     glBindTexture(info.bind_target, 0);
332     checkGLcall("glBindTexture(info->bind_target, 0)");
333
334     /* We changed the filtering settings on the texture. Inform the
335      * container about this to get the filters reset properly next draw. */
336     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
337     {
338         struct wined3d_texture *texture = src_surface->container.u.texture;
339         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
340         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
341         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
342     }
343 }
344
345 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
346 {
347     const struct wined3d_format *format = surface->resource.format;
348     SYSTEM_INFO sysInfo;
349     BITMAPINFO *b_info;
350     int extraline = 0;
351     DWORD *masks;
352     UINT usage;
353     HDC dc;
354
355     TRACE("surface %p.\n", surface);
356
357     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
358     {
359         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
360         return WINED3DERR_INVALIDCALL;
361     }
362
363     switch (format->byte_count)
364     {
365         case 2:
366         case 4:
367             /* Allocate extra space to store the RGB bit masks. */
368             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
369             break;
370
371         case 3:
372             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
373             break;
374
375         default:
376             /* Allocate extra space for a palette. */
377             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
378                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
379             break;
380     }
381
382     if (!b_info)
383         return E_OUTOFMEMORY;
384
385     /* Some applications access the surface in via DWORDs, and do not take
386      * the necessary care at the end of the surface. So we need at least
387      * 4 extra bytes at the end of the surface. Check against the page size,
388      * if the last page used for the surface has at least 4 spare bytes we're
389      * safe, otherwise add an extra line to the DIB section. */
390     GetSystemInfo(&sysInfo);
391     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
392     {
393         extraline = 1;
394         TRACE("Adding an extra line to the DIB section.\n");
395     }
396
397     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
398     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
399     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
400     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
401     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
402             * wined3d_surface_get_pitch(surface);
403     b_info->bmiHeader.biPlanes = 1;
404     b_info->bmiHeader.biBitCount = format->byte_count * 8;
405
406     b_info->bmiHeader.biXPelsPerMeter = 0;
407     b_info->bmiHeader.biYPelsPerMeter = 0;
408     b_info->bmiHeader.biClrUsed = 0;
409     b_info->bmiHeader.biClrImportant = 0;
410
411     /* Get the bit masks */
412     masks = (DWORD *)b_info->bmiColors;
413     switch (surface->resource.format->id)
414     {
415         case WINED3DFMT_B8G8R8_UNORM:
416             usage = DIB_RGB_COLORS;
417             b_info->bmiHeader.biCompression = BI_RGB;
418             break;
419
420         case WINED3DFMT_B5G5R5X1_UNORM:
421         case WINED3DFMT_B5G5R5A1_UNORM:
422         case WINED3DFMT_B4G4R4A4_UNORM:
423         case WINED3DFMT_B4G4R4X4_UNORM:
424         case WINED3DFMT_B2G3R3_UNORM:
425         case WINED3DFMT_B2G3R3A8_UNORM:
426         case WINED3DFMT_R10G10B10A2_UNORM:
427         case WINED3DFMT_R8G8B8A8_UNORM:
428         case WINED3DFMT_R8G8B8X8_UNORM:
429         case WINED3DFMT_B10G10R10A2_UNORM:
430         case WINED3DFMT_B5G6R5_UNORM:
431         case WINED3DFMT_R16G16B16A16_UNORM:
432             usage = 0;
433             b_info->bmiHeader.biCompression = BI_BITFIELDS;
434             masks[0] = format->red_mask;
435             masks[1] = format->green_mask;
436             masks[2] = format->blue_mask;
437             break;
438
439         default:
440             /* Don't know palette */
441             b_info->bmiHeader.biCompression = BI_RGB;
442             usage = 0;
443             break;
444     }
445
446     if (!(dc = GetDC(0)))
447     {
448         HeapFree(GetProcessHeap(), 0, b_info);
449         return HRESULT_FROM_WIN32(GetLastError());
450     }
451
452     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
453             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
454             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
455     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
456     ReleaseDC(0, dc);
457
458     if (!surface->dib.DIBsection)
459     {
460         ERR("Failed to create DIB section.\n");
461         HeapFree(GetProcessHeap(), 0, b_info);
462         return HRESULT_FROM_WIN32(GetLastError());
463     }
464
465     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
466     /* Copy the existing surface to the dib section. */
467     if (surface->resource.allocatedMemory)
468     {
469         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
470                 surface->resource.height * wined3d_surface_get_pitch(surface));
471     }
472     else
473     {
474         /* This is to make maps read the GL texture although memory is allocated. */
475         surface->flags &= ~SFLAG_INSYSMEM;
476     }
477     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
478
479     HeapFree(GetProcessHeap(), 0, b_info);
480
481     /* Now allocate a DC. */
482     surface->hDC = CreateCompatibleDC(0);
483     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
484     TRACE("Using wined3d palette %p.\n", surface->palette);
485     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
486
487     surface->flags |= SFLAG_DIBSECTION;
488
489     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
490     surface->resource.heapMemory = NULL;
491
492     return WINED3D_OK;
493 }
494
495 static void surface_prepare_system_memory(struct wined3d_surface *surface)
496 {
497     struct wined3d_device *device = surface->resource.device;
498     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
499
500     TRACE("surface %p.\n", surface);
501
502     /* Performance optimization: Count how often a surface is locked, if it is
503      * locked regularly do not throw away the system memory copy. This avoids
504      * the need to download the surface from OpenGL all the time. The surface
505      * is still downloaded if the OpenGL texture is changed. */
506     if (!(surface->flags & SFLAG_DYNLOCK))
507     {
508         if (++surface->lockCount > MAXLOCKCOUNT)
509         {
510             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
511             surface->flags |= SFLAG_DYNLOCK;
512         }
513     }
514
515     /* Create a PBO for dynamically locked surfaces but don't do it for
516      * converted or NPOT surfaces. Also don't create a PBO for systemmem
517      * surfaces. */
518     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
519             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
520             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
521     {
522         struct wined3d_context *context;
523         GLenum error;
524
525         context = context_acquire(device, NULL);
526         ENTER_GL();
527
528         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
529         error = glGetError();
530         if (!surface->pbo || error != GL_NO_ERROR)
531             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
532
533         TRACE("Binding PBO %u.\n", surface->pbo);
534
535         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
536         checkGLcall("glBindBufferARB");
537
538         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
539                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
540         checkGLcall("glBufferDataARB");
541
542         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
543         checkGLcall("glBindBufferARB");
544
545         /* We don't need the system memory anymore and we can't even use it for PBOs. */
546         if (!(surface->flags & SFLAG_CLIENT))
547         {
548             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
549             surface->resource.heapMemory = NULL;
550         }
551         surface->resource.allocatedMemory = NULL;
552         surface->flags |= SFLAG_PBO;
553         LEAVE_GL();
554         context_release(context);
555     }
556     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
557     {
558         /* Whatever surface we have, make sure that there is memory allocated
559          * for the downloaded copy, or a PBO to map. */
560         if (!surface->resource.heapMemory)
561             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
562
563         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
564                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
565
566         if (surface->flags & SFLAG_INSYSMEM)
567             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
568     }
569 }
570
571 static void surface_evict_sysmem(struct wined3d_surface *surface)
572 {
573     if (surface->flags & SFLAG_DONOTFREE)
574         return;
575
576     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
577     surface->resource.allocatedMemory = NULL;
578     surface->resource.heapMemory = NULL;
579     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
580 }
581
582 /* Context activation is done by the caller. */
583 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
584         const struct wined3d_gl_info *gl_info, BOOL srgb)
585 {
586     struct wined3d_device *device = surface->resource.device;
587     DWORD active_sampler;
588     GLint active_texture;
589
590     /* We don't need a specific texture unit, but after binding the texture
591      * the current unit is dirty. Read the unit back instead of switching to
592      * 0, this avoids messing around with the state manager's GL states. The
593      * current texture unit should always be a valid one.
594      *
595      * To be more specific, this is tricky because we can implicitly be
596      * called from sampler() in state.c. This means we can't touch anything
597      * other than whatever happens to be the currently active texture, or we
598      * would risk marking already applied sampler states dirty again.
599      *
600      * TODO: Track the current active texture per GL context instead of using
601      * glGet(). */
602
603     ENTER_GL();
604     glGetIntegerv(GL_ACTIVE_TEXTURE, &active_texture);
605     LEAVE_GL();
606     active_sampler = device->rev_tex_unit_map[active_texture - GL_TEXTURE0_ARB];
607
608     if (active_sampler != WINED3D_UNMAPPED_STAGE)
609         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
610     surface_bind(surface, gl_info, srgb);
611 }
612
613 static void surface_force_reload(struct wined3d_surface *surface)
614 {
615     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
616 }
617
618 static void surface_release_client_storage(struct wined3d_surface *surface)
619 {
620     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
621
622     ENTER_GL();
623     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
624     if (surface->texture_name)
625     {
626         surface_bind_and_dirtify(surface, context->gl_info, FALSE);
627         glTexImage2D(surface->texture_target, surface->texture_level,
628                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
629     }
630     if (surface->texture_name_srgb)
631     {
632         surface_bind_and_dirtify(surface, context->gl_info, TRUE);
633         glTexImage2D(surface->texture_target, surface->texture_level,
634                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
635     }
636     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
637     LEAVE_GL();
638
639     context_release(context);
640
641     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
642     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
643     surface_force_reload(surface);
644 }
645
646 static HRESULT surface_private_setup(struct wined3d_surface *surface)
647 {
648     /* TODO: Check against the maximum texture sizes supported by the video card. */
649     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
650     unsigned int pow2Width, pow2Height;
651
652     TRACE("surface %p.\n", surface);
653
654     surface->texture_name = 0;
655     surface->texture_target = GL_TEXTURE_2D;
656
657     /* Non-power2 support */
658     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
659     {
660         pow2Width = surface->resource.width;
661         pow2Height = surface->resource.height;
662     }
663     else
664     {
665         /* Find the nearest pow2 match */
666         pow2Width = pow2Height = 1;
667         while (pow2Width < surface->resource.width)
668             pow2Width <<= 1;
669         while (pow2Height < surface->resource.height)
670             pow2Height <<= 1;
671     }
672     surface->pow2Width = pow2Width;
673     surface->pow2Height = pow2Height;
674
675     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
676     {
677         /* TODO: Add support for non power two compressed textures. */
678         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
679         {
680             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
681                   surface, surface->resource.width, surface->resource.height);
682             return WINED3DERR_NOTAVAILABLE;
683         }
684     }
685
686     if (pow2Width != surface->resource.width
687             || pow2Height != surface->resource.height)
688     {
689         surface->flags |= SFLAG_NONPOW2;
690     }
691
692     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
693             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
694     {
695         /* One of three options:
696          * 1: Do the same as we do with NPOT and scale the texture, (any
697          *    texture ops would require the texture to be scaled which is
698          *    potentially slow)
699          * 2: Set the texture to the maximum size (bad idea).
700          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
701          * 4: Create the surface, but allow it to be used only for DirectDraw
702          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
703          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
704          *    the render target. */
705         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
706         {
707             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
708             return WINED3DERR_NOTAVAILABLE;
709         }
710
711         /* We should never use this surface in combination with OpenGL! */
712         TRACE("Creating an oversized surface: %ux%u.\n",
713                 surface->pow2Width, surface->pow2Height);
714     }
715     else
716     {
717         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
718          * and EXT_PALETTED_TEXTURE is used in combination with texture
719          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
720          * EXT_PALETTED_TEXTURE doesn't work in combination with
721          * ARB_TEXTURE_RECTANGLE. */
722         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
723                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
724                 && gl_info->supported[EXT_PALETTED_TEXTURE]
725                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
726         {
727             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
728             surface->pow2Width = surface->resource.width;
729             surface->pow2Height = surface->resource.height;
730             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
731         }
732     }
733
734     switch (wined3d_settings.offscreen_rendering_mode)
735     {
736         case ORM_FBO:
737             surface->get_drawable_size = get_drawable_size_fbo;
738             break;
739
740         case ORM_BACKBUFFER:
741             surface->get_drawable_size = get_drawable_size_backbuffer;
742             break;
743
744         default:
745             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
746             return WINED3DERR_INVALIDCALL;
747     }
748
749     surface->flags |= SFLAG_INSYSMEM;
750
751     return WINED3D_OK;
752 }
753
754 static void surface_realize_palette(struct wined3d_surface *surface)
755 {
756     struct wined3d_palette *palette = surface->palette;
757
758     TRACE("surface %p.\n", surface);
759
760     if (!palette) return;
761
762     if (surface->resource.format->id == WINED3DFMT_P8_UINT
763             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
764     {
765         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
766         {
767             /* Make sure the texture is up to date. This call doesn't do
768              * anything if the texture is already up to date. */
769             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
770
771             /* We want to force a palette refresh, so mark the drawable as not being up to date */
772             if (!surface_is_offscreen(surface))
773                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
774         }
775         else
776         {
777             if (!(surface->flags & SFLAG_INSYSMEM))
778             {
779                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
780                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
781             }
782             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
783         }
784     }
785
786     if (surface->flags & SFLAG_DIBSECTION)
787     {
788         RGBQUAD col[256];
789         unsigned int i;
790
791         TRACE("Updating the DC's palette.\n");
792
793         for (i = 0; i < 256; ++i)
794         {
795             col[i].rgbRed   = palette->palents[i].peRed;
796             col[i].rgbGreen = palette->palents[i].peGreen;
797             col[i].rgbBlue  = palette->palents[i].peBlue;
798             col[i].rgbReserved = 0;
799         }
800         SetDIBColorTable(surface->hDC, 0, 256, col);
801     }
802
803     /* Propagate the changes to the drawable when we have a palette. */
804     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
805         surface_load_location(surface, SFLAG_INDRAWABLE, NULL);
806 }
807
808 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
809 {
810     HRESULT hr;
811
812     /* If there's no destination surface there is nothing to do. */
813     if (!surface->overlay_dest)
814         return WINED3D_OK;
815
816     /* Blt calls ModifyLocation on the dest surface, which in turn calls
817      * DrawOverlay to update the overlay. Prevent an endless recursion. */
818     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
819         return WINED3D_OK;
820
821     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
822     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
823             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
824     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
825
826     return hr;
827 }
828
829 static void surface_preload(struct wined3d_surface *surface)
830 {
831     TRACE("surface %p.\n", surface);
832
833     surface_internal_preload(surface, SRGB_ANY);
834 }
835
836 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
837 {
838     struct wined3d_device *device = surface->resource.device;
839     const RECT *pass_rect = rect;
840
841     TRACE("surface %p, rect %s, flags %#x.\n",
842             surface, wine_dbgstr_rect(rect), flags);
843
844     if (flags & WINED3DLOCK_DISCARD)
845     {
846         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
847         surface_prepare_system_memory(surface);
848         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
849     }
850     else
851     {
852         /* surface_load_location() does not check if the rectangle specifies
853          * the full surface. Most callers don't need that, so do it here. */
854         if (rect && !rect->top && !rect->left
855                 && rect->right == surface->resource.width
856                 && rect->bottom == surface->resource.height)
857             pass_rect = NULL;
858
859         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
860                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
861                 || surface == device->fb.render_targets[0])))
862             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
863     }
864
865     if (surface->flags & SFLAG_PBO)
866     {
867         const struct wined3d_gl_info *gl_info;
868         struct wined3d_context *context;
869
870         context = context_acquire(device, NULL);
871         gl_info = context->gl_info;
872
873         ENTER_GL();
874         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
875         checkGLcall("glBindBufferARB");
876
877         /* This shouldn't happen but could occur if some other function
878          * didn't handle the PBO properly. */
879         if (surface->resource.allocatedMemory)
880             ERR("The surface already has PBO memory allocated.\n");
881
882         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
883         checkGLcall("glMapBufferARB");
884
885         /* Make sure the PBO isn't set anymore in order not to break non-PBO
886          * calls. */
887         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
888         checkGLcall("glBindBufferARB");
889
890         LEAVE_GL();
891         context_release(context);
892     }
893
894     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
895     {
896         if (!rect)
897             surface_add_dirty_rect(surface, NULL);
898         else
899         {
900             WINED3DBOX b;
901
902             b.Left = rect->left;
903             b.Top = rect->top;
904             b.Right = rect->right;
905             b.Bottom = rect->bottom;
906             b.Front = 0;
907             b.Back = 1;
908             surface_add_dirty_rect(surface, &b);
909         }
910     }
911 }
912
913 static void surface_unmap(struct wined3d_surface *surface)
914 {
915     struct wined3d_device *device = surface->resource.device;
916     BOOL fullsurface;
917
918     TRACE("surface %p.\n", surface);
919
920     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
921
922     if (surface->flags & SFLAG_PBO)
923     {
924         const struct wined3d_gl_info *gl_info;
925         struct wined3d_context *context;
926
927         TRACE("Freeing PBO memory.\n");
928
929         context = context_acquire(device, NULL);
930         gl_info = context->gl_info;
931
932         ENTER_GL();
933         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
934         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
935         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
936         checkGLcall("glUnmapBufferARB");
937         LEAVE_GL();
938         context_release(context);
939
940         surface->resource.allocatedMemory = NULL;
941     }
942
943     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
944
945     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
946     {
947         TRACE("Not dirtified, nothing to do.\n");
948         goto done;
949     }
950
951     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
952             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
953     {
954         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
955         {
956             static BOOL warned = FALSE;
957             if (!warned)
958             {
959                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
960                 warned = TRUE;
961             }
962             goto done;
963         }
964
965         if (!surface->dirtyRect.left && !surface->dirtyRect.top
966                 && surface->dirtyRect.right == surface->resource.width
967                 && surface->dirtyRect.bottom == surface->resource.height)
968         {
969             fullsurface = TRUE;
970         }
971         else
972         {
973             /* TODO: Proper partial rectangle tracking. */
974             fullsurface = FALSE;
975             surface->flags |= SFLAG_INSYSMEM;
976         }
977
978         surface_load_location(surface, SFLAG_INDRAWABLE, fullsurface ? NULL : &surface->dirtyRect);
979
980         /* Partial rectangle tracking is not commonly implemented, it is only
981          * done for render targets. INSYSMEM was set before to tell
982          * surface_load_location() where to read the rectangle from.
983          * Indrawable is set because all modifications from the partial
984          * sysmem copy are written back to the drawable, thus the surface is
985          * merged again in the drawable. The sysmem copy is not fully up to
986          * date because only a subrectangle was read in Map(). */
987         if (!fullsurface)
988         {
989             surface_modify_location(surface, SFLAG_INDRAWABLE, TRUE);
990             surface_evict_sysmem(surface);
991         }
992
993         surface->dirtyRect.left = surface->resource.width;
994         surface->dirtyRect.top = surface->resource.height;
995         surface->dirtyRect.right = 0;
996         surface->dirtyRect.bottom = 0;
997     }
998     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
999     {
1000         FIXME("Depth / stencil buffer locking is not implemented.\n");
1001     }
1002
1003 done:
1004     /* Overlays have to be redrawn manually after changes with the GL implementation */
1005     if (surface->overlay_dest)
1006         surface->surface_ops->surface_draw_overlay(surface);
1007 }
1008
1009 static HRESULT surface_getdc(struct wined3d_surface *surface)
1010 {
1011     WINED3DLOCKED_RECT lock;
1012     HRESULT hr;
1013
1014     TRACE("surface %p.\n", surface);
1015
1016     /* Create a DIB section if there isn't a dc yet. */
1017     if (!surface->hDC)
1018     {
1019         if (surface->flags & SFLAG_CLIENT)
1020         {
1021             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1022             surface_release_client_storage(surface);
1023         }
1024         hr = surface_create_dib_section(surface);
1025         if (FAILED(hr))
1026             return WINED3DERR_INVALIDCALL;
1027
1028         /* Use the DIB section from now on if we are not using a PBO. */
1029         if (!(surface->flags & SFLAG_PBO))
1030             surface->resource.allocatedMemory = surface->dib.bitmap_data;
1031     }
1032
1033     /* Map the surface. */
1034     hr = wined3d_surface_map(surface, &lock, NULL, 0);
1035     if (FAILED(hr))
1036         ERR("Map failed, hr %#x.\n", hr);
1037
1038     /* Sync the DIB with the PBO. This can't be done earlier because Map()
1039      * activates the allocatedMemory. */
1040     if (surface->flags & SFLAG_PBO)
1041         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->dib.bitmap_size);
1042
1043     return hr;
1044 }
1045
1046 static HRESULT surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
1047 {
1048     TRACE("surface %p, override %p.\n", surface, override);
1049
1050     /* Flipping is only supported on render targets and overlays. */
1051     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
1052     {
1053         WARN("Tried to flip a non-render target, non-overlay surface.\n");
1054         return WINEDDERR_NOTFLIPPABLE;
1055     }
1056
1057     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1058     {
1059         flip_surface(surface, override);
1060
1061         /* Update the overlay if it is visible */
1062         if (surface->overlay_dest)
1063             return surface->surface_ops->surface_draw_overlay(surface);
1064         else
1065             return WINED3D_OK;
1066     }
1067
1068     return WINED3D_OK;
1069 }
1070
1071 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1072 {
1073     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1074         return FALSE;
1075     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1076         return FALSE;
1077     return TRUE;
1078 }
1079
1080 static void wined3d_surface_depth_blt_fbo(struct wined3d_device *device, struct wined3d_surface *src_surface,
1081         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1082 {
1083     const struct wined3d_gl_info *gl_info;
1084     struct wined3d_context *context;
1085     DWORD src_mask, dst_mask;
1086     GLbitfield gl_mask;
1087
1088     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1089             device, src_surface, wine_dbgstr_rect(src_rect),
1090             dst_surface, wine_dbgstr_rect(dst_rect));
1091
1092     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1093     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1094
1095     if (src_mask != dst_mask)
1096     {
1097         ERR("Incompatible formats %s and %s.\n",
1098                 debug_d3dformat(src_surface->resource.format->id),
1099                 debug_d3dformat(dst_surface->resource.format->id));
1100         return;
1101     }
1102
1103     if (!src_mask)
1104     {
1105         ERR("Not a depth / stencil format: %s.\n",
1106                 debug_d3dformat(src_surface->resource.format->id));
1107         return;
1108     }
1109
1110     gl_mask = 0;
1111     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1112         gl_mask |= GL_DEPTH_BUFFER_BIT;
1113     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1114         gl_mask |= GL_STENCIL_BUFFER_BIT;
1115
1116     /* Make sure the locations are up-to-date. Loading the destination
1117      * surface isn't required if the entire surface is overwritten. */
1118     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1119     if (!surface_is_full_rect(dst_surface, dst_rect))
1120         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1121
1122     context = context_acquire(device, NULL);
1123     if (!context->valid)
1124     {
1125         context_release(context);
1126         WARN("Invalid context, skipping blit.\n");
1127         return;
1128     }
1129
1130     gl_info = context->gl_info;
1131
1132     ENTER_GL();
1133
1134     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1135     glReadBuffer(GL_NONE);
1136     checkGLcall("glReadBuffer()");
1137     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1138
1139     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1140     context_set_draw_buffer(context, GL_NONE);
1141     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1142
1143     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1144     {
1145         glDepthMask(GL_TRUE);
1146         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1147     }
1148     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1149     {
1150         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1151         {
1152             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1153             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1154         }
1155         glStencilMask(~0U);
1156         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1157     }
1158
1159     glDisable(GL_SCISSOR_TEST);
1160     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1161
1162     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1163             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1164     checkGLcall("glBlitFramebuffer()");
1165
1166     LEAVE_GL();
1167
1168     if (wined3d_settings.strict_draw_ordering)
1169         wglFlush(); /* Flush to ensure ordering across contexts. */
1170
1171     context_release(context);
1172 }
1173
1174 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1175  * Depth / stencil is not supported. */
1176 static void surface_blt_fbo(struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1177         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1178         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1179 {
1180     const struct wined3d_gl_info *gl_info;
1181     struct wined3d_context *context;
1182     RECT src_rect, dst_rect;
1183     GLenum gl_filter;
1184     GLenum buffer;
1185
1186     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1187     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1188             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1189     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1190             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1191
1192     src_rect = *src_rect_in;
1193     dst_rect = *dst_rect_in;
1194
1195     switch (filter)
1196     {
1197         case WINED3DTEXF_LINEAR:
1198             gl_filter = GL_LINEAR;
1199             break;
1200
1201         default:
1202             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1203         case WINED3DTEXF_NONE:
1204         case WINED3DTEXF_POINT:
1205             gl_filter = GL_NEAREST;
1206             break;
1207     }
1208
1209     if (src_location == SFLAG_INDRAWABLE && surface_is_offscreen(src_surface))
1210         src_location = SFLAG_INTEXTURE;
1211     if (dst_location == SFLAG_INDRAWABLE && surface_is_offscreen(dst_surface))
1212         dst_location = SFLAG_INTEXTURE;
1213
1214     /* Make sure the locations are up-to-date. Loading the destination
1215      * surface isn't required if the entire surface is overwritten. (And is
1216      * in fact harmful if we're being called by surface_load_location() with
1217      * the purpose of loading the destination surface.) */
1218     surface_load_location(src_surface, src_location, NULL);
1219     if (!surface_is_full_rect(dst_surface, &dst_rect))
1220         surface_load_location(dst_surface, dst_location, NULL);
1221
1222     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1223     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1224     else context = context_acquire(device, NULL);
1225
1226     if (!context->valid)
1227     {
1228         context_release(context);
1229         WARN("Invalid context, skipping blit.\n");
1230         return;
1231     }
1232
1233     gl_info = context->gl_info;
1234
1235     if (src_location == SFLAG_INDRAWABLE)
1236     {
1237         TRACE("Source surface %p is onscreen.\n", src_surface);
1238         buffer = surface_get_gl_buffer(src_surface);
1239         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1240     }
1241     else
1242     {
1243         TRACE("Source surface %p is offscreen.\n", src_surface);
1244         buffer = GL_COLOR_ATTACHMENT0;
1245     }
1246
1247     ENTER_GL();
1248     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1249     glReadBuffer(buffer);
1250     checkGLcall("glReadBuffer()");
1251     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1252     LEAVE_GL();
1253
1254     if (dst_location == SFLAG_INDRAWABLE)
1255     {
1256         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1257         buffer = surface_get_gl_buffer(dst_surface);
1258         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1259     }
1260     else
1261     {
1262         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1263         buffer = GL_COLOR_ATTACHMENT0;
1264     }
1265
1266     ENTER_GL();
1267     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1268     context_set_draw_buffer(context, buffer);
1269     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1270     context_invalidate_state(context, STATE_FRAMEBUFFER);
1271
1272     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1273     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1274     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1275     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1276     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1277
1278     glDisable(GL_SCISSOR_TEST);
1279     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1280
1281     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1282             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1283     checkGLcall("glBlitFramebuffer()");
1284
1285     LEAVE_GL();
1286
1287     if (wined3d_settings.strict_draw_ordering
1288             || (dst_location == SFLAG_INDRAWABLE
1289             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1290         wglFlush();
1291
1292     context_release(context);
1293 }
1294
1295 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1296         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1297         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1298 {
1299     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1300         return FALSE;
1301
1302     /* Source and/or destination need to be on the GL side */
1303     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1304         return FALSE;
1305
1306     switch (blit_op)
1307     {
1308         case WINED3D_BLIT_OP_COLOR_BLIT:
1309             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1310                 return FALSE;
1311             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1312                 return FALSE;
1313             break;
1314
1315         case WINED3D_BLIT_OP_DEPTH_BLIT:
1316             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1317                 return FALSE;
1318             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1319                 return FALSE;
1320             break;
1321
1322         default:
1323             return FALSE;
1324     }
1325
1326     if (!(src_format->id == dst_format->id
1327             || (is_identity_fixup(src_format->color_fixup)
1328             && is_identity_fixup(dst_format->color_fixup))))
1329         return FALSE;
1330
1331     return TRUE;
1332 }
1333
1334 /* This function checks if the primary render target uses the 8bit paletted format. */
1335 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1336 {
1337     if (device->fb.render_targets && device->fb.render_targets[0])
1338     {
1339         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1340         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1341                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1342             return TRUE;
1343     }
1344     return FALSE;
1345 }
1346
1347 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1348         DWORD color, WINED3DCOLORVALUE *float_color)
1349 {
1350     const struct wined3d_format *format = surface->resource.format;
1351     const struct wined3d_device *device = surface->resource.device;
1352
1353     switch (format->id)
1354     {
1355         case WINED3DFMT_P8_UINT:
1356             if (surface->palette)
1357             {
1358                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1359                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1360                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1361             }
1362             else
1363             {
1364                 float_color->r = 0.0f;
1365                 float_color->g = 0.0f;
1366                 float_color->b = 0.0f;
1367             }
1368             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1369             break;
1370
1371         case WINED3DFMT_B5G6R5_UNORM:
1372             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1373             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1374             float_color->b = (color & 0x1f) / 31.0f;
1375             float_color->a = 1.0f;
1376             break;
1377
1378         case WINED3DFMT_B8G8R8_UNORM:
1379         case WINED3DFMT_B8G8R8X8_UNORM:
1380             float_color->r = D3DCOLOR_R(color);
1381             float_color->g = D3DCOLOR_G(color);
1382             float_color->b = D3DCOLOR_B(color);
1383             float_color->a = 1.0f;
1384             break;
1385
1386         case WINED3DFMT_B8G8R8A8_UNORM:
1387             float_color->r = D3DCOLOR_R(color);
1388             float_color->g = D3DCOLOR_G(color);
1389             float_color->b = D3DCOLOR_B(color);
1390             float_color->a = D3DCOLOR_A(color);
1391             break;
1392
1393         default:
1394             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1395             return FALSE;
1396     }
1397
1398     return TRUE;
1399 }
1400
1401 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1402 {
1403     const struct wined3d_format *format = surface->resource.format;
1404
1405     switch (format->id)
1406     {
1407         case WINED3DFMT_S1_UINT_D15_UNORM:
1408             *float_depth = depth / (float)0x00007fff;
1409             break;
1410
1411         case WINED3DFMT_D16_UNORM:
1412             *float_depth = depth / (float)0x0000ffff;
1413             break;
1414
1415         case WINED3DFMT_D24_UNORM_S8_UINT:
1416         case WINED3DFMT_X8D24_UNORM:
1417             *float_depth = depth / (float)0x00ffffff;
1418             break;
1419
1420         case WINED3DFMT_D32_UNORM:
1421             *float_depth = depth / (float)0xffffffff;
1422             break;
1423
1424         default:
1425             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1426             return FALSE;
1427     }
1428
1429     return TRUE;
1430 }
1431
1432 /* Do not call while under the GL lock. */
1433 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1434 {
1435     const struct wined3d_resource *resource = &surface->resource;
1436     struct wined3d_device *device = resource->device;
1437     const struct blit_shader *blitter;
1438
1439     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1440             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1441     if (!blitter)
1442     {
1443         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1444         return WINED3DERR_INVALIDCALL;
1445     }
1446
1447     return blitter->depth_fill(device, surface, rect, depth);
1448 }
1449
1450 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1451         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1452 {
1453     struct wined3d_device *device = src_surface->resource.device;
1454
1455     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1456             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1457             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1458         return WINED3DERR_INVALIDCALL;
1459
1460     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1461
1462     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1463             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1464     surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1465
1466     return WINED3D_OK;
1467 }
1468
1469 /* Do not call while under the GL lock. */
1470 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1471         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1472         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1473 {
1474     const struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1475     struct wined3d_device *device = dst_surface->resource.device;
1476     DWORD src_ds_flags, dst_ds_flags;
1477     RECT src_rect, dst_rect;
1478
1479     static const DWORD simple_blit = WINEDDBLT_ASYNC
1480             | WINEDDBLT_COLORFILL
1481             | WINEDDBLT_WAIT
1482             | WINEDDBLT_DEPTHFILL
1483             | WINEDDBLT_DONOTWAIT;
1484
1485     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1486             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1487             flags, fx, debug_d3dtexturefiltertype(filter));
1488     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1489
1490     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1491     {
1492         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1493         return WINEDDERR_SURFACEBUSY;
1494     }
1495
1496     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1497
1498     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1499             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1500             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1501             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1502             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1503     {
1504         /* The destination rect can be out of bounds on the condition
1505          * that a clipper is set for the surface. */
1506         if (dst_surface->clipper)
1507             FIXME("Blit clipping not implemented.\n");
1508         else
1509             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1510         return WINEDDERR_INVALIDRECT;
1511     }
1512
1513     if (src_surface)
1514     {
1515         surface_get_rect(src_surface, src_rect_in, &src_rect);
1516
1517         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1518                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1519                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1520                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1521                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1522         {
1523             WARN("Application gave us bad source rectangle for Blt.\n");
1524             return WINEDDERR_INVALIDRECT;
1525         }
1526     }
1527     else
1528     {
1529         memset(&src_rect, 0, sizeof(src_rect));
1530     }
1531
1532     if (!fx || !(fx->dwDDFX))
1533         flags &= ~WINEDDBLT_DDFX;
1534
1535     if (flags & WINEDDBLT_WAIT)
1536         flags &= ~WINEDDBLT_WAIT;
1537
1538     if (flags & WINEDDBLT_ASYNC)
1539     {
1540         static unsigned int once;
1541
1542         if (!once++)
1543             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1544         flags &= ~WINEDDBLT_ASYNC;
1545     }
1546
1547     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1548     if (flags & WINEDDBLT_DONOTWAIT)
1549     {
1550         static unsigned int once;
1551
1552         if (!once++)
1553             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1554         flags &= ~WINEDDBLT_DONOTWAIT;
1555     }
1556
1557     if (!device->d3d_initialized)
1558     {
1559         WARN("D3D not initialized, using fallback.\n");
1560         goto cpu;
1561     }
1562
1563     if (flags & ~simple_blit)
1564     {
1565         WARN("Using fallback for complex blit (%#x).\n", flags);
1566         goto fallback;
1567     }
1568
1569     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1570         src_swapchain = src_surface->container.u.swapchain;
1571     else
1572         src_swapchain = NULL;
1573
1574     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1575         dst_swapchain = dst_surface->container.u.swapchain;
1576     else
1577         dst_swapchain = NULL;
1578
1579     /* This isn't strictly needed. FBO blits for example could deal with
1580      * cross-swapchain blits by first downloading the source to a texture
1581      * before switching to the destination context. We just have this here to
1582      * not have to deal with the issue, since cross-swapchain blits should be
1583      * rare. */
1584     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1585     {
1586         FIXME("Using fallback for cross-swapchain blit.\n");
1587         goto fallback;
1588     }
1589
1590     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1591     if (src_surface)
1592         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1593     else
1594         src_ds_flags = 0;
1595
1596     if (src_ds_flags || dst_ds_flags)
1597     {
1598         if (flags & WINEDDBLT_DEPTHFILL)
1599         {
1600             float depth;
1601
1602             TRACE("Depth fill.\n");
1603
1604             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1605                 return WINED3DERR_INVALIDCALL;
1606
1607             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1608                 return WINED3D_OK;
1609         }
1610         else
1611         {
1612             /* Accessing depth / stencil surfaces is supposed to fail while in
1613              * a scene, except for fills, which seem to work. */
1614             if (device->inScene)
1615             {
1616                 WARN("Rejecting depth / stencil access while in scene.\n");
1617                 return WINED3DERR_INVALIDCALL;
1618             }
1619
1620             if (src_ds_flags != dst_ds_flags)
1621             {
1622                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1623                 return WINED3DERR_INVALIDCALL;
1624             }
1625
1626             if (src_rect.top || src_rect.left
1627                     || src_rect.bottom != src_surface->resource.height
1628                     || src_rect.right != src_surface->resource.width)
1629             {
1630                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1631                         wine_dbgstr_rect(&src_rect));
1632                 return WINED3DERR_INVALIDCALL;
1633             }
1634
1635             if (dst_rect.top || dst_rect.left
1636                     || dst_rect.bottom != dst_surface->resource.height
1637                     || dst_rect.right != dst_surface->resource.width)
1638             {
1639                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1640                         wine_dbgstr_rect(&src_rect));
1641                 return WINED3DERR_INVALIDCALL;
1642             }
1643
1644             if (src_surface->resource.height != dst_surface->resource.height
1645                     || src_surface->resource.width != dst_surface->resource.width)
1646             {
1647                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1648                 return WINED3DERR_INVALIDCALL;
1649             }
1650
1651             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1652                 return WINED3D_OK;
1653         }
1654     }
1655     else
1656     {
1657         if (flags & WINEDDBLT_COLORFILL)
1658         {
1659             WINED3DCOLORVALUE color;
1660
1661             TRACE("Color fill.\n");
1662
1663             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1664                 goto fallback;
1665
1666             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1667                 return WINED3D_OK;
1668         }
1669         else
1670         {
1671             TRACE("Color blit.\n");
1672
1673             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1674                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1675                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1676             {
1677                 TRACE("Using FBO blit.\n");
1678
1679                 surface_blt_fbo(device, filter,
1680                         src_surface, SFLAG_INDRAWABLE, &src_rect,
1681                         dst_surface, SFLAG_INDRAWABLE, &dst_rect);
1682                 surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
1683                 return WINED3D_OK;
1684             }
1685
1686             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1687                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1688                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1689             {
1690                 TRACE("Using arbfp blit.\n");
1691
1692                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1693                     return WINED3D_OK;
1694             }
1695         }
1696     }
1697
1698 fallback:
1699
1700     /* Special cases for render targets. */
1701     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1702             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1703     {
1704         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1705                 src_surface, &src_rect, flags, fx, filter)))
1706             return WINED3D_OK;
1707     }
1708
1709 cpu:
1710
1711     /* For the rest call the X11 surface implementation. For render targets
1712      * this should be implemented OpenGL accelerated in BltOverride, other
1713      * blits are rather rare. */
1714     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1715 }
1716
1717 /* Do not call while under the GL lock. */
1718 HRESULT CDECL wined3d_surface_bltfast(struct wined3d_surface *dst_surface, DWORD dst_x, DWORD dst_y,
1719         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD trans)
1720 {
1721     RECT src_rect, dst_rect;
1722     DWORD flags = 0;
1723
1724     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect_in %s, trans %#x.\n",
1725             dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
1726
1727     surface_get_rect(src_surface, src_rect_in, &src_rect);
1728
1729     dst_rect.left = dst_x;
1730     dst_rect.top = dst_y;
1731     dst_rect.right = dst_x + src_rect.right - src_rect.left;
1732     dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
1733
1734     if (trans & WINEDDBLTFAST_SRCCOLORKEY)
1735         flags |= WINEDDBLT_KEYSRC;
1736     if (trans & WINEDDBLTFAST_DESTCOLORKEY)
1737         flags |= WINEDDBLT_KEYDEST;
1738     if (trans & WINEDDBLTFAST_WAIT)
1739         flags |= WINEDDBLT_WAIT;
1740     if (trans & WINEDDBLTFAST_DONOTWAIT)
1741         flags |= WINEDDBLT_DONOTWAIT;
1742
1743     return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT);
1744 }
1745
1746 static HRESULT surface_set_mem(struct wined3d_surface *surface, void *mem)
1747 {
1748     TRACE("surface %p, mem %p.\n", surface, mem);
1749
1750     if (mem && mem != surface->resource.allocatedMemory)
1751     {
1752         void *release = NULL;
1753
1754         /* Do I have to copy the old surface content? */
1755         if (surface->flags & SFLAG_DIBSECTION)
1756         {
1757             SelectObject(surface->hDC, surface->dib.holdbitmap);
1758             DeleteDC(surface->hDC);
1759             /* Release the DIB section. */
1760             DeleteObject(surface->dib.DIBsection);
1761             surface->dib.bitmap_data = NULL;
1762             surface->resource.allocatedMemory = NULL;
1763             surface->hDC = NULL;
1764             surface->flags &= ~SFLAG_DIBSECTION;
1765         }
1766         else if (!(surface->flags & SFLAG_USERPTR))
1767         {
1768             release = surface->resource.heapMemory;
1769             surface->resource.heapMemory = NULL;
1770         }
1771         surface->resource.allocatedMemory = mem;
1772         surface->flags |= SFLAG_USERPTR;
1773
1774         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
1775         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1776
1777         /* For client textures OpenGL has to be notified. */
1778         if (surface->flags & SFLAG_CLIENT)
1779             surface_release_client_storage(surface);
1780
1781         /* Now free the old memory if any. */
1782         HeapFree(GetProcessHeap(), 0, release);
1783     }
1784     else if (surface->flags & SFLAG_USERPTR)
1785     {
1786         /* Map and GetDC will re-create the dib section and allocated memory. */
1787         surface->resource.allocatedMemory = NULL;
1788         /* HeapMemory should be NULL already. */
1789         if (surface->resource.heapMemory)
1790             ERR("User pointer surface has heap memory allocated.\n");
1791         surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
1792
1793         if (surface->flags & SFLAG_CLIENT)
1794             surface_release_client_storage(surface);
1795
1796         surface_prepare_system_memory(surface);
1797         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
1798     }
1799
1800     return WINED3D_OK;
1801 }
1802
1803 /* Context activation is done by the caller. */
1804 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1805 {
1806     if (!surface->resource.heapMemory)
1807     {
1808         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1809         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1810                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1811     }
1812
1813     ENTER_GL();
1814     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1815     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1816     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1817             surface->resource.size, surface->resource.allocatedMemory));
1818     checkGLcall("glGetBufferSubDataARB");
1819     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1820     checkGLcall("glDeleteBuffersARB");
1821     LEAVE_GL();
1822
1823     surface->pbo = 0;
1824     surface->flags &= ~SFLAG_PBO;
1825 }
1826
1827 /* Do not call while under the GL lock. */
1828 static void surface_unload(struct wined3d_resource *resource)
1829 {
1830     struct wined3d_surface *surface = surface_from_resource(resource);
1831     struct wined3d_renderbuffer_entry *entry, *entry2;
1832     struct wined3d_device *device = resource->device;
1833     const struct wined3d_gl_info *gl_info;
1834     struct wined3d_context *context;
1835
1836     TRACE("surface %p.\n", surface);
1837
1838     if (resource->pool == WINED3DPOOL_DEFAULT)
1839     {
1840         /* Default pool resources are supposed to be destroyed before Reset is called.
1841          * Implicit resources stay however. So this means we have an implicit render target
1842          * or depth stencil. The content may be destroyed, but we still have to tear down
1843          * opengl resources, so we cannot leave early.
1844          *
1845          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1846          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1847          * or the depth stencil into an FBO the texture or render buffer will be removed
1848          * and all flags get lost
1849          */
1850         surface_init_sysmem(surface);
1851     }
1852     else
1853     {
1854         /* Load the surface into system memory */
1855         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1856         surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
1857     }
1858     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1859     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1860     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1861
1862     context = context_acquire(device, NULL);
1863     gl_info = context->gl_info;
1864
1865     /* Destroy PBOs, but load them into real sysmem before */
1866     if (surface->flags & SFLAG_PBO)
1867         surface_remove_pbo(surface, gl_info);
1868
1869     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1870      * all application-created targets the application has to release the surface
1871      * before calling _Reset
1872      */
1873     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1874     {
1875         ENTER_GL();
1876         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1877         LEAVE_GL();
1878         list_remove(&entry->entry);
1879         HeapFree(GetProcessHeap(), 0, entry);
1880     }
1881     list_init(&surface->renderbuffers);
1882     surface->current_renderbuffer = NULL;
1883
1884     /* If we're in a texture, the texture name belongs to the texture.
1885      * Otherwise, destroy it. */
1886     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1887     {
1888         ENTER_GL();
1889         glDeleteTextures(1, &surface->texture_name);
1890         surface->texture_name = 0;
1891         glDeleteTextures(1, &surface->texture_name_srgb);
1892         surface->texture_name_srgb = 0;
1893         LEAVE_GL();
1894     }
1895
1896     context_release(context);
1897
1898     resource_unload(resource);
1899 }
1900
1901 static const struct wined3d_resource_ops surface_resource_ops =
1902 {
1903     surface_unload,
1904 };
1905
1906 static const struct wined3d_surface_ops surface_ops =
1907 {
1908     surface_private_setup,
1909     surface_cleanup,
1910     surface_realize_palette,
1911     surface_draw_overlay,
1912     surface_preload,
1913     surface_map,
1914     surface_unmap,
1915     surface_getdc,
1916     surface_flip,
1917     surface_set_mem,
1918 };
1919
1920 /*****************************************************************************
1921  * Initializes the GDI surface, aka creates the DIB section we render to
1922  * The DIB section creation is done by calling GetDC, which will create the
1923  * section and releasing the dc to allow the app to use it. The dib section
1924  * will stay until the surface is released
1925  *
1926  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1927  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1928  * avoid confusion in the shared surface code.
1929  *
1930  * Returns:
1931  *  WINED3D_OK on success
1932  *  The return values of called methods on failure
1933  *
1934  *****************************************************************************/
1935 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1936 {
1937     HRESULT hr;
1938
1939     TRACE("surface %p.\n", surface);
1940
1941     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1942     {
1943         ERR("Overlays not yet supported by GDI surfaces.\n");
1944         return WINED3DERR_INVALIDCALL;
1945     }
1946
1947     /* Sysmem textures have memory already allocated - release it,
1948      * this avoids an unnecessary memcpy. */
1949     hr = surface_create_dib_section(surface);
1950     if (SUCCEEDED(hr))
1951     {
1952         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1953         surface->resource.heapMemory = NULL;
1954         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1955     }
1956
1957     /* We don't mind the nonpow2 stuff in GDI. */
1958     surface->pow2Width = surface->resource.width;
1959     surface->pow2Height = surface->resource.height;
1960
1961     return WINED3D_OK;
1962 }
1963
1964 static void surface_gdi_cleanup(struct wined3d_surface *surface)
1965 {
1966     TRACE("surface %p.\n", surface);
1967
1968     if (surface->flags & SFLAG_DIBSECTION)
1969     {
1970         /* Release the DC. */
1971         SelectObject(surface->hDC, surface->dib.holdbitmap);
1972         DeleteDC(surface->hDC);
1973         /* Release the DIB section. */
1974         DeleteObject(surface->dib.DIBsection);
1975         surface->dib.bitmap_data = NULL;
1976         surface->resource.allocatedMemory = NULL;
1977     }
1978
1979     if (surface->flags & SFLAG_USERPTR)
1980         wined3d_surface_set_mem(surface, NULL);
1981     if (surface->overlay_dest)
1982         list_remove(&surface->overlay_entry);
1983
1984     HeapFree(GetProcessHeap(), 0, surface->palette9);
1985
1986     resource_cleanup(&surface->resource);
1987 }
1988
1989 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1990 {
1991     struct wined3d_palette *palette = surface->palette;
1992
1993     TRACE("surface %p.\n", surface);
1994
1995     if (!palette) return;
1996
1997     if (surface->flags & SFLAG_DIBSECTION)
1998     {
1999         RGBQUAD col[256];
2000         unsigned int i;
2001
2002         TRACE("Updating the DC's palette.\n");
2003
2004         for (i = 0; i < 256; ++i)
2005         {
2006             col[i].rgbRed = palette->palents[i].peRed;
2007             col[i].rgbGreen = palette->palents[i].peGreen;
2008             col[i].rgbBlue = palette->palents[i].peBlue;
2009             col[i].rgbReserved = 0;
2010         }
2011         SetDIBColorTable(surface->hDC, 0, 256, col);
2012     }
2013
2014     /* Update the image because of the palette change. Some games like e.g.
2015      * Red Alert call SetEntries a lot to implement fading. */
2016     /* Tell the swapchain to update the screen. */
2017     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2018     {
2019         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2020         if (surface == swapchain->front_buffer)
2021         {
2022             x11_copy_to_screen(swapchain, NULL);
2023         }
2024     }
2025 }
2026
2027 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
2028 {
2029     FIXME("GDI surfaces can't draw overlays yet.\n");
2030     return E_FAIL;
2031 }
2032
/* Preloading is not supported for GDI surfaces; the call is only logged,
 * first as a trace and then as an error. */
static void gdi_surface_preload(struct wined3d_surface *surface)
{
    TRACE("surface %p.\n", surface);
    ERR("Preloading GDI surfaces is not supported.\n");
}
2039
2040 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2041 {
2042     TRACE("surface %p, rect %s, flags %#x.\n",
2043             surface, wine_dbgstr_rect(rect), flags);
2044
2045     if (!surface->resource.allocatedMemory)
2046     {
2047         /* This happens on gdi surfaces if the application set a user pointer
2048          * and resets it. Recreate the DIB section. */
2049         surface_create_dib_section(surface);
2050         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2051     }
2052 }
2053
2054 static void gdi_surface_unmap(struct wined3d_surface *surface)
2055 {
2056     TRACE("surface %p.\n", surface);
2057
2058     /* Tell the swapchain to update the screen. */
2059     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2060     {
2061         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2062         if (surface == swapchain->front_buffer)
2063         {
2064             x11_copy_to_screen(swapchain, &surface->lockedRect);
2065         }
2066     }
2067
2068     memset(&surface->lockedRect, 0, sizeof(RECT));
2069 }
2070
2071 static HRESULT gdi_surface_getdc(struct wined3d_surface *surface)
2072 {
2073     WINED3DLOCKED_RECT lock;
2074     HRESULT hr;
2075
2076     TRACE("surface %p.\n", surface);
2077
2078     /* Should have a DIB section already. */
2079     if (!(surface->flags & SFLAG_DIBSECTION))
2080     {
2081         WARN("DC not supported on this surface\n");
2082         return WINED3DERR_INVALIDCALL;
2083     }
2084
2085     /* Map the surface. */
2086     hr = wined3d_surface_map(surface, &lock, NULL, 0);
2087     if (FAILED(hr))
2088         ERR("Map failed, hr %#x.\n", hr);
2089
2090     return hr;
2091 }
2092
2093 static HRESULT gdi_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override)
2094 {
2095     TRACE("surface %p, override %p.\n", surface, override);
2096
2097     return WINED3D_OK;
2098 }
2099
2100 static HRESULT gdi_surface_set_mem(struct wined3d_surface *surface, void *mem)
2101 {
2102     TRACE("surface %p, mem %p.\n", surface, mem);
2103
2104     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
2105     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
2106     {
2107         ERR("Not supported on render targets.\n");
2108         return WINED3DERR_INVALIDCALL;
2109     }
2110
2111     if (mem && mem != surface->resource.allocatedMemory)
2112     {
2113         void *release = NULL;
2114
2115         /* Do I have to copy the old surface content? */
2116         if (surface->flags & SFLAG_DIBSECTION)
2117         {
2118             SelectObject(surface->hDC, surface->dib.holdbitmap);
2119             DeleteDC(surface->hDC);
2120             /* Release the DIB section. */
2121             DeleteObject(surface->dib.DIBsection);
2122             surface->dib.bitmap_data = NULL;
2123             surface->resource.allocatedMemory = NULL;
2124             surface->hDC = NULL;
2125             surface->flags &= ~SFLAG_DIBSECTION;
2126         }
2127         else if (!(surface->flags & SFLAG_USERPTR))
2128         {
2129             release = surface->resource.allocatedMemory;
2130         }
2131         surface->resource.allocatedMemory = mem;
2132         surface->flags |= SFLAG_USERPTR | SFLAG_INSYSMEM;
2133
2134         /* Now free the old memory, if any. */
2135         HeapFree(GetProcessHeap(), 0, release);
2136     }
2137     else if (surface->flags & SFLAG_USERPTR)
2138     {
2139         /* Map() and GetDC() will re-create the dib section and allocated memory. */
2140         surface->resource.allocatedMemory = NULL;
2141         surface->flags &= ~SFLAG_USERPTR;
2142     }
2143
2144     return WINED3D_OK;
2145 }
2146
2147 static const struct wined3d_surface_ops gdi_surface_ops =
2148 {
2149     gdi_surface_private_setup,
2150     surface_gdi_cleanup,
2151     gdi_surface_realize_palette,
2152     gdi_surface_draw_overlay,
2153     gdi_surface_preload,
2154     gdi_surface_map,
2155     gdi_surface_unmap,
2156     gdi_surface_getdc,
2157     gdi_surface_flip,
2158     gdi_surface_set_mem,
2159 };
2160
2161 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2162 {
2163     GLuint *name;
2164     DWORD flag;
2165
2166     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2167
2168     if(srgb)
2169     {
2170         name = &surface->texture_name_srgb;
2171         flag = SFLAG_INSRGBTEX;
2172     }
2173     else
2174     {
2175         name = &surface->texture_name;
2176         flag = SFLAG_INTEXTURE;
2177     }
2178
2179     if (!*name && new_name)
2180     {
2181         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2182          * surface has no texture name yet. See if we can get rid of this. */
2183         if (surface->flags & flag)
2184             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2185         surface_modify_location(surface, flag, FALSE);
2186     }
2187
2188     *name = new_name;
2189     surface_force_reload(surface);
2190 }
2191
2192 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2193 {
2194     TRACE("surface %p, target %#x.\n", surface, target);
2195
2196     if (surface->texture_target != target)
2197     {
2198         if (target == GL_TEXTURE_RECTANGLE_ARB)
2199         {
2200             surface->flags &= ~SFLAG_NORMCOORD;
2201         }
2202         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2203         {
2204             surface->flags |= SFLAG_NORMCOORD;
2205         }
2206     }
2207     surface->texture_target = target;
2208     surface_force_reload(surface);
2209 }
2210
2211 /* Context activation is done by the caller. */
2212 void surface_bind(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
2213 {
2214     TRACE("surface %p, gl_info %p, srgb %#x.\n", surface, gl_info, srgb);
2215
2216     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2217     {
2218         struct wined3d_texture *texture = surface->container.u.texture;
2219
2220         TRACE("Passing to container (%p).\n", texture);
2221         texture->texture_ops->texture_bind(texture, gl_info, srgb);
2222     }
2223     else
2224     {
2225         if (surface->texture_level)
2226         {
2227             ERR("Standalone surface %p is non-zero texture level %u.\n",
2228                     surface, surface->texture_level);
2229         }
2230
2231         if (srgb)
2232             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2233
2234         ENTER_GL();
2235
2236         if (!surface->texture_name)
2237         {
2238             glGenTextures(1, &surface->texture_name);
2239             checkGLcall("glGenTextures");
2240
2241             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2242
2243             glBindTexture(surface->texture_target, surface->texture_name);
2244             checkGLcall("glBindTexture");
2245             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2246             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2247             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2248             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2249             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2250             checkGLcall("glTexParameteri");
2251         }
2252         else
2253         {
2254             glBindTexture(surface->texture_target, surface->texture_name);
2255             checkGLcall("glBindTexture");
2256         }
2257
2258         LEAVE_GL();
2259     }
2260 }
2261
2262 /* This call just downloads data, the caller is responsible for binding the
2263  * correct texture. */
2264 /* Context activation is done by the caller. */
2265 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2266 {
2267     const struct wined3d_format *format = surface->resource.format;
2268
2269     /* Only support read back of converted P8 surfaces. */
2270     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2271     {
2272         FIXME("Readback conversion not supported for format %s.\n", debug_d3dformat(format->id));
2273         return;
2274     }
2275
2276     ENTER_GL();
2277
2278     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2279     {
2280         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2281                 surface, surface->texture_level, format->glFormat, format->glType,
2282                 surface->resource.allocatedMemory);
2283
2284         if (surface->flags & SFLAG_PBO)
2285         {
2286             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2287             checkGLcall("glBindBufferARB");
2288             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2289             checkGLcall("glGetCompressedTexImageARB");
2290             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2291             checkGLcall("glBindBufferARB");
2292         }
2293         else
2294         {
2295             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2296                     surface->texture_level, surface->resource.allocatedMemory));
2297             checkGLcall("glGetCompressedTexImageARB");
2298         }
2299
2300         LEAVE_GL();
2301     }
2302     else
2303     {
2304         void *mem;
2305         GLenum gl_format = format->glFormat;
2306         GLenum gl_type = format->glType;
2307         int src_pitch = 0;
2308         int dst_pitch = 0;
2309
2310         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2311         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2312         {
2313             gl_format = GL_ALPHA;
2314             gl_type = GL_UNSIGNED_BYTE;
2315         }
2316
2317         if (surface->flags & SFLAG_NONPOW2)
2318         {
2319             unsigned char alignment = surface->resource.device->surface_alignment;
2320             src_pitch = format->byte_count * surface->pow2Width;
2321             dst_pitch = wined3d_surface_get_pitch(surface);
2322             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2323             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2324         }
2325         else
2326         {
2327             mem = surface->resource.allocatedMemory;
2328         }
2329
2330         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2331                 surface, surface->texture_level, gl_format, gl_type, mem);
2332
2333         if (surface->flags & SFLAG_PBO)
2334         {
2335             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2336             checkGLcall("glBindBufferARB");
2337
2338             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2339             checkGLcall("glGetTexImage");
2340
2341             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2342             checkGLcall("glBindBufferARB");
2343         }
2344         else
2345         {
2346             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2347             checkGLcall("glGetTexImage");
2348         }
2349         LEAVE_GL();
2350
2351         if (surface->flags & SFLAG_NONPOW2)
2352         {
2353             const BYTE *src_data;
2354             BYTE *dst_data;
2355             UINT y;
2356             /*
2357              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2358              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2359              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2360              *
2361              * We're doing this...
2362              *
2363              * instead of boxing the texture :
2364              * |<-texture width ->|  -->pow2width|   /\
2365              * |111111111111111111|              |   |
2366              * |222 Texture 222222| boxed empty  | texture height
2367              * |3333 Data 33333333|              |   |
2368              * |444444444444444444|              |   \/
2369              * -----------------------------------   |
2370              * |     boxed  empty | boxed empty  | pow2height
2371              * |                  |              |   \/
2372              * -----------------------------------
2373              *
2374              *
2375              * we're repacking the data to the expected texture width
2376              *
2377              * |<-texture width ->|  -->pow2width|   /\
2378              * |111111111111111111222222222222222|   |
2379              * |222333333333333333333444444444444| texture height
2380              * |444444                           |   |
2381              * |                                 |   \/
2382              * |                                 |   |
2383              * |            empty                | pow2height
2384              * |                                 |   \/
2385              * -----------------------------------
2386              *
2387              * == is the same as
2388              *
2389              * |<-texture width ->|    /\
2390              * |111111111111111111|
2391              * |222222222222222222|texture height
2392              * |333333333333333333|
2393              * |444444444444444444|    \/
2394              * --------------------
2395              *
2396              * this also means that any references to allocatedMemory should work with the data as if were a
2397              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2398              *
2399              * internally the texture is still stored in a boxed format so any references to textureName will
2400              * get a boxed texture with width pow2width and not a texture of width resource.width.
2401              *
2402              * Performance should not be an issue, because applications normally do not lock the surfaces when
2403              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2404              * and doesn't have to be re-read. */
2405             src_data = mem;
2406             dst_data = surface->resource.allocatedMemory;
2407             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2408             for (y = 1; y < surface->resource.height; ++y)
2409             {
2410                 /* skip the first row */
2411                 src_data += src_pitch;
2412                 dst_data += dst_pitch;
2413                 memcpy(dst_data, src_data, dst_pitch);
2414             }
2415
2416             HeapFree(GetProcessHeap(), 0, mem);
2417         }
2418     }
2419
2420     /* Surface has now been downloaded */
2421     surface->flags |= SFLAG_INSYSMEM;
2422 }
2423
2424 /* This call just uploads data, the caller is responsible for binding the
2425  * correct texture. */
2426 /* Context activation is done by the caller. */
2427 void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2428         const struct wined3d_format *format, const RECT *src_rect, UINT src_w, const POINT *dst_point,
2429         BOOL srgb, const struct wined3d_bo_address *data)
2430 {
2431     UINT update_w = src_rect->right - src_rect->left;
2432     UINT update_h = src_rect->bottom - src_rect->top;
2433
2434     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_w %u, dst_point %p, srgb %#x, data {%#x:%p}.\n",
2435             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_w,
2436             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2437
2438     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2439         update_h *= format->heightscale;
2440
2441     ENTER_GL();
2442
2443     if (data->buffer_object)
2444     {
2445         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2446         checkGLcall("glBindBufferARB");
2447     }
2448
2449     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2450     {
2451         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2452         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2453         UINT src_pitch = wined3d_format_calculate_size(format, 1, src_w, 1);
2454         const BYTE *addr = data->addr;
2455         GLenum internal;
2456
2457         addr += (src_rect->top / format->block_height) * src_pitch;
2458         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2459
2460         if (srgb)
2461             internal = format->glGammaInternal;
2462         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2463             internal = format->rtInternal;
2464         else
2465             internal = format->glInternal;
2466
2467         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2468                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2469                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2470
2471         if (row_length == src_pitch)
2472         {
2473             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2474                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2475         }
2476         else
2477         {
2478             UINT row, y;
2479
2480             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2481              * can't use the unpack row length like below. */
2482             for (row = 0, y = dst_point->y; row < row_count; ++row)
2483             {
2484                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2485                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2486                 y += format->block_height;
2487                 addr += src_pitch;
2488             }
2489         }
2490         checkGLcall("glCompressedTexSubImage2DARB");
2491     }
2492     else
2493     {
2494         const BYTE *addr = data->addr;
2495
2496         addr += src_rect->top * src_w * format->byte_count;
2497         addr += src_rect->left * format->byte_count;
2498
2499         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2500                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2501                 update_w, update_h, format->glFormat, format->glType, addr);
2502
2503         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_w);
2504         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2505                 update_w, update_h, format->glFormat, format->glType, addr);
2506         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2507         checkGLcall("glTexSubImage2D");
2508     }
2509
2510     if (data->buffer_object)
2511     {
2512         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2513         checkGLcall("glBindBufferARB");
2514     }
2515
2516     LEAVE_GL();
2517
2518     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2519     {
2520         struct wined3d_device *device = surface->resource.device;
2521         unsigned int i;
2522
2523         for (i = 0; i < device->context_count; ++i)
2524         {
2525             context_surface_update(device->contexts[i], surface);
2526         }
2527     }
2528 }
2529
2530 /* This call just allocates the texture, the caller is responsible for binding
2531  * the correct texture. */
2532 /* Context activation is done by the caller. */
2533 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2534         const struct wined3d_format *format, BOOL srgb)
2535 {
2536     BOOL enable_client_storage = FALSE;
2537     GLsizei width = surface->pow2Width;
2538     GLsizei height = surface->pow2Height;
2539     const BYTE *mem = NULL;
2540     GLenum internal;
2541
2542     if (srgb)
2543     {
2544         internal = format->glGammaInternal;
2545     }
2546     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2547     {
2548         internal = format->rtInternal;
2549     }
2550     else
2551     {
2552         internal = format->glInternal;
2553     }
2554
2555     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2556
2557     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2558             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2559             internal, width, height, format->glFormat, format->glType);
2560
2561     ENTER_GL();
2562
2563     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2564     {
2565         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2566                 || !surface->resource.allocatedMemory)
2567         {
2568             /* In some cases we want to disable client storage.
2569              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2570              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2571              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2572              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2573              */
2574             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2575             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2576             surface->flags &= ~SFLAG_CLIENT;
2577             enable_client_storage = TRUE;
2578         }
2579         else
2580         {
2581             surface->flags |= SFLAG_CLIENT;
2582
2583             /* Point OpenGL to our allocated texture memory. Do not use
2584              * resource.allocatedMemory here because it might point into a
2585              * PBO. Instead use heapMemory, but get the alignment right. */
2586             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2587                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2588         }
2589     }
2590
2591     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2592     {
2593         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2594                 internal, width, height, 0, surface->resource.size, mem));
2595         checkGLcall("glCompressedTexImage2DARB");
2596     }
2597     else
2598     {
2599         glTexImage2D(surface->texture_target, surface->texture_level,
2600                 internal, width, height, 0, format->glFormat, format->glType, mem);
2601         checkGLcall("glTexImage2D");
2602     }
2603
2604     if(enable_client_storage) {
2605         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2606         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2607     }
2608     LEAVE_GL();
2609 }
2610
2611 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2612  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2613 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2614 /* GL locking is done by the caller */
2615 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2616 {
2617     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2618     struct wined3d_renderbuffer_entry *entry;
2619     GLuint renderbuffer = 0;
2620     unsigned int src_width, src_height;
2621     unsigned int width, height;
2622
2623     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2624     {
2625         width = rt->pow2Width;
2626         height = rt->pow2Height;
2627     }
2628     else
2629     {
2630         width = surface->pow2Width;
2631         height = surface->pow2Height;
2632     }
2633
2634     src_width = surface->pow2Width;
2635     src_height = surface->pow2Height;
2636
2637     /* A depth stencil smaller than the render target is not valid */
2638     if (width > src_width || height > src_height) return;
2639
2640     /* Remove any renderbuffer set if the sizes match */
2641     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2642             || (width == src_width && height == src_height))
2643     {
2644         surface->current_renderbuffer = NULL;
2645         return;
2646     }
2647
2648     /* Look if we've already got a renderbuffer of the correct dimensions */
2649     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2650     {
2651         if (entry->width == width && entry->height == height)
2652         {
2653             renderbuffer = entry->id;
2654             surface->current_renderbuffer = entry;
2655             break;
2656         }
2657     }
2658
2659     if (!renderbuffer)
2660     {
2661         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2662         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2663         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2664                 surface->resource.format->glInternal, width, height);
2665
2666         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2667         entry->width = width;
2668         entry->height = height;
2669         entry->id = renderbuffer;
2670         list_add_head(&surface->renderbuffers, &entry->entry);
2671
2672         surface->current_renderbuffer = entry;
2673     }
2674
2675     checkGLcall("set_compatible_renderbuffer");
2676 }
2677
2678 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2679 {
2680     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2681
2682     TRACE("surface %p.\n", surface);
2683
2684     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2685     {
2686         ERR("Surface %p is not on a swapchain.\n", surface);
2687         return GL_NONE;
2688     }
2689
2690     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2691     {
2692         if (swapchain->render_to_fbo)
2693         {
2694             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2695             return GL_COLOR_ATTACHMENT0;
2696         }
2697         TRACE("Returning GL_BACK\n");
2698         return GL_BACK;
2699     }
2700     else if (surface == swapchain->front_buffer)
2701     {
2702         TRACE("Returning GL_FRONT\n");
2703         return GL_FRONT;
2704     }
2705
2706     FIXME("Higher back buffer, returning GL_BACK\n");
2707     return GL_BACK;
2708 }
2709
2710 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2711 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2712 {
2713     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2714
2715     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2716         /* No partial locking for textures yet. */
2717         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2718
2719     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2720     if (dirty_rect)
2721     {
2722         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2723         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2724         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2725         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2726     }
2727     else
2728     {
2729         surface->dirtyRect.left = 0;
2730         surface->dirtyRect.top = 0;
2731         surface->dirtyRect.right = surface->resource.width;
2732         surface->dirtyRect.bottom = surface->resource.height;
2733     }
2734
2735     /* if the container is a texture then mark it dirty. */
2736     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2737     {
2738         TRACE("Passing to container.\n");
2739         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2740     }
2741 }
2742
2743 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2744 {
2745     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2746     BOOL ck_changed;
2747
2748     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2749
2750     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2751     {
2752         ERR("Not supported on scratch surfaces.\n");
2753         return WINED3DERR_INVALIDCALL;
2754     }
2755
2756     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2757
2758     /* Reload if either the texture and sysmem have different ideas about the
2759      * color key, or the actual key values changed. */
2760     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2761             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2762             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2763     {
2764         TRACE("Reloading because of color keying\n");
2765         /* To perform the color key conversion we need a sysmem copy of
2766          * the surface. Make sure we have it. */
2767
2768         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2769         /* Make sure the texture is reloaded because of the color key change,
2770          * this kills performance though :( */
2771         /* TODO: This is not necessarily needed with hw palettized texture support. */
2772         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2773         /* Switching color keying on / off may change the internal format. */
2774         if (ck_changed)
2775             surface_force_reload(surface);
2776     }
2777     else if (!(surface->flags & flag))
2778     {
2779         TRACE("Reloading because surface is dirty.\n");
2780     }
2781     else
2782     {
2783         TRACE("surface is already in texture\n");
2784         return WINED3D_OK;
2785     }
2786
2787     /* No partial locking for textures yet. */
2788     surface_load_location(surface, flag, NULL);
2789     surface_evict_sysmem(surface);
2790
2791     return WINED3D_OK;
2792 }
2793
/* Convert a 32 bit float to a 16 bit half float (1 sign bit, 5 exponent bits
 * with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) returns 0x0000, NaN returns 0x7c01, and infinities
 * return 0x7c00 / 0xfc00. Values too large for a half float become infinity,
 * values too small become denormals or zero. Rounding is to nearest, with
 * ties away from zero.
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    int exponent = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), so that its integer part is the 11
     * bit mantissa, including the implicit leading one. */
    if (tmp < 1024.0f)
    {
        do
        {
            tmp = tmp * 2.0f;
            exponent--;
        } while (tmp < 1024.0f);
    }
    else if (tmp >= 2048.0f)
    {
        do
        {
            tmp /= 2.0f;
            exponent++;
        } while (tmp >= 2048.0f);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the 11 bit mantissa (2047.5 -> 2048).
     * Renormalize, otherwise the carry is masked away below and the result
     * ends up being half of what it should be. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exponent;
    }

    exponent += 10;  /* Normalize the mantissa. */
    exponent += 15;  /* Exponent is encoded with excess 15. */

    if (exponent > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exponent <= 0)
    {
        /* exponent == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exponent <= 0)
        {
            mantissa = mantissa >> 1;
            ++exponent;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exponent << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2856
2857 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2858 {
2859     ULONG refcount;
2860
2861     TRACE("Surface %p, container %p of type %#x.\n",
2862             surface, surface->container.u.base, surface->container.type);
2863
2864     switch (surface->container.type)
2865     {
2866         case WINED3D_CONTAINER_TEXTURE:
2867             return wined3d_texture_incref(surface->container.u.texture);
2868
2869         case WINED3D_CONTAINER_SWAPCHAIN:
2870             return wined3d_swapchain_incref(surface->container.u.swapchain);
2871
2872         default:
2873             ERR("Unhandled container type %#x.\n", surface->container.type);
2874         case WINED3D_CONTAINER_NONE:
2875             break;
2876     }
2877
2878     refcount = InterlockedIncrement(&surface->resource.ref);
2879     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2880
2881     return refcount;
2882 }
2883
2884 /* Do not call while under the GL lock. */
2885 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2886 {
2887     ULONG refcount;
2888
2889     TRACE("Surface %p, container %p of type %#x.\n",
2890             surface, surface->container.u.base, surface->container.type);
2891
2892     switch (surface->container.type)
2893     {
2894         case WINED3D_CONTAINER_TEXTURE:
2895             return wined3d_texture_decref(surface->container.u.texture);
2896
2897         case WINED3D_CONTAINER_SWAPCHAIN:
2898             return wined3d_swapchain_decref(surface->container.u.swapchain);
2899
2900         default:
2901             ERR("Unhandled container type %#x.\n", surface->container.type);
2902         case WINED3D_CONTAINER_NONE:
2903             break;
2904     }
2905
2906     refcount = InterlockedDecrement(&surface->resource.ref);
2907     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2908
2909     if (!refcount)
2910     {
2911         surface->surface_ops->surface_cleanup(surface);
2912         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2913
2914         TRACE("Destroyed surface %p.\n", surface);
2915         HeapFree(GetProcessHeap(), 0, surface);
2916     }
2917
2918     return refcount;
2919 }
2920
2921 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2922 {
2923     return resource_set_priority(&surface->resource, priority);
2924 }
2925
2926 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2927 {
2928     return resource_get_priority(&surface->resource);
2929 }
2930
2931 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2932 {
2933     TRACE("surface %p.\n", surface);
2934
2935     surface->surface_ops->surface_preload(surface);
2936 }
2937
2938 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2939 {
2940     TRACE("surface %p.\n", surface);
2941
2942     return surface->resource.parent;
2943 }
2944
2945 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2946 {
2947     TRACE("surface %p.\n", surface);
2948
2949     return &surface->resource;
2950 }
2951
2952 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2953 {
2954     TRACE("surface %p, flags %#x.\n", surface, flags);
2955
2956     switch (flags)
2957     {
2958         case WINEDDGBS_CANBLT:
2959         case WINEDDGBS_ISBLTDONE:
2960             return WINED3D_OK;
2961
2962         default:
2963             return WINED3DERR_INVALIDCALL;
2964     }
2965 }
2966
2967 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2968 {
2969     TRACE("surface %p, flags %#x.\n", surface, flags);
2970
2971     /* XXX: DDERR_INVALIDSURFACETYPE */
2972
2973     switch (flags)
2974     {
2975         case WINEDDGFS_CANFLIP:
2976         case WINEDDGFS_ISFLIPDONE:
2977             return WINED3D_OK;
2978
2979         default:
2980             return WINED3DERR_INVALIDCALL;
2981     }
2982 }
2983
2984 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2985 {
2986     TRACE("surface %p.\n", surface);
2987
2988     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2989     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2990 }
2991
2992 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2993 {
2994     TRACE("surface %p.\n", surface);
2995
2996     /* So far we don't lose anything :) */
2997     surface->flags &= ~SFLAG_LOST;
2998     return WINED3D_OK;
2999 }
3000
3001 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3002 {
3003     TRACE("surface %p, palette %p.\n", surface, palette);
3004
3005     if (surface->palette == palette)
3006     {
3007         TRACE("Nop palette change.\n");
3008         return WINED3D_OK;
3009     }
3010
3011     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3012         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3013
3014     surface->palette = palette;
3015
3016     if (palette)
3017     {
3018         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3019             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3020
3021         surface->surface_ops->surface_realize_palette(surface);
3022     }
3023
3024     return WINED3D_OK;
3025 }
3026
3027 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3028         DWORD flags, const WINEDDCOLORKEY *color_key)
3029 {
3030     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3031
3032     if (flags & WINEDDCKEY_COLORSPACE)
3033     {
3034         FIXME(" colorkey value not supported (%08x) !\n", flags);
3035         return WINED3DERR_INVALIDCALL;
3036     }
3037
3038     /* Dirtify the surface, but only if a key was changed. */
3039     if (color_key)
3040     {
3041         switch (flags & ~WINEDDCKEY_COLORSPACE)
3042         {
3043             case WINEDDCKEY_DESTBLT:
3044                 surface->DestBltCKey = *color_key;
3045                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3046                 break;
3047
3048             case WINEDDCKEY_DESTOVERLAY:
3049                 surface->DestOverlayCKey = *color_key;
3050                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3051                 break;
3052
3053             case WINEDDCKEY_SRCOVERLAY:
3054                 surface->SrcOverlayCKey = *color_key;
3055                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3056                 break;
3057
3058             case WINEDDCKEY_SRCBLT:
3059                 surface->SrcBltCKey = *color_key;
3060                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3061                 break;
3062         }
3063     }
3064     else
3065     {
3066         switch (flags & ~WINEDDCKEY_COLORSPACE)
3067         {
3068             case WINEDDCKEY_DESTBLT:
3069                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3070                 break;
3071
3072             case WINEDDCKEY_DESTOVERLAY:
3073                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3074                 break;
3075
3076             case WINEDDCKEY_SRCOVERLAY:
3077                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3078                 break;
3079
3080             case WINEDDCKEY_SRCBLT:
3081                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3082                 break;
3083         }
3084     }
3085
3086     return WINED3D_OK;
3087 }
3088
3089 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3090 {
3091     TRACE("surface %p.\n", surface);
3092
3093     return surface->palette;
3094 }
3095
3096 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3097 {
3098     const struct wined3d_format *format = surface->resource.format;
3099     DWORD pitch;
3100
3101     TRACE("surface %p.\n", surface);
3102
3103     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3104     {
3105         /* Since compressed formats are block based, pitch means the amount of
3106          * bytes to the next row of block rather than the next row of pixels. */
3107         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3108         pitch = row_block_count * format->block_byte_count;
3109     }
3110     else
3111     {
3112         unsigned char alignment = surface->resource.device->surface_alignment;
3113         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3114         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3115     }
3116
3117     TRACE("Returning %u.\n", pitch);
3118
3119     return pitch;
3120 }
3121
3122 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3123 {
3124     TRACE("surface %p, mem %p.\n", surface, mem);
3125
3126     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3127     {
3128         WARN("Surface is locked or the DC is in use.\n");
3129         return WINED3DERR_INVALIDCALL;
3130     }
3131
3132     return surface->surface_ops->surface_set_mem(surface, mem);
3133 }
3134
3135 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3136 {
3137     LONG w, h;
3138
3139     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3140
3141     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3142     {
3143         WARN("Not an overlay surface.\n");
3144         return WINEDDERR_NOTAOVERLAYSURFACE;
3145     }
3146
3147     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3148     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3149     surface->overlay_destrect.left = x;
3150     surface->overlay_destrect.top = y;
3151     surface->overlay_destrect.right = x + w;
3152     surface->overlay_destrect.bottom = y + h;
3153
3154     surface->surface_ops->surface_draw_overlay(surface);
3155
3156     return WINED3D_OK;
3157 }
3158
3159 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3160 {
3161     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3162
3163     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3164     {
3165         TRACE("Not an overlay surface.\n");
3166         return WINEDDERR_NOTAOVERLAYSURFACE;
3167     }
3168
3169     if (!surface->overlay_dest)
3170     {
3171         TRACE("Overlay not visible.\n");
3172         *x = 0;
3173         *y = 0;
3174         return WINEDDERR_OVERLAYNOTVISIBLE;
3175     }
3176
3177     *x = surface->overlay_destrect.left;
3178     *y = surface->overlay_destrect.top;
3179
3180     TRACE("Returning position %d, %d.\n", *x, *y);
3181
3182     return WINED3D_OK;
3183 }
3184
3185 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3186         DWORD flags, struct wined3d_surface *ref)
3187 {
3188     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3189
3190     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3191     {
3192         TRACE("Not an overlay surface.\n");
3193         return WINEDDERR_NOTAOVERLAYSURFACE;
3194     }
3195
3196     return WINED3D_OK;
3197 }
3198
3199 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3200         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3201 {
3202     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3203             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3204
3205     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3206     {
3207         WARN("Not an overlay surface.\n");
3208         return WINEDDERR_NOTAOVERLAYSURFACE;
3209     }
3210     else if (!dst_surface)
3211     {
3212         WARN("Dest surface is NULL.\n");
3213         return WINED3DERR_INVALIDCALL;
3214     }
3215
3216     if (src_rect)
3217     {
3218         surface->overlay_srcrect = *src_rect;
3219     }
3220     else
3221     {
3222         surface->overlay_srcrect.left = 0;
3223         surface->overlay_srcrect.top = 0;
3224         surface->overlay_srcrect.right = surface->resource.width;
3225         surface->overlay_srcrect.bottom = surface->resource.height;
3226     }
3227
3228     if (dst_rect)
3229     {
3230         surface->overlay_destrect = *dst_rect;
3231     }
3232     else
3233     {
3234         surface->overlay_destrect.left = 0;
3235         surface->overlay_destrect.top = 0;
3236         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3237         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3238     }
3239
3240     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3241     {
3242         list_remove(&surface->overlay_entry);
3243     }
3244
3245     if (flags & WINEDDOVER_SHOW)
3246     {
3247         if (surface->overlay_dest != dst_surface)
3248         {
3249             surface->overlay_dest = dst_surface;
3250             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3251         }
3252     }
3253     else if (flags & WINEDDOVER_HIDE)
3254     {
3255         /* tests show that the rectangles are erased on hide */
3256         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3257         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3258         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3259         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3260         surface->overlay_dest = NULL;
3261     }
3262
3263     surface->surface_ops->surface_draw_overlay(surface);
3264
3265     return WINED3D_OK;
3266 }
3267
3268 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3269 {
3270     TRACE("surface %p, clipper %p.\n", surface, clipper);
3271
3272     surface->clipper = clipper;
3273
3274     return WINED3D_OK;
3275 }
3276
3277 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3278 {
3279     TRACE("surface %p.\n", surface);
3280
3281     return surface->clipper;
3282 }
3283
3284 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3285 {
3286     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3287
3288     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3289
3290     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3291     {
3292         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3293         return WINED3DERR_INVALIDCALL;
3294     }
3295
3296     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3297             surface->pow2Width, surface->pow2Height);
3298     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3299     surface->resource.format = format;
3300
3301     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3302     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3303             format->glFormat, format->glInternal, format->glType);
3304
3305     return WINED3D_OK;
3306 }
3307
3308 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3309         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3310 {
3311     unsigned short *dst_s;
3312     const float *src_f;
3313     unsigned int x, y;
3314
3315     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3316
3317     for (y = 0; y < h; ++y)
3318     {
3319         src_f = (const float *)(src + y * pitch_in);
3320         dst_s = (unsigned short *) (dst + y * pitch_out);
3321         for (x = 0; x < w; ++x)
3322         {
3323             dst_s[x] = float_32_to_16(src_f + x);
3324         }
3325     }
3326 }
3327
3328 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3329         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3330 {
3331     static const unsigned char convert_5to8[] =
3332     {
3333         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3334         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3335         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3336         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3337     };
3338     static const unsigned char convert_6to8[] =
3339     {
3340         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3341         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3342         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3343         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3344         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3345         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3346         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3347         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3348     };
3349     unsigned int x, y;
3350
3351     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3352
3353     for (y = 0; y < h; ++y)
3354     {
3355         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3356         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3357         for (x = 0; x < w; ++x)
3358         {
3359             WORD pixel = src_line[x];
3360             dst_line[x] = 0xff000000
3361                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3362                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3363                     | convert_5to8[(pixel & 0x001f)];
3364         }
3365     }
3366 }
3367
3368 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3369  * in both cases we're just setting the X / Alpha channel to 0xff. */
3370 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3371         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3372 {
3373     unsigned int x, y;
3374
3375     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3376
3377     for (y = 0; y < h; ++y)
3378     {
3379         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3380         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3381
3382         for (x = 0; x < w; ++x)
3383         {
3384             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3385         }
3386     }
3387 }
3388
3389 static inline BYTE cliptobyte(int x)
3390 {
3391     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3392 }
3393
3394 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3395         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3396 {
3397     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3398     unsigned int x, y;
3399
3400     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3401
3402     for (y = 0; y < h; ++y)
3403     {
3404         const BYTE *src_line = src + y * pitch_in;
3405         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3406         for (x = 0; x < w; ++x)
3407         {
3408             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3409              *     C = Y - 16; D = U - 128; E = V - 128;
3410              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3411              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3412              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3413              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3414              * U and V are shared between the pixels. */
3415             if (!(x & 1)) /* For every even pixel, read new U and V. */
3416             {
3417                 d = (int) src_line[1] - 128;
3418                 e = (int) src_line[3] - 128;
3419                 r2 = 409 * e + 128;
3420                 g2 = - 100 * d - 208 * e + 128;
3421                 b2 = 516 * d + 128;
3422             }
3423             c2 = 298 * ((int) src_line[0] - 16);
3424             dst_line[x] = 0xff000000
3425                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3426                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3427                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3428                 /* Scale RGB values to 0..255 range,
3429                  * then clip them if still not in range (may be negative),
3430                  * then shift them within DWORD if necessary. */
3431             src_line += 2;
3432         }
3433     }
3434 }
3435
3436 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3437         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3438 {
3439     unsigned int x, y;
3440     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3441
3442     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3443
3444     for (y = 0; y < h; ++y)
3445     {
3446         const BYTE *src_line = src + y * pitch_in;
3447         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3448         for (x = 0; x < w; ++x)
3449         {
3450             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3451              *     C = Y - 16; D = U - 128; E = V - 128;
3452              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3453              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3454              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3455              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3456              * U and V are shared between the pixels. */
3457             if (!(x & 1)) /* For every even pixel, read new U and V. */
3458             {
3459                 d = (int) src_line[1] - 128;
3460                 e = (int) src_line[3] - 128;
3461                 r2 = 409 * e + 128;
3462                 g2 = - 100 * d - 208 * e + 128;
3463                 b2 = 516 * d + 128;
3464             }
3465             c2 = 298 * ((int) src_line[0] - 16);
3466             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3467                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3468                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3469                 /* Scale RGB values to 0..255 range,
3470                  * then clip them if still not in range (may be negative),
3471                  * then shift them within DWORD if necessary. */
3472             src_line += 2;
3473         }
3474     }
3475 }
3476
3477 struct d3dfmt_convertor_desc
3478 {
3479     enum wined3d_format_id from, to;
3480     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3481 };
3482
3483 static const struct d3dfmt_convertor_desc convertors[] =
3484 {
3485     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3486     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3487     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3488     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3489     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3490     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3491 };
3492
3493 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3494         enum wined3d_format_id to)
3495 {
3496     unsigned int i;
3497
3498     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3499     {
3500         if (convertors[i].from == from && convertors[i].to == to)
3501             return &convertors[i];
3502     }
3503
3504     return NULL;
3505 }
3506
3507 /*****************************************************************************
3508  * surface_convert_format
3509  *
3510  * Creates a duplicate of a surface in a different format. Is used by Blt to
3511  * blit between surfaces with different formats.
3512  *
3513  * Parameters
3514  *  source: Source surface
3515  *  fmt: Requested destination format
3516  *
3517  *****************************************************************************/
3518 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3519 {
3520     const struct d3dfmt_convertor_desc *conv;
3521     WINED3DLOCKED_RECT lock_src, lock_dst;
3522     struct wined3d_surface *ret = NULL;
3523     HRESULT hr;
3524
3525     conv = find_convertor(source->resource.format->id, to_fmt);
3526     if (!conv)
3527     {
3528         FIXME("Cannot find a conversion function from format %s to %s.\n",
3529                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3530         return NULL;
3531     }
3532
3533     wined3d_surface_create(source->resource.device, source->resource.width,
3534             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3535             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3536             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3537     if (!ret)
3538     {
3539         ERR("Failed to create a destination surface for conversion.\n");
3540         return NULL;
3541     }
3542
3543     memset(&lock_src, 0, sizeof(lock_src));
3544     memset(&lock_dst, 0, sizeof(lock_dst));
3545
3546     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3547     if (FAILED(hr))
3548     {
3549         ERR("Failed to lock the source surface.\n");
3550         wined3d_surface_decref(ret);
3551         return NULL;
3552     }
3553     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3554     if (FAILED(hr))
3555     {
3556         ERR("Failed to lock the destination surface.\n");
3557         wined3d_surface_unmap(source);
3558         wined3d_surface_decref(ret);
3559         return NULL;
3560     }
3561
3562     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3563             source->resource.width, source->resource.height);
3564
3565     wined3d_surface_unmap(ret);
3566     wined3d_surface_unmap(source);
3567
3568     return ret;
3569 }
3570
3571 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3572         unsigned int bpp, UINT pitch, DWORD color)
3573 {
3574     BYTE *first;
3575     int x, y;
3576
3577     /* Do first row */
3578
3579 #define COLORFILL_ROW(type) \
3580 do { \
3581     type *d = (type *)buf; \
3582     for (x = 0; x < width; ++x) \
3583         d[x] = (type)color; \
3584 } while(0)
3585
3586     switch (bpp)
3587     {
3588         case 1:
3589             COLORFILL_ROW(BYTE);
3590             break;
3591
3592         case 2:
3593             COLORFILL_ROW(WORD);
3594             break;
3595
3596         case 3:
3597         {
3598             BYTE *d = buf;
3599             for (x = 0; x < width; ++x, d += 3)
3600             {
3601                 d[0] = (color      ) & 0xFF;
3602                 d[1] = (color >>  8) & 0xFF;
3603                 d[2] = (color >> 16) & 0xFF;
3604             }
3605             break;
3606         }
3607         case 4:
3608             COLORFILL_ROW(DWORD);
3609             break;
3610
3611         default:
3612             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3613             return WINED3DERR_NOTAVAILABLE;
3614     }
3615
3616 #undef COLORFILL_ROW
3617
3618     /* Now copy first row. */
3619     first = buf;
3620     for (y = 1; y < height; ++y)
3621     {
3622         buf += pitch;
3623         memcpy(buf, first, width * bpp);
3624     }
3625
3626     return WINED3D_OK;
3627 }
3628
3629 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3630 {
3631     TRACE("surface %p.\n", surface);
3632
3633     if (!(surface->flags & SFLAG_LOCKED))
3634     {
3635         WARN("Trying to unmap unmapped surface.\n");
3636         return WINEDDERR_NOTLOCKED;
3637     }
3638     surface->flags &= ~SFLAG_LOCKED;
3639
3640     surface->surface_ops->surface_unmap(surface);
3641
3642     return WINED3D_OK;
3643 }
3644
3645 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3646         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3647 {
3648     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3649             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3650
3651     if (surface->flags & SFLAG_LOCKED)
3652     {
3653         WARN("Surface is already mapped.\n");
3654         return WINED3DERR_INVALIDCALL;
3655     }
3656     surface->flags |= SFLAG_LOCKED;
3657
3658     if (!(surface->flags & SFLAG_LOCKABLE))
3659         WARN("Trying to lock unlockable surface.\n");
3660
3661     surface->surface_ops->surface_map(surface, rect, flags);
3662
3663     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3664
3665     if (!rect)
3666     {
3667         locked_rect->pBits = surface->resource.allocatedMemory;
3668         surface->lockedRect.left = 0;
3669         surface->lockedRect.top = 0;
3670         surface->lockedRect.right = surface->resource.width;
3671         surface->lockedRect.bottom = surface->resource.height;
3672     }
3673     else
3674     {
3675         const struct wined3d_format *format = surface->resource.format;
3676
3677         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3678         {
3679             /* Compressed textures are block based, so calculate the offset of
3680              * the block that contains the top-left pixel of the locked rectangle. */
3681             locked_rect->pBits = surface->resource.allocatedMemory
3682                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3683                     + ((rect->left / format->block_width) * format->block_byte_count);
3684         }
3685         else
3686         {
3687             locked_rect->pBits = surface->resource.allocatedMemory
3688                     + (locked_rect->Pitch * rect->top)
3689                     + (rect->left * format->byte_count);
3690         }
3691         surface->lockedRect.left = rect->left;
3692         surface->lockedRect.top = rect->top;
3693         surface->lockedRect.right = rect->right;
3694         surface->lockedRect.bottom = rect->bottom;
3695     }
3696
3697     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3698     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3699
3700     return WINED3D_OK;
3701 }
3702
3703 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3704 {
3705     HRESULT hr;
3706
3707     TRACE("surface %p, dc %p.\n", surface, dc);
3708
3709     if (surface->flags & SFLAG_USERPTR)
3710     {
3711         ERR("Not supported on surfaces with application-provided memory.\n");
3712         return WINEDDERR_NODC;
3713     }
3714
3715     /* Give more detailed info for ddraw. */
3716     if (surface->flags & SFLAG_DCINUSE)
3717         return WINEDDERR_DCALREADYCREATED;
3718
3719     /* Can't GetDC if the surface is locked. */
3720     if (surface->flags & SFLAG_LOCKED)
3721         return WINED3DERR_INVALIDCALL;
3722
3723     hr = surface->surface_ops->surface_getdc(surface);
3724     if (FAILED(hr))
3725         return hr;
3726
3727     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3728             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3729     {
3730         /* GetDC on palettized formats is unsupported in D3D9, and the method
3731          * is missing in D3D8, so this should only be used for DX <=7
3732          * surfaces (with non-device palettes). */
3733         const PALETTEENTRY *pal = NULL;
3734
3735         if (surface->palette)
3736         {
3737             pal = surface->palette->palents;
3738         }
3739         else
3740         {
3741             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3742             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3743
3744             if (dds_primary && dds_primary->palette)
3745                 pal = dds_primary->palette->palents;
3746         }
3747
3748         if (pal)
3749         {
3750             RGBQUAD col[256];
3751             unsigned int i;
3752
3753             for (i = 0; i < 256; ++i)
3754             {
3755                 col[i].rgbRed = pal[i].peRed;
3756                 col[i].rgbGreen = pal[i].peGreen;
3757                 col[i].rgbBlue = pal[i].peBlue;
3758                 col[i].rgbReserved = 0;
3759             }
3760             SetDIBColorTable(surface->hDC, 0, 256, col);
3761         }
3762     }
3763
3764     surface->flags |= SFLAG_DCINUSE;
3765
3766     *dc = surface->hDC;
3767     TRACE("Returning dc %p.\n", *dc);
3768
3769     return WINED3D_OK;
3770 }
3771
3772 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3773 {
3774     TRACE("surface %p, dc %p.\n", surface, dc);
3775
3776     if (!(surface->flags & SFLAG_DCINUSE))
3777         return WINEDDERR_NODC;
3778
3779     if (surface->hDC != dc)
3780     {
3781         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3782                 dc, surface->hDC);
3783         return WINEDDERR_NODC;
3784     }
3785
3786     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3787     {
3788         /* Copy the contents of the DIB over to the PBO. */
3789         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
3790     }
3791
3792     /* We locked first, so unlock now. */
3793     wined3d_surface_unmap(surface);
3794
3795     surface->flags &= ~SFLAG_DCINUSE;
3796
3797     return WINED3D_OK;
3798 }
3799
3800 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3801 {
3802     struct wined3d_swapchain *swapchain;
3803     HRESULT hr;
3804
3805     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3806
3807     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
3808     {
3809         ERR("Flipped surface is not on a swapchain.\n");
3810         return WINEDDERR_NOTFLIPPABLE;
3811     }
3812     swapchain = surface->container.u.swapchain;
3813
3814     hr = surface->surface_ops->surface_flip(surface, override);
3815     if (FAILED(hr))
3816         return hr;
3817
3818     /* Just overwrite the swapchain presentation interval. This is ok because
3819      * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
3820      * specify the presentation interval. */
3821     if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
3822         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
3823     else if (flags & WINEDDFLIP_NOVSYNC)
3824         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
3825     else if (flags & WINEDDFLIP_INTERVAL2)
3826         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
3827     else if (flags & WINEDDFLIP_INTERVAL3)
3828         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
3829     else
3830         swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
3831
3832     return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
3833 }
3834
3835 /* Do not call while under the GL lock. */
3836 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3837 {
3838     struct wined3d_device *device = surface->resource.device;
3839
3840     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3841
3842     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3843     {
3844         struct wined3d_texture *texture = surface->container.u.texture;
3845
3846         TRACE("Passing to container (%p).\n", texture);
3847         texture->texture_ops->texture_preload(texture, srgb);
3848     }
3849     else
3850     {
3851         struct wined3d_context *context = NULL;
3852
3853         TRACE("(%p) : About to load surface\n", surface);
3854
3855         if (!device->isInDraw) context = context_acquire(device, NULL);
3856
3857         if (surface->resource.format->id == WINED3DFMT_P8_UINT
3858                 || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3859         {
3860             if (palette9_changed(surface))
3861             {
3862                 TRACE("Reloading surface because the d3d8/9 palette was changed\n");
3863                 /* TODO: This is not necessarily needed with hw palettized texture support */
3864                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3865                 /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
3866                 surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
3867             }
3868         }
3869
3870         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3871
3872         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3873         {
3874             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3875             GLclampf tmp;
3876             tmp = 0.9f;
3877             ENTER_GL();
3878             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3879             LEAVE_GL();
3880         }
3881
3882         if (context) context_release(context);
3883     }
3884 }
3885
3886 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3887 {
3888     if (!surface->resource.allocatedMemory)
3889     {
3890         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3891                 surface->resource.size + RESOURCE_ALIGNMENT);
3892         if (!surface->resource.heapMemory)
3893         {
3894             ERR("Out of memory\n");
3895             return FALSE;
3896         }
3897         surface->resource.allocatedMemory =
3898             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3899     }
3900     else
3901     {
3902         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3903     }
3904
3905     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3906
3907     return TRUE;
3908 }
3909
3910 /* Read the framebuffer back into the surface */
3911 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
3912 {
3913     struct wined3d_device *device = surface->resource.device;
3914     const struct wined3d_gl_info *gl_info;
3915     struct wined3d_context *context;
3916     BYTE *mem;
3917     GLint fmt;
3918     GLint type;
3919     BYTE *row, *top, *bottom;
3920     int i;
3921     BOOL bpp;
3922     RECT local_rect;
3923     BOOL srcIsUpsideDown;
3924     GLint rowLen = 0;
3925     GLint skipPix = 0;
3926     GLint skipRow = 0;
3927
3928     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
3929         static BOOL warned = FALSE;
3930         if(!warned) {
3931             ERR("The application tries to lock the render target, but render target locking is disabled\n");
3932             warned = TRUE;
3933         }
3934         return;
3935     }
3936
3937     context = context_acquire(device, surface);
3938     context_apply_blit_state(context, device);
3939     gl_info = context->gl_info;
3940
3941     ENTER_GL();
3942
3943     /* Select the correct read buffer, and give some debug output.
3944      * There is no need to keep track of the current read buffer or reset it, every part of the code
3945      * that reads sets the read buffer as desired.
3946      */
3947     if (surface_is_offscreen(surface))
3948     {
3949         /* Mapping the primary render target which is not on a swapchain.
3950          * Read from the back buffer. */
3951         TRACE("Mapping offscreen render target.\n");
3952         glReadBuffer(device->offscreenBuffer);
3953         srcIsUpsideDown = TRUE;
3954     }
3955     else
3956     {
3957         /* Onscreen surfaces are always part of a swapchain */
3958         GLenum buffer = surface_get_gl_buffer(surface);
3959         TRACE("Mapping %#x buffer.\n", buffer);
3960         glReadBuffer(buffer);
3961         checkGLcall("glReadBuffer");
3962         srcIsUpsideDown = FALSE;
3963     }
3964
3965     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
3966     if (!rect)
3967     {
3968         local_rect.left = 0;
3969         local_rect.top = 0;
3970         local_rect.right = surface->resource.width;
3971         local_rect.bottom = surface->resource.height;
3972     }
3973     else
3974     {
3975         local_rect = *rect;
3976     }
3977     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
3978
3979     switch (surface->resource.format->id)
3980     {
3981         case WINED3DFMT_P8_UINT:
3982         {
3983             if (primary_render_target_is_p8(device))
3984             {
3985                 /* In case of P8 render targets the index is stored in the alpha component */
3986                 fmt = GL_ALPHA;
3987                 type = GL_UNSIGNED_BYTE;
3988                 mem = dest;
3989                 bpp = surface->resource.format->byte_count;
3990             }
3991             else
3992             {
3993                 /* GL can't return palettized data, so read ARGB pixels into a
3994                  * separate block of memory and convert them into palettized format
3995                  * in software. Slow, but if the app means to use palettized render
3996                  * targets and locks it...
3997                  *
3998                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
3999                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4000                  * for the color channels when palettizing the colors.
4001                  */
4002                 fmt = GL_RGB;
4003                 type = GL_UNSIGNED_BYTE;
4004                 pitch *= 3;
4005                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4006                 if (!mem)
4007                 {
4008                     ERR("Out of memory\n");
4009                     LEAVE_GL();
4010                     return;
4011                 }
4012                 bpp = surface->resource.format->byte_count * 3;
4013             }
4014         }
4015         break;
4016
4017         default:
4018             mem = dest;
4019             fmt = surface->resource.format->glFormat;
4020             type = surface->resource.format->glType;
4021             bpp = surface->resource.format->byte_count;
4022     }
4023
4024     if (surface->flags & SFLAG_PBO)
4025     {
4026         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4027         checkGLcall("glBindBufferARB");
4028         if (mem)
4029         {
4030             ERR("mem not null for pbo -- unexpected\n");
4031             mem = NULL;
4032         }
4033     }
4034
4035     /* Save old pixel store pack state */
4036     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4037     checkGLcall("glGetIntegerv");
4038     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4039     checkGLcall("glGetIntegerv");
4040     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4041     checkGLcall("glGetIntegerv");
4042
4043     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4044     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4045     checkGLcall("glPixelStorei");
4046     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4047     checkGLcall("glPixelStorei");
4048     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4049     checkGLcall("glPixelStorei");
4050
4051     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4052             local_rect.right - local_rect.left,
4053             local_rect.bottom - local_rect.top,
4054             fmt, type, mem);
4055     checkGLcall("glReadPixels");
4056
4057     /* Reset previous pixel store pack state */
4058     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4059     checkGLcall("glPixelStorei");
4060     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4061     checkGLcall("glPixelStorei");
4062     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4063     checkGLcall("glPixelStorei");
4064
4065     if (surface->flags & SFLAG_PBO)
4066     {
4067         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4068         checkGLcall("glBindBufferARB");
4069
4070         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4071          * to get a pointer to it and perform the flipping in software. This is a lot
4072          * faster than calling glReadPixels for each line. In case we want more speed
4073          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4074         if (!srcIsUpsideDown)
4075         {
4076             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4077             checkGLcall("glBindBufferARB");
4078
4079             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4080             checkGLcall("glMapBufferARB");
4081         }
4082     }
4083
4084     /* TODO: Merge this with the palettization loop below for P8 targets */
4085     if(!srcIsUpsideDown) {
4086         UINT len, off;
4087         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4088             Flip the lines in software */
4089         len = (local_rect.right - local_rect.left) * bpp;
4090         off = local_rect.left * bpp;
4091
4092         row = HeapAlloc(GetProcessHeap(), 0, len);
4093         if(!row) {
4094             ERR("Out of memory\n");
4095             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4096                 HeapFree(GetProcessHeap(), 0, mem);
4097             LEAVE_GL();
4098             return;
4099         }
4100
4101         top = mem + pitch * local_rect.top;
4102         bottom = mem + pitch * (local_rect.bottom - 1);
4103         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4104             memcpy(row, top + off, len);
4105             memcpy(top + off, bottom + off, len);
4106             memcpy(bottom + off, row, len);
4107             top += pitch;
4108             bottom -= pitch;
4109         }
4110         HeapFree(GetProcessHeap(), 0, row);
4111
4112         /* Unmap the temp PBO buffer */
4113         if (surface->flags & SFLAG_PBO)
4114         {
4115             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4116             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4117         }
4118     }
4119
4120     LEAVE_GL();
4121     context_release(context);
4122
4123     /* For P8 textures we need to perform an inverse palette lookup. This is
4124      * done by searching for a palette index which matches the RGB value.
4125      * Note this isn't guaranteed to work when there are multiple entries for
4126      * the same color but we have no choice. In case of P8 render targets,
4127      * the index is stored in the alpha component so no conversion is needed. */
4128     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4129     {
4130         const PALETTEENTRY *pal = NULL;
4131         DWORD width = pitch / 3;
4132         int x, y, c;
4133
4134         if (surface->palette)
4135         {
4136             pal = surface->palette->palents;
4137         }
4138         else
4139         {
4140             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4141             HeapFree(GetProcessHeap(), 0, mem);
4142             return;
4143         }
4144
4145         for(y = local_rect.top; y < local_rect.bottom; y++) {
4146             for(x = local_rect.left; x < local_rect.right; x++) {
4147                 /*                      start              lines            pixels      */
4148                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4149                 const BYTE *green = blue  + 1;
4150                 const BYTE *red = green + 1;
4151
4152                 for(c = 0; c < 256; c++) {
4153                     if(*red   == pal[c].peRed   &&
4154                        *green == pal[c].peGreen &&
4155                        *blue  == pal[c].peBlue)
4156                     {
4157                         *((BYTE *) dest + y * width + x) = c;
4158                         break;
4159                     }
4160                 }
4161             }
4162         }
4163         HeapFree(GetProcessHeap(), 0, mem);
4164     }
4165 }
4166
4167 /* Read the framebuffer contents into a texture */
4168 static void read_from_framebuffer_texture(struct wined3d_surface *surface, BOOL srgb)
4169 {
4170     struct wined3d_device *device = surface->resource.device;
4171     const struct wined3d_gl_info *gl_info;
4172     struct wined3d_context *context;
4173
4174     if (!surface_is_offscreen(surface))
4175     {
4176         /* We would need to flip onscreen surfaces, but there's no efficient
4177          * way to do that here. It makes more sense for the caller to
4178          * explicitly go through sysmem. */
4179         ERR("Not supported for onscreen targets.\n");
4180         return;
4181     }
4182
4183     /* Activate the surface to read from. In some situations it isn't the currently active target(e.g. backbuffer
4184      * locking during offscreen rendering). RESOURCELOAD is ok because glCopyTexSubImage2D isn't affected by any
4185      * states in the stateblock, and no driver was found yet that had bugs in that regard.
4186      */
4187     context = context_acquire(device, surface);
4188     gl_info = context->gl_info;
4189     device_invalidate_state(device, STATE_FRAMEBUFFER);
4190
4191     surface_prepare_texture(surface, gl_info, srgb);
4192     surface_bind_and_dirtify(surface, gl_info, srgb);
4193
4194     TRACE("Reading back offscreen render target %p.\n", surface);
4195
4196     ENTER_GL();
4197
4198     glReadBuffer(device->offscreenBuffer);
4199     checkGLcall("glReadBuffer");
4200
4201     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4202             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4203     checkGLcall("glCopyTexSubImage2D");
4204
4205     LEAVE_GL();
4206
4207     context_release(context);
4208 }
4209
4210 /* Context activation is done by the caller. */
4211 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4212         const struct wined3d_gl_info *gl_info, BOOL srgb)
4213 {
4214     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4215     CONVERT_TYPES convert;
4216     struct wined3d_format format;
4217
4218     if (surface->flags & alloc_flag) return;
4219
4220     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4221     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4222     else surface->flags &= ~SFLAG_CONVERTED;
4223
4224     surface_bind_and_dirtify(surface, gl_info, srgb);
4225     surface_allocate_surface(surface, gl_info, &format, srgb);
4226     surface->flags |= alloc_flag;
4227 }
4228
4229 /* Context activation is done by the caller. */
4230 void surface_prepare_texture(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL srgb)
4231 {
4232     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4233     {
4234         struct wined3d_texture *texture = surface->container.u.texture;
4235         UINT sub_count = texture->level_count * texture->layer_count;
4236         UINT i;
4237
4238         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4239
4240         for (i = 0; i < sub_count; ++i)
4241         {
4242             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4243             surface_prepare_texture_internal(s, gl_info, srgb);
4244         }
4245
4246         return;
4247     }
4248
4249     surface_prepare_texture_internal(surface, gl_info, srgb);
4250 }
4251
4252 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4253         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4254 {
4255     struct wined3d_device *device = surface->resource.device;
4256     UINT pitch = wined3d_surface_get_pitch(surface);
4257     const struct wined3d_gl_info *gl_info;
4258     struct wined3d_context *context;
4259     RECT local_rect;
4260     UINT w, h;
4261
4262     surface_get_rect(surface, rect, &local_rect);
4263
4264     mem += local_rect.top * pitch + local_rect.left * bpp;
4265     w = local_rect.right - local_rect.left;
4266     h = local_rect.bottom - local_rect.top;
4267
4268     /* Activate the correct context for the render target */
4269     context = context_acquire(device, surface);
4270     context_apply_blit_state(context, device);
4271     gl_info = context->gl_info;
4272
4273     ENTER_GL();
4274
4275     if (!surface_is_offscreen(surface))
4276     {
4277         GLenum buffer = surface_get_gl_buffer(surface);
4278         TRACE("Unlocking %#x buffer.\n", buffer);
4279         context_set_draw_buffer(context, buffer);
4280
4281         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4282         glPixelZoom(1.0f, -1.0f);
4283     }
4284     else
4285     {
4286         /* Primary offscreen render target */
4287         TRACE("Offscreen render target.\n");
4288         context_set_draw_buffer(context, device->offscreenBuffer);
4289
4290         glPixelZoom(1.0f, 1.0f);
4291     }
4292
4293     glRasterPos3i(local_rect.left, local_rect.top, 1);
4294     checkGLcall("glRasterPos3i");
4295
4296     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4297     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4298
4299     if (surface->flags & SFLAG_PBO)
4300     {
4301         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4302         checkGLcall("glBindBufferARB");
4303     }
4304
4305     glDrawPixels(w, h, fmt, type, mem);
4306     checkGLcall("glDrawPixels");
4307
4308     if (surface->flags & SFLAG_PBO)
4309     {
4310         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4311         checkGLcall("glBindBufferARB");
4312     }
4313
4314     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4315     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4316
4317     LEAVE_GL();
4318
4319     if (wined3d_settings.strict_draw_ordering
4320             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4321             && surface->container.u.swapchain->front_buffer == surface))
4322         wglFlush();
4323
4324     context_release(context);
4325 }
4326
4327 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4328         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4329 {
4330     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4331     const struct wined3d_device *device = surface->resource.device;
4332     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4333     BOOL blit_supported = FALSE;
4334
4335     /* Copy the default values from the surface. Below we might perform fixups */
4336     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4337     *format = *surface->resource.format;
4338     *convert = NO_CONVERSION;
4339
4340     /* Ok, now look if we have to do any conversion */
4341     switch (surface->resource.format->id)
4342     {
4343         case WINED3DFMT_P8_UINT:
4344             /* Below the call to blit_supported is disabled for Wine 1.2
4345              * because the function isn't operating correctly yet. At the
4346              * moment 8-bit blits are handled in software and if certain GL
4347              * extensions are around, surface conversion is performed at
4348              * upload time. The blit_supported call recognizes it as a
4349              * destination fixup. This type of upload 'fixup' and 8-bit to
4350              * 8-bit blits need to be handled by the blit_shader.
4351              * TODO: get rid of this #if 0. */
4352 #if 0
4353             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4354                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4355                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4356 #endif
4357             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4358
4359             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4360              * texturing. Further also use conversion in case of color keying.
4361              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4362              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4363              * conflicts with this.
4364              */
4365             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4366                     || colorkey_active || !use_texturing)
4367             {
4368                 format->glFormat = GL_RGBA;
4369                 format->glInternal = GL_RGBA;
4370                 format->glType = GL_UNSIGNED_BYTE;
4371                 format->conv_byte_count = 4;
4372                 if (colorkey_active)
4373                     *convert = CONVERT_PALETTED_CK;
4374                 else
4375                     *convert = CONVERT_PALETTED;
4376             }
4377             break;
4378
4379         case WINED3DFMT_B2G3R3_UNORM:
4380             /* **********************
4381                 GL_UNSIGNED_BYTE_3_3_2
4382                 ********************** */
4383             if (colorkey_active) {
4384                 /* This texture format will never be used.. So do not care about color keying
4385                     up until the point in time it will be needed :-) */
4386                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4387             }
4388             break;
4389
4390         case WINED3DFMT_B5G6R5_UNORM:
4391             if (colorkey_active)
4392             {
4393                 *convert = CONVERT_CK_565;
4394                 format->glFormat = GL_RGBA;
4395                 format->glInternal = GL_RGB5_A1;
4396                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4397                 format->conv_byte_count = 2;
4398             }
4399             break;
4400
4401         case WINED3DFMT_B5G5R5X1_UNORM:
4402             if (colorkey_active)
4403             {
4404                 *convert = CONVERT_CK_5551;
4405                 format->glFormat = GL_BGRA;
4406                 format->glInternal = GL_RGB5_A1;
4407                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4408                 format->conv_byte_count = 2;
4409             }
4410             break;
4411
4412         case WINED3DFMT_B8G8R8_UNORM:
4413             if (colorkey_active)
4414             {
4415                 *convert = CONVERT_CK_RGB24;
4416                 format->glFormat = GL_RGBA;
4417                 format->glInternal = GL_RGBA8;
4418                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4419                 format->conv_byte_count = 4;
4420             }
4421             break;
4422
4423         case WINED3DFMT_B8G8R8X8_UNORM:
4424             if (colorkey_active)
4425             {
4426                 *convert = CONVERT_RGB32_888;
4427                 format->glFormat = GL_RGBA;
4428                 format->glInternal = GL_RGBA8;
4429                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4430                 format->conv_byte_count = 4;
4431             }
4432             break;
4433
4434         default:
4435             break;
4436     }
4437
4438     return WINED3D_OK;
4439 }
4440
4441 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4442 {
4443     const struct wined3d_device *device = surface->resource.device;
4444     const struct wined3d_palette *pal = surface->palette;
4445     BOOL index_in_alpha = FALSE;
4446     unsigned int i;
4447
4448     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4449      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4450      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4451      * duplicate entries. Store the color key in the unused alpha component to speed the
4452      * download up and to make conversion unneeded. */
4453     index_in_alpha = primary_render_target_is_p8(device);
4454
4455     if (!pal)
4456     {
4457         /* In DirectDraw the palette is a property of the surface, there are no such things as device palettes. */
4458         if (device->wined3d->flags & WINED3D_PALETTE_PER_SURFACE)
4459         {
4460             ERR("This code should never get entered for DirectDraw!, expect problems\n");
4461             if (index_in_alpha)
4462             {
4463                 /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4464                  * there's no palette at this time. */
4465                 for (i = 0; i < 256; i++) table[i][3] = i;
4466             }
4467         }
4468         else
4469         {
4470             /* Direct3D >= 8 palette usage style: P8 textures use device palettes, palette entry format is A8R8G8B8,
4471              * alpha is stored in peFlags and may be used by the app if D3DPTEXTURECAPS_ALPHAPALETTE device
4472              * capability flag is present (wine does advertise this capability) */
4473             for (i = 0; i < 256; ++i)
4474             {
4475                 table[i][0] = device->palettes[device->currentPalette][i].peRed;
4476                 table[i][1] = device->palettes[device->currentPalette][i].peGreen;
4477                 table[i][2] = device->palettes[device->currentPalette][i].peBlue;
4478                 table[i][3] = device->palettes[device->currentPalette][i].peFlags;
4479             }
4480         }
4481     }
4482     else
4483     {
4484         TRACE("Using surface palette %p\n", pal);
4485         /* Get the surface's palette */
4486         for (i = 0; i < 256; ++i)
4487         {
4488             table[i][0] = pal->palents[i].peRed;
4489             table[i][1] = pal->palents[i].peGreen;
4490             table[i][2] = pal->palents[i].peBlue;
4491
4492             /* When index_in_alpha is set the palette index is stored in the
4493              * alpha component. In case of a readback we can then read
4494              * GL_ALPHA. Color keying is handled in BltOverride using a
4495              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4496              * color key itself is passed to glAlphaFunc in other cases the
4497              * alpha component of pixels that should be masked away is set to 0. */
4498             if (index_in_alpha)
4499             {
4500                 table[i][3] = i;
4501             }
4502             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4503                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4504             {
4505                 table[i][3] = 0x00;
4506             }
4507             else if (pal->flags & WINEDDPCAPS_ALPHA)
4508             {
4509                 table[i][3] = pal->palents[i].peFlags;
4510             }
4511             else
4512             {
4513                 table[i][3] = 0xFF;
4514             }
4515         }
4516     }
4517 }
4518
4519 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4520         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4521 {
4522     const BYTE *source;
4523     BYTE *dest;
4524     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4525
4526     switch (convert) {
4527         case NO_CONVERSION:
4528         {
4529             memcpy(dst, src, pitch * height);
4530             break;
4531         }
4532         case CONVERT_PALETTED:
4533         case CONVERT_PALETTED_CK:
4534         {
4535             BYTE table[256][4];
4536             unsigned int x, y;
4537
4538             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4539
4540             for (y = 0; y < height; y++)
4541             {
4542                 source = src + pitch * y;
4543                 dest = dst + outpitch * y;
4544                 /* This is an 1 bpp format, using the width here is fine */
4545                 for (x = 0; x < width; x++) {
4546                     BYTE color = *source++;
4547                     *dest++ = table[color][0];
4548                     *dest++ = table[color][1];
4549                     *dest++ = table[color][2];
4550                     *dest++ = table[color][3];
4551                 }
4552             }
4553         }
4554         break;
4555
4556         case CONVERT_CK_565:
4557         {
4558             /* Converting the 565 format in 5551 packed to emulate color-keying.
4559
4560               Note : in all these conversion, it would be best to average the averaging
4561                       pixels to get the color of the pixel that will be color-keyed to
4562                       prevent 'color bleeding'. This will be done later on if ever it is
4563                       too visible.
4564
4565               Note2: Nvidia documents say that their driver does not support alpha + color keying
4566                      on the same surface and disables color keying in such a case
4567             */
4568             unsigned int x, y;
4569             const WORD *Source;
4570             WORD *Dest;
4571
4572             TRACE("Color keyed 565\n");
4573
4574             for (y = 0; y < height; y++) {
4575                 Source = (const WORD *)(src + y * pitch);
4576                 Dest = (WORD *) (dst + y * outpitch);
4577                 for (x = 0; x < width; x++ ) {
4578                     WORD color = *Source++;
4579                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4580                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4581                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4582                         *Dest |= 0x0001;
4583                     Dest++;
4584                 }
4585             }
4586         }
4587         break;
4588
4589         case CONVERT_CK_5551:
4590         {
4591             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4592             unsigned int x, y;
4593             const WORD *Source;
4594             WORD *Dest;
4595             TRACE("Color keyed 5551\n");
4596             for (y = 0; y < height; y++) {
4597                 Source = (const WORD *)(src + y * pitch);
4598                 Dest = (WORD *) (dst + y * outpitch);
4599                 for (x = 0; x < width; x++ ) {
4600                     WORD color = *Source++;
4601                     *Dest = color;
4602                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4603                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4604                         *Dest |= (1 << 15);
4605                     else
4606                         *Dest &= ~(1 << 15);
4607                     Dest++;
4608                 }
4609             }
4610         }
4611         break;
4612
4613         case CONVERT_CK_RGB24:
4614         {
4615             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4616             unsigned int x, y;
4617             for (y = 0; y < height; y++)
4618             {
4619                 source = src + pitch * y;
4620                 dest = dst + outpitch * y;
4621                 for (x = 0; x < width; x++) {
4622                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4623                     DWORD dstcolor = color << 8;
4624                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4625                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4626                         dstcolor |= 0xff;
4627                     *(DWORD*)dest = dstcolor;
4628                     source += 3;
4629                     dest += 4;
4630                 }
4631             }
4632         }
4633         break;
4634
4635         case CONVERT_RGB32_888:
4636         {
4637             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4638             unsigned int x, y;
4639             for (y = 0; y < height; y++)
4640             {
4641                 source = src + pitch * y;
4642                 dest = dst + outpitch * y;
4643                 for (x = 0; x < width; x++) {
4644                     DWORD color = 0xffffff & *(const DWORD*)source;
4645                     DWORD dstcolor = color << 8;
4646                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4647                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4648                         dstcolor |= 0xff;
4649                     *(DWORD*)dest = dstcolor;
4650                     source += 4;
4651                     dest += 4;
4652                 }
4653             }
4654         }
4655         break;
4656
4657         default:
4658             ERR("Unsupported conversion type %#x.\n", convert);
4659     }
4660     return WINED3D_OK;
4661 }
4662
4663 BOOL palette9_changed(struct wined3d_surface *surface)
4664 {
4665     struct wined3d_device *device = surface->resource.device;
4666
4667     if (surface->palette || (surface->resource.format->id != WINED3DFMT_P8_UINT
4668             && surface->resource.format->id != WINED3DFMT_P8_UINT_A8_UNORM))
4669     {
4670         /* If a ddraw-style palette is attached assume no d3d9 palette change.
4671          * Also the palette isn't interesting if the surface format isn't P8 or A8P8
4672          */
4673         return FALSE;
4674     }
4675
4676     if (surface->palette9)
4677     {
4678         if (!memcmp(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256))
4679         {
4680             return FALSE;
4681         }
4682     }
4683     else
4684     {
4685         surface->palette9 = HeapAlloc(GetProcessHeap(), 0, sizeof(PALETTEENTRY) * 256);
4686     }
4687     memcpy(surface->palette9, device->palettes[device->currentPalette], sizeof(PALETTEENTRY) * 256);
4688
4689     return TRUE;
4690 }
4691
4692 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4693 {
4694     /* Flip the surface contents */
4695     /* Flip the DC */
4696     {
4697         HDC tmp;
4698         tmp = front->hDC;
4699         front->hDC = back->hDC;
4700         back->hDC = tmp;
4701     }
4702
4703     /* Flip the DIBsection */
4704     {
4705         HBITMAP tmp;
4706         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4707         tmp = front->dib.DIBsection;
4708         front->dib.DIBsection = back->dib.DIBsection;
4709         back->dib.DIBsection = tmp;
4710
4711         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4712         else front->flags &= ~SFLAG_DIBSECTION;
4713         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4714         else back->flags &= ~SFLAG_DIBSECTION;
4715     }
4716
4717     /* Flip the surface data */
4718     {
4719         void* tmp;
4720
4721         tmp = front->dib.bitmap_data;
4722         front->dib.bitmap_data = back->dib.bitmap_data;
4723         back->dib.bitmap_data = tmp;
4724
4725         tmp = front->resource.allocatedMemory;
4726         front->resource.allocatedMemory = back->resource.allocatedMemory;
4727         back->resource.allocatedMemory = tmp;
4728
4729         tmp = front->resource.heapMemory;
4730         front->resource.heapMemory = back->resource.heapMemory;
4731         back->resource.heapMemory = tmp;
4732     }
4733
4734     /* Flip the PBO */
4735     {
4736         GLuint tmp_pbo = front->pbo;
4737         front->pbo = back->pbo;
4738         back->pbo = tmp_pbo;
4739     }
4740
4741     /* client_memory should not be different, but just in case */
4742     {
4743         BOOL tmp;
4744         tmp = front->dib.client_memory;
4745         front->dib.client_memory = back->dib.client_memory;
4746         back->dib.client_memory = tmp;
4747     }
4748
4749     /* Flip the opengl texture */
4750     {
4751         GLuint tmp;
4752
4753         tmp = back->texture_name;
4754         back->texture_name = front->texture_name;
4755         front->texture_name = tmp;
4756
4757         tmp = back->texture_name_srgb;
4758         back->texture_name_srgb = front->texture_name_srgb;
4759         front->texture_name_srgb = tmp;
4760
4761         resource_unload(&back->resource);
4762         resource_unload(&front->resource);
4763     }
4764
4765     {
4766         DWORD tmp_flags = back->flags;
4767         back->flags = front->flags;
4768         front->flags = tmp_flags;
4769     }
4770 }
4771
4772 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4773  * pixel copy calls. */
4774 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4775         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4776 {
4777     struct wined3d_device *device = dst_surface->resource.device;
4778     float xrel, yrel;
4779     UINT row;
4780     struct wined3d_context *context;
4781     BOOL upsidedown = FALSE;
4782     RECT dst_rect = *dst_rect_in;
4783
4784     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4785      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4786      */
4787     if(dst_rect.top > dst_rect.bottom) {
4788         UINT tmp = dst_rect.bottom;
4789         dst_rect.bottom = dst_rect.top;
4790         dst_rect.top = tmp;
4791         upsidedown = TRUE;
4792     }
4793
4794     context = context_acquire(device, src_surface);
4795     context_apply_blit_state(context, device);
4796     surface_internal_preload(dst_surface, SRGB_RGB);
4797     ENTER_GL();
4798
4799     /* Bind the target texture */
4800     glBindTexture(dst_surface->texture_target, dst_surface->texture_name);
4801     checkGLcall("glBindTexture");
4802     if (surface_is_offscreen(src_surface))
4803     {
4804         TRACE("Reading from an offscreen target\n");
4805         upsidedown = !upsidedown;
4806         glReadBuffer(device->offscreenBuffer);
4807     }
4808     else
4809     {
4810         glReadBuffer(surface_get_gl_buffer(src_surface));
4811     }
4812     checkGLcall("glReadBuffer");
4813
4814     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4815     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4816
4817     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4818     {
4819         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4820
4821         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4822             ERR("Texture filtering not supported in direct blit\n");
4823         }
4824     }
4825     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4826             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4827     {
4828         ERR("Texture filtering not supported in direct blit\n");
4829     }
4830
4831     if (upsidedown
4832             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4833             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4834     {
4835         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4836
4837         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4838                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4839                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4840                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4841     }
4842     else
4843     {
4844         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4845         /* I have to process this row by row to swap the image,
4846          * otherwise it would be upside down, so stretching in y direction
4847          * doesn't cost extra time
4848          *
4849          * However, stretching in x direction can be avoided if not necessary
4850          */
4851         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4852             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4853             {
4854                 /* Well, that stuff works, but it's very slow.
4855                  * find a better way instead
4856                  */
4857                 UINT col;
4858
4859                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4860                 {
4861                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4862                             dst_rect.left + col /* x offset */, row /* y offset */,
4863                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4864                 }
4865             }
4866             else
4867             {
4868                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4869                         dst_rect.left /* x offset */, row /* y offset */,
4870                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4871             }
4872         }
4873     }
4874     checkGLcall("glCopyTexSubImage2D");
4875
4876     LEAVE_GL();
4877     context_release(context);
4878
4879     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4880      * path is never entered
4881      */
4882     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4883 }
4884
4885 /* Uses the hardware to stretch and flip the image.
      *
      * The source framebuffer contents are first copied into a texture, that
      * texture is drawn as a quad of the destination size into drawBuffer, and
      * the drawn area is then read back into dst_surface's texture with
      * glCopyTexSubImage2D. If the quad had to be drawn into GL_BACK, the
      * previous contents are redrawn afterwards from the copy taken at the
      * start.
      *
      * dst_rect_in may have top > bottom; it is normalized locally and turned
      * into the upsidedown flag. Filter is applied to the first readback
      * texture; the separate front buffer readback texture created further
      * down always uses GL_NEAREST. */
4886 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4887         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4888 {
4889     struct wined3d_device *device = dst_surface->resource.device;
4890     struct wined3d_swapchain *src_swapchain = NULL;
4891     GLuint src, backup = 0; /* src: texture sampled by the stretched quad; backup: temporary texture, 0 if unused */
4892     float left, right, top, bottom; /* Texture coordinates */
4893     UINT fbwidth = src_surface->resource.width;
4894     UINT fbheight = src_surface->resource.height;
4895     struct wined3d_context *context;
4896     GLenum drawBuffer = GL_BACK;
4897     GLenum texture_target;
4898     BOOL noBackBufferBackup;
4899     BOOL src_offscreen;
4900     BOOL upsidedown = FALSE;
4901     RECT dst_rect = *dst_rect_in;
4902
4903     TRACE("Using hwstretch blit\n");
4904     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4905     context = context_acquire(device, src_surface);
4906     context_apply_blit_state(context, device);
4907     surface_internal_preload(dst_surface, SRGB_RGB);
4908
4909     src_offscreen = surface_is_offscreen(src_surface);
          /* With an offscreen source and FBO offscreen rendering the readback goes
           * into a temporary texture instead of src_surface's own texture. */
4910     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4911     if (!noBackBufferBackup && !src_surface->texture_name)
4912     {
4913         /* Get it a description */
4914         surface_internal_preload(src_surface, SRGB_RGB);
4915     }
4916     ENTER_GL();
4917
4918     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4919      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4920      */
4921     if (context->aux_buffers >= 2)
4922     {
4923         /* Got more than one aux buffer? Use the 2nd aux buffer */
4924         drawBuffer = GL_AUX1;
4925     }
4926     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4927     {
4928         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
4929         drawBuffer = GL_AUX0;
4930     }
4931
4932     if(noBackBufferBackup) {
              /* NOTE(review): no glTexImage2D call for 'backup' is visible in this
               * function before the glCopyTexSubImage2D below, and GL_TEXTURE_2D is
               * not enabled on this path - confirm this branch works as intended. */
4933         glGenTextures(1, &backup);
4934         checkGLcall("glGenTextures");
4935         glBindTexture(GL_TEXTURE_2D, backup);
4936         checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
4937         texture_target = GL_TEXTURE_2D;
4938     } else {
4939         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
4940          * we are reading from the back buffer, the backup can be used as source texture
4941          */
4942         texture_target = src_surface->texture_target;
4943         glBindTexture(texture_target, src_surface->texture_name);
4944         checkGLcall("glBindTexture(texture_target, src_surface->texture_name)");
4945         glEnable(texture_target);
4946         checkGLcall("glEnable(texture_target)");
4947
4948         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
4949         src_surface->flags &= ~SFLAG_INTEXTURE;
4950     }
4951
4952     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4953      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4954      */
4955     if(dst_rect.top > dst_rect.bottom) {
4956         UINT tmp = dst_rect.bottom;
4957         dst_rect.bottom = dst_rect.top;
4958         dst_rect.top = tmp;
4959         upsidedown = TRUE;
4960     }
4961
4962     if (src_offscreen)
4963     {
4964         TRACE("Reading from an offscreen target\n");
              /* Reading an offscreen target inverts the flip decision made above. */
4965         upsidedown = !upsidedown;
4966         glReadBuffer(device->offscreenBuffer);
4967     }
4968     else
4969     {
4970         glReadBuffer(surface_get_gl_buffer(src_surface));
4971     }
4972
          /* Copy the whole read buffer into the texture bound above (either
           * 'backup' or src_surface's texture). */
4973     /* TODO: Only back up the part that will be overwritten */
4974     glCopyTexSubImage2D(texture_target, 0,
4975                         0, 0 /* read offsets */,
4976                         0, 0,
4977                         fbwidth,
4978                         fbheight);
4979
4980     checkGLcall("glCopyTexSubImage2D");
4981
4982     /* No issue with overriding these - the sampler is dirty due to blit usage */
4983     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
4984             wined3d_gl_mag_filter(magLookup, Filter));
4985     checkGLcall("glTexParameteri");
4986     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
4987             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
4988     checkGLcall("glTexParameteri");
4989
          /* Pick the texture the stretched quad samples from. Unless the source is
           * a swapchain surface other than back buffer 0, the readback made above
           * is reused; otherwise GL_FRONT is read into a new temporary texture. */
4990     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
4991         src_swapchain = src_surface->container.u.swapchain;
4992     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
4993     {
4994         src = backup ? backup : src_surface->texture_name;
4995     }
4996     else
4997     {
4998         glReadBuffer(GL_FRONT);
4999         checkGLcall("glReadBuffer(GL_FRONT)");
5000
5001         glGenTextures(1, &src);
5002         checkGLcall("glGenTextures(1, &src)");
5003         glBindTexture(GL_TEXTURE_2D, src);
5004         checkGLcall("glBindTexture(GL_TEXTURE_2D, src)");
5005
5006         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5007          * out for power of 2 sizes
5008          */
5009         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5010                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5011         checkGLcall("glTexImage2D");
5012         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5013                             0, 0 /* read offsets */,
5014                             0, 0,
5015                             fbwidth,
5016                             fbheight);
5017
5018         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5019         checkGLcall("glTexParameteri");
5020         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5021         checkGLcall("glTexParameteri");
5022
5023         glReadBuffer(GL_BACK);
5024         checkGLcall("glReadBuffer(GL_BACK)");
5025
5026         if(texture_target != GL_TEXTURE_2D) {
5027             glDisable(texture_target);
5028             glEnable(GL_TEXTURE_2D);
5029             texture_target = GL_TEXTURE_2D;
5030         }
5031     }
          /* NOTE(review): no glBegin / glEnd pair precedes this check; the label
           * looks like it was copied from the check that follows the quad below. */
5032     checkGLcall("glEnd and previous");
5033
          /* Texture coordinates of the source rectangle, in texels. The vertical
           * ones are measured from the opposite edge of the surface, and top /
           * bottom are exchanged when an upside down result is wanted. */
5034     left = src_rect->left;
5035     right = src_rect->right;
5036
5037     if (!upsidedown)
5038     {
5039         top = src_surface->resource.height - src_rect->top;
5040         bottom = src_surface->resource.height - src_rect->bottom;
5041     }
5042     else
5043     {
5044         top = src_surface->resource.height - src_rect->bottom;
5045         bottom = src_surface->resource.height - src_rect->top;
5046     }
5047
          /* Scale texel coordinates by the pow2 texture size when the surface is
           * flagged SFLAG_NORMCOORD. */
5048     if (src_surface->flags & SFLAG_NORMCOORD)
5049     {
5050         left /= src_surface->pow2Width;
5051         right /= src_surface->pow2Width;
5052         top /= src_surface->pow2Height;
5053         bottom /= src_surface->pow2Height;
5054     }
5055
5056     /* draw the source texture stretched and upside down. The correct surface is bound already */
5057     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5058     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5059
          /* Draw into drawBuffer and read back from the same buffer afterwards. */
5060     context_set_draw_buffer(context, drawBuffer);
5061     glReadBuffer(drawBuffer);
5062
5063     glBegin(GL_QUADS);
5064         /* bottom left */
5065         glTexCoord2f(left, bottom);
5066         glVertex2i(0, 0);
5067
5068         /* top left */
5069         glTexCoord2f(left, top);
5070         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5071
5072         /* top right */
5073         glTexCoord2f(right, top);
5074         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5075
5076         /* bottom right */
5077         glTexCoord2f(right, bottom);
5078         glVertex2i(dst_rect.right - dst_rect.left, 0);
5079     glEnd();
5080     checkGLcall("glEnd and previous");
5081
5082     if (texture_target != dst_surface->texture_target)
5083     {
5084         glDisable(texture_target);
5085         glEnable(dst_surface->texture_target);
5086         texture_target = dst_surface->texture_target;
5087     }
5088
5089     /* Now read the stretched and upside down image into the destination texture */
          /* NOTE(review): mip level 0 is hardcoded here, whereas
           * fb_copy_to_texture_direct() passes dst_surface->texture_level -
           * confirm which one is intended for non-zero levels. */
5090     glBindTexture(texture_target, dst_surface->texture_name);
5091     checkGLcall("glBindTexture");
5092     glCopyTexSubImage2D(texture_target,
5093                         0,
5094                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5095                         0, 0, /* We blitted the image to the origin */
5096                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5097     checkGLcall("glCopyTexSubImage2D");
5098
          /* Only a quad drawn into GL_BACK has to be undone; the aux buffers are
           * not restored. */
5099     if(drawBuffer == GL_BACK) {
5100         /* Write the back buffer backup back */
5101         if(backup) {
5102             if(texture_target != GL_TEXTURE_2D) {
5103                 glDisable(texture_target);
5104                 glEnable(GL_TEXTURE_2D);
5105                 texture_target = GL_TEXTURE_2D;
5106             }
5107             glBindTexture(GL_TEXTURE_2D, backup);
5108             checkGLcall("glBindTexture(GL_TEXTURE_2D, backup)");
5109         }
5110         else
5111         {
5112             if (texture_target != src_surface->texture_target)
5113             {
5114                 glDisable(texture_target);
5115                 glEnable(src_surface->texture_target);
5116                 texture_target = src_surface->texture_target;
5117             }
5118             glBindTexture(src_surface->texture_target, src_surface->texture_name);
5119             checkGLcall("glBindTexture(src_surface->texture_target, src_surface->texture_name)");
5120         }
5121
              /* Full framebuffer sized quad sampling the saved copy.
               * NOTE(review): these coordinates are always divided by the pow2
               * size, without the SFLAG_NORMCOORD check used for the stretched
               * quad above - confirm that is correct for every texture target. */
5122         glBegin(GL_QUADS);
5123             /* top left */
5124             glTexCoord2f(0.0f, 0.0f);
5125             glVertex2i(0, fbheight);
5126
5127             /* bottom left */
5128             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5129             glVertex2i(0, 0);
5130
5131             /* bottom right */
5132             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5133                     (float)fbheight / (float)src_surface->pow2Height);
5134             glVertex2i(fbwidth, 0);
5135
5136             /* top right */
5137             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5138             glVertex2i(fbwidth, fbheight);
5139         glEnd();
5140     }
5141     glDisable(texture_target);
5142     checkGLcall("glDisable(texture_target)");
5143
5144     /* Cleanup */
          /* src is deleted only when it is the temporary front buffer readback
           * texture, i.e. neither the surface's own texture nor 'backup'. */
5145     if (src != src_surface->texture_name && src != backup)
5146     {
5147         glDeleteTextures(1, &src);
5148         checkGLcall("glDeleteTextures(1, &src)");
5149     }
5150     if(backup) {
5151         glDeleteTextures(1, &backup);
5152         checkGLcall("glDeleteTextures(1, &backup)");
5153     }
5154
5155     LEAVE_GL();
5156
5157     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5158
5159     context_release(context);
5160
5161     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5162      * path is never entered
5163      */
5164     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5165 }
5166
5167 /* Front buffer coordinates are always full screen coordinates, but our GL
5168  * drawable is limited to the window's client area. The sysmem and texture
5169  * copies do have the full screen size. Note that GL has a bottom-left
5170  * origin, while D3D has a top-left origin. */
5171 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5172 {
5173     UINT drawable_height;
5174
5175     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5176             && surface == surface->container.u.swapchain->front_buffer)
5177     {
5178         POINT offset = {0, 0};
5179         RECT windowsize;
5180
5181         ScreenToClient(window, &offset);
5182         OffsetRect(rect, offset.x, offset.y);
5183
5184         GetClientRect(window, &windowsize);
5185         drawable_height = windowsize.bottom - windowsize.top;
5186     }
5187     else
5188     {
5189         drawable_height = surface->resource.height;
5190     }
5191
5192     rect->top = drawable_height - rect->top;
5193     rect->bottom = drawable_height - rect->bottom;
5194 }
5195
5196 static void surface_blt_to_drawable(struct wined3d_device *device,
5197         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5198         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5199         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5200 {
5201     struct wined3d_context *context;
5202     RECT src_rect, dst_rect;
5203
5204     src_rect = *src_rect_in;
5205     dst_rect = *dst_rect_in;
5206
5207     /* Make sure the surface is up-to-date. This should probably use
5208      * surface_load_location() and worry about the destination surface too,
5209      * unless we're overwriting it completely. */
5210     surface_internal_preload(src_surface, SRGB_RGB);
5211
5212     /* Activate the destination context, set it up for blitting */
5213     context = context_acquire(device, dst_surface);
5214     context_apply_blit_state(context, device);
5215
5216     if (!surface_is_offscreen(dst_surface))
5217         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5218
5219     device->blitter->set_shader(device->blit_priv, context->gl_info, src_surface);
5220
5221     ENTER_GL();
5222
5223     if (color_key)
5224     {
5225         glEnable(GL_ALPHA_TEST);
5226         checkGLcall("glEnable(GL_ALPHA_TEST)");
5227
5228         /* When the primary render target uses P8, the alpha component
5229          * contains the palette index. Which means that the colorkey is one of
5230          * the palette entries. In other cases pixels that should be masked
5231          * away have alpha set to 0. */
5232         if (primary_render_target_is_p8(device))
5233             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5234         else
5235             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5236         checkGLcall("glAlphaFunc");
5237     }
5238     else
5239     {
5240         glDisable(GL_ALPHA_TEST);
5241         checkGLcall("glDisable(GL_ALPHA_TEST)");
5242     }
5243
5244     draw_textured_quad(src_surface, &src_rect, &dst_rect, filter);
5245
5246     if (color_key)
5247     {
5248         glDisable(GL_ALPHA_TEST);
5249         checkGLcall("glDisable(GL_ALPHA_TEST)");
5250     }
5251
5252     LEAVE_GL();
5253
5254     /* Leave the opengl state valid for blitting */
5255     device->blitter->unset_shader(context->gl_info);
5256
5257     if (wined3d_settings.strict_draw_ordering
5258             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5259             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5260         wglFlush(); /* Flush to ensure ordering across contexts. */
5261
5262     context_release(context);
5263 }
5264
5265 /* Do not call while under the GL lock. */
5266 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5267 {
5268     struct wined3d_device *device = s->resource.device;
5269     const struct blit_shader *blitter;
5270
5271     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5272             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5273     if (!blitter)
5274     {
5275         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5276         return WINED3DERR_INVALIDCALL;
5277     }
5278
5279     return blitter->color_fill(device, s, rect, color);
5280 }
5281
5282 /* Do not call while under the GL lock. */
5283 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5284         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5285         WINED3DTEXTUREFILTERTYPE Filter)
5286 {
5287     struct wined3d_device *device = dst_surface->resource.device;
5288     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5289     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5290
5291     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5292             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5293             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5294
5295     /* Get the swapchain. One of the surfaces has to be a primary surface */
5296     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5297     {
5298         WARN("Destination is in sysmem, rejecting gl blt\n");
5299         return WINED3DERR_INVALIDCALL;
5300     }
5301
5302     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5303         dstSwapchain = dst_surface->container.u.swapchain;
5304
5305     if (src_surface)
5306     {
5307         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5308         {
5309             WARN("Src is in sysmem, rejecting gl blt\n");
5310             return WINED3DERR_INVALIDCALL;
5311         }
5312
5313         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5314             srcSwapchain = src_surface->container.u.swapchain;
5315     }
5316
5317     /* Early sort out of cases where no render target is used */
5318     if (!dstSwapchain && !srcSwapchain
5319             && src_surface != device->fb.render_targets[0]
5320             && dst_surface != device->fb.render_targets[0])
5321     {
5322         TRACE("No surface is render target, not using hardware blit.\n");
5323         return WINED3DERR_INVALIDCALL;
5324     }
5325
5326     /* No destination color keying supported */
5327     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5328     {
5329         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5330         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5331         return WINED3DERR_INVALIDCALL;
5332     }
5333
5334     /* The only case where both surfaces on a swapchain are supported is a back buffer -> front buffer blit on the same swapchain */
5335     if (dstSwapchain && dstSwapchain == srcSwapchain && dstSwapchain->back_buffers
5336             && dst_surface == dstSwapchain->front_buffer
5337             && src_surface == dstSwapchain->back_buffers[0])
5338     {
5339         /* Half-Life does a Blt from the back buffer to the front buffer,
5340          * Full surface size, no flags... Use present instead
5341          *
5342          * This path will only be entered for d3d7 and ddraw apps, because d3d8/9 offer no way to blit TO the front buffer
5343          */
5344
5345         /* Check rects - wined3d_swapchain_present() doesn't handle them. */
5346         for (;;)
5347         {
5348             TRACE("Looking if a Present can be done...\n");
5349             /* Source Rectangle must be full surface */
5350             if (src_rect->left || src_rect->top
5351                     || src_rect->right != src_surface->resource.width
5352                     || src_rect->bottom != src_surface->resource.height)
5353             {
5354                 TRACE("No, Source rectangle doesn't match\n");
5355                 break;
5356             }
5357
5358             /* No stretching may occur */
5359             if (src_rect->right != dst_rect->right - dst_rect->left
5360                     || src_rect->bottom != dst_rect->bottom - dst_rect->top)
5361             {
5362                 TRACE("No, stretching is done\n");
5363                 break;
5364             }
5365
5366             /* Destination must be full surface or match the clipping rectangle */
5367             if (dst_surface->clipper && dst_surface->clipper->hWnd)
5368             {
5369                 RECT cliprect;
5370                 POINT pos[2];
5371                 GetClientRect(dst_surface->clipper->hWnd, &cliprect);
5372                 pos[0].x = dst_rect->left;
5373                 pos[0].y = dst_rect->top;
5374                 pos[1].x = dst_rect->right;
5375                 pos[1].y = dst_rect->bottom;
5376                 MapWindowPoints(GetDesktopWindow(), dst_surface->clipper->hWnd, pos, 2);
5377
5378                 if (pos[0].x != cliprect.left || pos[0].y != cliprect.top
5379                         || pos[1].x != cliprect.right || pos[1].y != cliprect.bottom)
5380                 {
5381                     TRACE("No, dest rectangle doesn't match(clipper)\n");
5382                     TRACE("Clip rect at %s\n", wine_dbgstr_rect(&cliprect));
5383                     TRACE("Blt dest: %s\n", wine_dbgstr_rect(dst_rect));
5384                     break;
5385                 }
5386             }
5387             else if (dst_rect->left || dst_rect->top
5388                     || dst_rect->right != dst_surface->resource.width
5389                     || dst_rect->bottom != dst_surface->resource.height)
5390             {
5391                 TRACE("No, dest rectangle doesn't match(surface size)\n");
5392                 break;
5393             }
5394
5395             TRACE("Yes\n");
5396
5397             /* These flags are unimportant for the flag check, remove them */
5398             if (!(flags & ~(WINEDDBLT_DONOTWAIT | WINEDDBLT_WAIT)))
5399             {
5400                 WINED3DSWAPEFFECT orig_swap = dstSwapchain->presentParms.SwapEffect;
5401
5402                 /* The idea behind this is that a glReadPixels and a glDrawPixels call
5403                     * take very long, while a flip is fast.
5404                     * This applies to Half-Life, which does such Blts every time it finished
5405                     * a frame, and to Prince of Persia 3D, which uses this to draw at least the main
5406                     * menu. This is also used by all apps when they do windowed rendering
5407                     *
5408                     * The problem is that flipping is not really the same as copying. After a
5409                     * Blt the front buffer is a copy of the back buffer, and the back buffer is
5410                     * untouched. Therefore it's necessary to override the swap effect
5411                     * and to set it back after the flip.
5412                     *
5413                     * Windowed Direct3D < 7 apps do the same. The D3D7 sdk demos are nice
5414                     * testcases.
5415                     */
5416
5417                 dstSwapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
5418                 dstSwapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
5419
5420                 TRACE("Full screen back buffer -> front buffer blt, performing a flip instead.\n");
5421                 wined3d_swapchain_present(dstSwapchain, NULL, NULL, dstSwapchain->win_handle, NULL, 0);
5422
5423                 dstSwapchain->presentParms.SwapEffect = orig_swap;
5424
5425                 return WINED3D_OK;
5426             }
5427             break;
5428         }
5429
5430         TRACE("Unsupported blit between buffers on the same swapchain\n");
5431         return WINED3DERR_INVALIDCALL;
5432     } else if(dstSwapchain && dstSwapchain == srcSwapchain) {
5433         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5434         return WINED3DERR_INVALIDCALL;
5435     } else if(dstSwapchain && srcSwapchain) {
5436         FIXME("Implement hardware blit between two different swapchains\n");
5437         return WINED3DERR_INVALIDCALL;
5438     }
5439     else if (dstSwapchain)
5440     {
5441         /* Handled with regular texture -> swapchain blit */
5442         if (src_surface == device->fb.render_targets[0])
5443             TRACE("Blit from active render target to a swapchain\n");
5444     }
5445     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5446     {
5447         FIXME("Implement blit from a swapchain to the active render target\n");
5448         return WINED3DERR_INVALIDCALL;
5449     }
5450
5451     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5452     {
5453         /* Blit from render target to texture */
5454         BOOL stretchx;
5455
5456         /* P8 read back is not implemented */
5457         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5458                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5459         {
5460             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5461             return WINED3DERR_INVALIDCALL;
5462         }
5463
5464         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5465         {
5466             TRACE("Color keying not supported by frame buffer to texture blit\n");
5467             return WINED3DERR_INVALIDCALL;
5468             /* Destination color key is checked above */
5469         }
5470
5471         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5472             stretchx = TRUE;
5473         else
5474             stretchx = FALSE;
5475
5476         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5477          * flip the image nor scale it.
5478          *
5479          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5480          * -> If the app wants a image width an unscaled width, copy it line per line
5481          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5482          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5483          *    back buffer. This is slower than reading line per line, thus not used for flipping
5484          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5485          *    pixel by pixel. */
5486         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5487                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5488         {
5489             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5490             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5491         } else {
5492             TRACE("Using hardware stretching to flip / stretch the texture\n");
5493             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5494         }
5495
5496         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5497         {
5498             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5499             dst_surface->resource.allocatedMemory = NULL;
5500             dst_surface->resource.heapMemory = NULL;
5501         }
5502         else
5503         {
5504             dst_surface->flags &= ~SFLAG_INSYSMEM;
5505         }
5506
5507         return WINED3D_OK;
5508     }
5509     else if (src_surface)
5510     {
5511         /* Blit from offscreen surface to render target */
5512         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5513         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5514
5515         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5516
5517         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5518                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5519                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5520         {
5521             FIXME("Unsupported blit operation falling back to software\n");
5522             return WINED3DERR_INVALIDCALL;
5523         }
5524
5525         /* Color keying: Check if we have to do a color keyed blt,
5526          * and if not check if a color key is activated.
5527          *
5528          * Just modify the color keying parameters in the surface and restore them afterwards
5529          * The surface keeps track of the color key last used to load the opengl surface.
5530          * PreLoad will catch the change to the flags and color key and reload if necessary.
5531          */
5532         if (flags & WINEDDBLT_KEYSRC)
5533         {
5534             /* Use color key from surface */
5535         }
5536         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5537         {
5538             /* Use color key from DDBltFx */
5539             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5540             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5541         }
5542         else
5543         {
5544             /* Do not use color key */
5545             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5546         }
5547
5548         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5549                 src_surface, src_rect, dst_surface, dst_rect);
5550
5551         /* Restore the color key parameters */
5552         src_surface->CKeyFlags = oldCKeyFlags;
5553         src_surface->SrcBltCKey = oldBltCKey;
5554
5555         surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
5556
5557         return WINED3D_OK;
5558     }
5559
5560     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5561     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5562     return WINED3DERR_INVALIDCALL;
5563 }
5564
5565 /* GL locking is done by the caller */
5566 static void surface_depth_blt(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
5567         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5568 {
5569     struct wined3d_device *device = surface->resource.device;
5570     GLint compare_mode = GL_NONE;
5571     struct blt_info info;
5572     GLint old_binding = 0;
5573     RECT rect;
5574
5575     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5576
5577     glDisable(GL_CULL_FACE);
5578     glDisable(GL_BLEND);
5579     glDisable(GL_ALPHA_TEST);
5580     glDisable(GL_SCISSOR_TEST);
5581     glDisable(GL_STENCIL_TEST);
5582     glEnable(GL_DEPTH_TEST);
5583     glDepthFunc(GL_ALWAYS);
5584     glDepthMask(GL_TRUE);
5585     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5586     glViewport(x, y, w, h);
5587
5588     SetRect(&rect, 0, h, w, 0);
5589     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5590     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
5591     glGetIntegerv(info.binding, &old_binding);
5592     glBindTexture(info.bind_target, texture);
5593     if (gl_info->supported[ARB_SHADOW])
5594     {
5595         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5596         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5597     }
5598
5599     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5600             gl_info, info.tex_type, &surface->ds_current_size);
5601
5602     glBegin(GL_TRIANGLE_STRIP);
5603     glTexCoord3fv(info.coords[0]);
5604     glVertex2f(-1.0f, -1.0f);
5605     glTexCoord3fv(info.coords[1]);
5606     glVertex2f(1.0f, -1.0f);
5607     glTexCoord3fv(info.coords[2]);
5608     glVertex2f(-1.0f, 1.0f);
5609     glTexCoord3fv(info.coords[3]);
5610     glVertex2f(1.0f, 1.0f);
5611     glEnd();
5612
5613     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5614     glBindTexture(info.bind_target, old_binding);
5615
5616     glPopAttrib();
5617
5618     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5619 }
5620
5621 void surface_modify_ds_location(struct wined3d_surface *surface,
5622         DWORD location, UINT w, UINT h)
5623 {
5624     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5625
5626     if (location & ~SFLAG_DS_LOCATIONS)
5627         FIXME("Invalid location (%#x) specified.\n", location);
5628
5629     surface->ds_current_size.cx = w;
5630     surface->ds_current_size.cy = h;
5631     surface->flags &= ~SFLAG_DS_LOCATIONS;
5632     surface->flags |= location;
5633 }
5634
5635 /* Context activation is done by the caller. */
5636 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5637 {
5638     struct wined3d_device *device = surface->resource.device;
5639     const struct wined3d_gl_info *gl_info = context->gl_info;
5640     GLsizei w, h;
5641
5642     TRACE("surface %p, new location %#x.\n", surface, location);
5643
5644     /* TODO: Make this work for modes other than FBO */
5645     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5646
5647     if (!(surface->flags & location))
5648     {
5649         w = surface->ds_current_size.cx;
5650         h = surface->ds_current_size.cy;
5651         surface->ds_current_size.cx = 0;
5652         surface->ds_current_size.cy = 0;
5653     }
5654     else
5655     {
5656         w = surface->resource.width;
5657         h = surface->resource.height;
5658     }
5659
5660     if (surface->ds_current_size.cx == surface->resource.width
5661             && surface->ds_current_size.cy == surface->resource.height)
5662     {
5663         TRACE("Location (%#x) is already up to date.\n", location);
5664         return;
5665     }
5666
5667     if (surface->current_renderbuffer)
5668     {
5669         FIXME("Not supported with fixed up depth stencil.\n");
5670         return;
5671     }
5672
5673     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5674     {
5675         /* This mostly happens when a depth / stencil is used without being
5676          * cleared first. In principle we could upload from sysmem, or
5677          * explicitly clear before first usage. For the moment there don't
5678          * appear to be a lot of applications depending on this, so a FIXME
5679          * should do. */
5680         FIXME("No up to date depth stencil location.\n");
5681         surface->flags |= location;
5682         surface->ds_current_size.cx = surface->resource.width;
5683         surface->ds_current_size.cy = surface->resource.height;
5684         return;
5685     }
5686
5687     if (location == SFLAG_DS_OFFSCREEN)
5688     {
5689         GLint old_binding = 0;
5690         GLenum bind_target;
5691
5692         /* The render target is allowed to be smaller than the depth/stencil
5693          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5694          * than the offscreen surface. Don't overwrite the offscreen surface
5695          * with undefined data. */
5696         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5697         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5698
5699         TRACE("Copying onscreen depth buffer to depth texture.\n");
5700
5701         ENTER_GL();
5702
5703         if (!device->depth_blt_texture)
5704         {
5705             glGenTextures(1, &device->depth_blt_texture);
5706         }
5707
5708         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5709          * directly on the FBO texture. That's because we need to flip. */
5710         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5711                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5712         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5713         {
5714             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5715             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5716         }
5717         else
5718         {
5719             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5720             bind_target = GL_TEXTURE_2D;
5721         }
5722         glBindTexture(bind_target, device->depth_blt_texture);
5723         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5724          * internal format, because the internal format might include stencil
5725          * data. In principle we should copy stencil data as well, but unless
5726          * the driver supports stencil export it's hard to do, and doesn't
5727          * seem to be needed in practice. If the hardware doesn't support
5728          * writing stencil data, the glCopyTexImage2D() call might trigger
5729          * software fallbacks. */
5730         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5731         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5732         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5733         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5734         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5735         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5736         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5737         glBindTexture(bind_target, old_binding);
5738
5739         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5740                 NULL, surface, SFLAG_INTEXTURE);
5741         context_set_draw_buffer(context, GL_NONE);
5742         glReadBuffer(GL_NONE);
5743
5744         /* Do the actual blit */
5745         surface_depth_blt(surface, gl_info, device->depth_blt_texture, 0, 0, w, h, bind_target);
5746         checkGLcall("depth_blt");
5747
5748         context_invalidate_state(context, STATE_FRAMEBUFFER);
5749
5750         LEAVE_GL();
5751
5752         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5753     }
5754     else if (location == SFLAG_DS_ONSCREEN)
5755     {
5756         TRACE("Copying depth texture to onscreen depth buffer.\n");
5757
5758         ENTER_GL();
5759
5760         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5761                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5762         surface_depth_blt(surface, gl_info, surface->texture_name,
5763                 0, surface->pow2Height - h, w, h, surface->texture_target);
5764         checkGLcall("depth_blt");
5765
5766         context_invalidate_state(context, STATE_FRAMEBUFFER);
5767
5768         LEAVE_GL();
5769
5770         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5771     }
5772     else
5773     {
5774         ERR("Invalid location (%#x) specified.\n", location);
5775     }
5776
5777     surface->flags |= location;
5778     surface->ds_current_size.cx = surface->resource.width;
5779     surface->ds_current_size.cy = surface->resource.height;
5780 }
5781
5782 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5783 {
5784     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5785     struct wined3d_surface *overlay;
5786
5787     TRACE("surface %p, location %s, persistent %#x.\n",
5788             surface, debug_surflocation(location), persistent);
5789
5790     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
5791     {
5792         if (surface_is_offscreen(surface))
5793         {
5794             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
5795              * for offscreen targets. */
5796             if (location & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE))
5797                 location |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
5798         }
5799         else
5800         {
5801             TRACE("Surface %p is an onscreen surface.\n", surface);
5802         }
5803     }
5804
5805     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5806             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5807         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5808
5809     if (persistent)
5810     {
5811         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5812                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5813         {
5814             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5815             {
5816                 TRACE("Passing to container.\n");
5817                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5818             }
5819         }
5820         surface->flags &= ~SFLAG_LOCATIONS;
5821         surface->flags |= location;
5822
5823         /* Redraw emulated overlays, if any */
5824         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5825         {
5826             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5827             {
5828                 overlay->surface_ops->surface_draw_overlay(overlay);
5829             }
5830         }
5831     }
5832     else
5833     {
5834         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5835         {
5836             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5837             {
5838                 TRACE("Passing to container\n");
5839                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5840             }
5841         }
5842         surface->flags &= ~location;
5843     }
5844
5845     if (!(surface->flags & SFLAG_LOCATIONS))
5846     {
5847         ERR("Surface %p does not have any up to date location.\n", surface);
5848     }
5849 }
5850
5851 static DWORD resource_access_from_location(DWORD location)
5852 {
5853     switch (location)
5854     {
5855         case SFLAG_INSYSMEM:
5856             return WINED3D_RESOURCE_ACCESS_CPU;
5857
5858         case SFLAG_INDRAWABLE:
5859         case SFLAG_INSRGBTEX:
5860         case SFLAG_INTEXTURE:
5861             return WINED3D_RESOURCE_ACCESS_GPU;
5862
5863         default:
5864             FIXME("Unhandled location %#x.\n", location);
5865             return 0;
5866     }
5867 }
5868
5869 static void surface_load_sysmem(struct wined3d_surface *surface,
5870         const struct wined3d_gl_info *gl_info, const RECT *rect)
5871 {
5872     surface_prepare_system_memory(surface);
5873
5874     /* Download the surface to system memory. */
5875     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5876     {
5877         struct wined3d_device *device = surface->resource.device;
5878         struct wined3d_context *context = NULL;
5879
5880         if (!device->isInDraw)
5881             context = context_acquire(device, NULL);
5882
5883         surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
5884         surface_download_data(surface, gl_info);
5885
5886         if (context)
5887             context_release(context);
5888
5889         return;
5890     }
5891
5892     /* Note: It might be faster to download into a texture first. */
5893     read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5894             wined3d_surface_get_pitch(surface));
5895 }
5896
5897 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5898         const struct wined3d_gl_info *gl_info, const RECT *rect)
5899 {
5900     struct wined3d_device *device = surface->resource.device;
5901     struct wined3d_format format;
5902     CONVERT_TYPES convert;
5903     UINT byte_count;
5904     BYTE *mem;
5905
5906     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5907         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5908
5909     if (surface->flags & SFLAG_INTEXTURE)
5910     {
5911         RECT r;
5912
5913         surface_get_rect(surface, rect, &r);
5914         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5915
5916         return WINED3D_OK;
5917     }
5918
5919     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5920     {
5921         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5922          * path through sysmem. */
5923         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5924     }
5925
5926     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5927
5928     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5929      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5930      * called. */
5931     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5932     {
5933         struct wined3d_context *context = NULL;
5934
5935         TRACE("Removing the pbo attached to surface %p.\n", surface);
5936
5937         if (!device->isInDraw)
5938             context = context_acquire(device, NULL);
5939
5940         surface_remove_pbo(surface, gl_info);
5941
5942         if (context)
5943             context_release(context);
5944     }
5945
5946     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5947     {
5948         UINT height = surface->resource.height;
5949         UINT width = surface->resource.width;
5950         UINT src_pitch, dst_pitch;
5951
5952         byte_count = format.conv_byte_count;
5953         src_pitch = wined3d_surface_get_pitch(surface);
5954
5955         /* Stick to the alignment for the converted surface too, makes it
5956          * easier to load the surface. */
5957         dst_pitch = width * byte_count;
5958         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5959
5960         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5961         {
5962             ERR("Out of memory (%u).\n", dst_pitch * height);
5963             return E_OUTOFMEMORY;
5964         }
5965
5966         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5967                 src_pitch, width, height, dst_pitch, convert, surface);
5968
5969         surface->flags |= SFLAG_CONVERTED;
5970     }
5971     else
5972     {
5973         surface->flags &= ~SFLAG_CONVERTED;
5974         mem = surface->resource.allocatedMemory;
5975         byte_count = format.byte_count;
5976     }
5977
5978     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5979
5980     /* Don't delete PBO memory. */
5981     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5982         HeapFree(GetProcessHeap(), 0, mem);
5983
5984     return WINED3D_OK;
5985 }
5986
5987 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5988         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5989 {
5990     const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
5991     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5992     struct wined3d_device *device = surface->resource.device;
5993     struct wined3d_context *context = NULL;
5994     UINT width, src_pitch, dst_pitch;
5995     struct wined3d_bo_address data;
5996     struct wined3d_format format;
5997     POINT dst_point = {0, 0};
5998     CONVERT_TYPES convert;
5999     BYTE *mem;
6000
6001     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
6002             && surface_is_offscreen(surface)
6003             && (surface->flags & SFLAG_INDRAWABLE))
6004     {
6005         read_from_framebuffer_texture(surface, srgb);
6006
6007         return WINED3D_OK;
6008     }
6009
6010     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
6011             && (surface->resource.format->flags & attach_flags) == attach_flags
6012             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6013                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6014                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6015     {
6016         if (srgb)
6017             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6018                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6019         else
6020             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6021                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6022
6023         return WINED3D_OK;
6024     }
6025
6026     /* Upload from system memory */
6027
6028     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6029             TRUE /* We will use textures */, &format, &convert);
6030
6031     if (srgb)
6032     {
6033         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6034         {
6035             /* Performance warning... */
6036             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6037             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6038         }
6039     }
6040     else
6041     {
6042         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6043         {
6044             /* Performance warning... */
6045             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6046             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6047         }
6048     }
6049
6050     if (!(surface->flags & SFLAG_INSYSMEM))
6051     {
6052         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6053         /* Lets hope we get it from somewhere... */
6054         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6055     }
6056
6057     if (!device->isInDraw)
6058         context = context_acquire(device, NULL);
6059
6060     surface_prepare_texture(surface, gl_info, srgb);
6061     surface_bind_and_dirtify(surface, gl_info, srgb);
6062
6063     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6064     {
6065         surface->flags |= SFLAG_GLCKEY;
6066         surface->glCKey = surface->SrcBltCKey;
6067     }
6068     else surface->flags &= ~SFLAG_GLCKEY;
6069
6070     width = surface->resource.width;
6071     src_pitch = wined3d_surface_get_pitch(surface);
6072
6073     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6074      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6075      * called. */
6076     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6077     {
6078         TRACE("Removing the pbo attached to surface %p.\n", surface);
6079         surface_remove_pbo(surface, gl_info);
6080     }
6081
6082     if (format.convert)
6083     {
6084         /* This code is entered for texture formats which need a fixup. */
6085         UINT height = surface->resource.height;
6086
6087         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6088         dst_pitch = width * format.conv_byte_count;
6089         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6090
6091         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6092         {
6093             ERR("Out of memory (%u).\n", dst_pitch * height);
6094             if (context)
6095                 context_release(context);
6096             return E_OUTOFMEMORY;
6097         }
6098         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6099     }
6100     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6101     {
6102         /* This code is only entered for color keying fixups */
6103         UINT height = surface->resource.height;
6104
6105         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6106         dst_pitch = width * format.conv_byte_count;
6107         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6108
6109         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6110         {
6111             ERR("Out of memory (%u).\n", dst_pitch * height);
6112             if (context)
6113                 context_release(context);
6114             return E_OUTOFMEMORY;
6115         }
6116         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6117                 width, height, dst_pitch, convert, surface);
6118     }
6119     else
6120     {
6121         mem = surface->resource.allocatedMemory;
6122     }
6123
6124     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6125     data.addr = mem;
6126     surface_upload_data(surface, gl_info, &format, &src_rect, width, &dst_point, srgb, &data);
6127
6128     if (context)
6129         context_release(context);
6130
6131     /* Don't delete PBO memory. */
6132     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6133         HeapFree(GetProcessHeap(), 0, mem);
6134
6135     return WINED3D_OK;
6136 }
6137
6138 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6139 {
6140     struct wined3d_device *device = surface->resource.device;
6141     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6142     BOOL in_fbo = FALSE;
6143     HRESULT hr;
6144
6145     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6146
6147     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6148     {
6149         if (location == SFLAG_INTEXTURE)
6150         {
6151             struct wined3d_context *context = context_acquire(device, NULL);
6152             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6153             context_release(context);
6154             return WINED3D_OK;
6155         }
6156         else
6157         {
6158             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6159             return WINED3DERR_INVALIDCALL;
6160         }
6161     }
6162
6163     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6164     {
6165         if (surface_is_offscreen(surface))
6166         {
6167             /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same
6168              * for offscreen targets. Prefer SFLAG_INTEXTURE. */
6169             if (location == SFLAG_INDRAWABLE)
6170                 location = SFLAG_INTEXTURE;
6171             in_fbo = TRUE;
6172         }
6173         else
6174         {
6175             TRACE("Surface %p is an onscreen surface.\n", surface);
6176         }
6177     }
6178
6179     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6180         location = SFLAG_INTEXTURE;
6181
6182     if (surface->flags & location)
6183     {
6184         TRACE("Location already up to date.\n");
6185         return WINED3D_OK;
6186     }
6187
6188     if (WARN_ON(d3d_surface))
6189     {
6190         DWORD required_access = resource_access_from_location(location);
6191         if ((surface->resource.access_flags & required_access) != required_access)
6192             WARN("Operation requires %#x access, but surface only has %#x.\n",
6193                     required_access, surface->resource.access_flags);
6194     }
6195
6196     if (!(surface->flags & SFLAG_LOCATIONS))
6197     {
6198         ERR("Surface %p does not have any up to date location.\n", surface);
6199         surface->flags |= SFLAG_LOST;
6200         return WINED3DERR_DEVICELOST;
6201     }
6202
6203     switch (location)
6204     {
6205         case SFLAG_INSYSMEM:
6206             surface_load_sysmem(surface, gl_info, rect);
6207             break;
6208
6209         case SFLAG_INDRAWABLE:
6210             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6211                 return hr;
6212             break;
6213
6214         case SFLAG_INTEXTURE:
6215         case SFLAG_INSRGBTEX:
6216             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6217                 return hr;
6218             break;
6219
6220         default:
6221             ERR("Don't know how to handle location %#x.\n", location);
6222             break;
6223     }
6224
6225     if (!rect)
6226     {
6227         surface->flags |= location;
6228
6229         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6230             surface_evict_sysmem(surface);
6231     }
6232
6233     if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
6234     {
6235         /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
6236         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
6237     }
6238
6239     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6240             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6241     {
6242         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6243     }
6244
6245     return WINED3D_OK;
6246 }
6247
6248 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6249 {
6250     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6251
6252     /* Not on a swapchain - must be offscreen */
6253     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6254
6255     /* The front buffer is always onscreen */
6256     if (surface == swapchain->front_buffer) return FALSE;
6257
6258     /* If the swapchain is rendered to an FBO, the backbuffer is
6259      * offscreen, otherwise onscreen */
6260     return swapchain->render_to_fbo;
6261 }
6262
6263 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Blitter free hook: intentionally empty.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6266
6267 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6268 /* Context activation is done by the caller. */
6269 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6270 {
6271     BYTE table[256][4];
6272     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6273
6274     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6275
6276     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6277     ENTER_GL();
6278     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6279     LEAVE_GL();
6280 }
6281
6282 /* Context activation is done by the caller. */
6283 static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6284 {
6285     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6286
6287     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6288      * else the surface is converted in software at upload time in LoadLocation.
6289      */
6290     if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6291         ffp_blit_p8_upload_palette(surface, gl_info);
6292
6293     ENTER_GL();
6294     glEnable(surface->texture_target);
6295     checkGLcall("glEnable(surface->texture_target)");
6296     LEAVE_GL();
6297     return WINED3D_OK;
6298 }
6299
6300 /* Context activation is done by the caller. */
6301 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6302 {
6303     ENTER_GL();
6304     glDisable(GL_TEXTURE_2D);
6305     checkGLcall("glDisable(GL_TEXTURE_2D)");
6306     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6307     {
6308         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6309         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6310     }
6311     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6312     {
6313         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6314         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6315     }
6316     LEAVE_GL();
6317 }
6318
6319 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6320         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6321         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6322 {
6323     enum complex_fixup src_fixup;
6324
6325     switch (blit_op)
6326     {
6327         case WINED3D_BLIT_OP_COLOR_BLIT:
6328             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6329                 return FALSE;
6330
6331             src_fixup = get_complex_fixup(src_format->color_fixup);
6332             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6333             {
6334                 TRACE("Checking support for fixup:\n");
6335                 dump_color_fixup_desc(src_format->color_fixup);
6336             }
6337
6338             if (!is_identity_fixup(dst_format->color_fixup))
6339             {
6340                 TRACE("Destination fixups are not supported\n");
6341                 return FALSE;
6342             }
6343
6344             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6345             {
6346                 TRACE("P8 fixup supported\n");
6347                 return TRUE;
6348             }
6349
6350             /* We only support identity conversions. */
6351             if (is_identity_fixup(src_format->color_fixup))
6352             {
6353                 TRACE("[OK]\n");
6354                 return TRUE;
6355             }
6356
6357             TRACE("[FAILED]\n");
6358             return FALSE;
6359
6360         case WINED3D_BLIT_OP_COLOR_FILL:
6361             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6362                 return FALSE;
6363
6364             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6365             {
6366                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6367                     return FALSE;
6368             }
6369             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6370             {
6371                 TRACE("Color fill not supported\n");
6372                 return FALSE;
6373             }
6374
6375             /* FIXME: We should reject color fills on formats with fixups,
6376              * but this would break P8 color fills for example. */
6377
6378             return TRUE;
6379
6380         case WINED3D_BLIT_OP_DEPTH_FILL:
6381             return TRUE;
6382
6383         default:
6384             TRACE("Unsupported blit_op=%d\n", blit_op);
6385             return FALSE;
6386     }
6387 }
6388
6389 /* Do not call while under the GL lock. */
6390 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6391         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6392 {
6393     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6394     struct wined3d_fb_state fb = {&dst_surface, NULL};
6395
6396     return device_clear_render_targets(device, 1, &fb,
6397             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6398 }
6399
6400 /* Do not call while under the GL lock. */
6401 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6402         struct wined3d_surface *surface, const RECT *rect, float depth)
6403 {
6404     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6405     struct wined3d_fb_state fb = {NULL, surface};
6406
6407     return device_clear_render_targets(device, 0, &fb,
6408             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6409 }
6410
6411 const struct blit_shader ffp_blit =  {
6412     ffp_blit_alloc,
6413     ffp_blit_free,
6414     ffp_blit_set,
6415     ffp_blit_unset,
6416     ffp_blit_supported,
6417     ffp_blit_color_fill,
6418     ffp_blit_depth_fill,
6419 };
6420
6421 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6422 {
6423     return WINED3D_OK;
6424 }
6425
/* Blitter "free" hook for the CPU blitter. It has nothing to release, so
 * the body is intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Nothing to do. */
}
6430
6431 /* Context activation is done by the caller. */
6432 static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, struct wined3d_surface *surface)
6433 {
6434     return WINED3D_OK;
6435 }
6436
/* Blitter "unset" hook for the CPU blitter. cpu_blit_set() changes nothing,
 * so there is nothing to restore.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Nothing to do. */
}
6441
6442 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6443         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6444         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6445 {
6446     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6447     {
6448         return TRUE;
6449     }
6450
6451     return FALSE;
6452 }
6453
6454 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6455         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6456         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6457 {
6458     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6459     const struct wined3d_format *src_format, *dst_format;
6460     struct wined3d_surface *orig_src = src_surface;
6461     WINED3DLOCKED_RECT dlock, slock;
6462     HRESULT hr = WINED3D_OK;
6463     const BYTE *sbuf;
6464     RECT xdst,xsrc;
6465     BYTE *dbuf;
6466     int x, y;
6467
6468     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6469             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6470             flags, fx, debug_d3dtexturefiltertype(filter));
6471
6472     xsrc = *src_rect;
6473
6474     if (!src_surface)
6475     {
6476         RECT full_rect;
6477
6478         full_rect.left = 0;
6479         full_rect.top = 0;
6480         full_rect.right = dst_surface->resource.width;
6481         full_rect.bottom = dst_surface->resource.height;
6482         IntersectRect(&xdst, &full_rect, dst_rect);
6483     }
6484     else
6485     {
6486         BOOL clip_horiz, clip_vert;
6487
6488         xdst = *dst_rect;
6489         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6490         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6491
6492         if (clip_vert || clip_horiz)
6493         {
6494             /* Now check if this is a special case or not... */
6495             if ((flags & WINEDDBLT_DDFX)
6496                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6497                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6498             {
6499                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6500                 return WINED3D_OK;
6501             }
6502
6503             if (clip_horiz)
6504             {
6505                 if (xdst.left < 0)
6506                 {
6507                     xsrc.left -= xdst.left;
6508                     xdst.left = 0;
6509                 }
6510                 if (xdst.right > dst_surface->resource.width)
6511                 {
6512                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6513                     xdst.right = (int)dst_surface->resource.width;
6514                 }
6515             }
6516
6517             if (clip_vert)
6518             {
6519                 if (xdst.top < 0)
6520                 {
6521                     xsrc.top -= xdst.top;
6522                     xdst.top = 0;
6523                 }
6524                 if (xdst.bottom > dst_surface->resource.height)
6525                 {
6526                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6527                     xdst.bottom = (int)dst_surface->resource.height;
6528                 }
6529             }
6530
6531             /* And check if after clipping something is still to be done... */
6532             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6533                     || (xdst.left >= (int)dst_surface->resource.width)
6534                     || (xdst.top >= (int)dst_surface->resource.height)
6535                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6536                     || (xsrc.left >= (int)src_surface->resource.width)
6537                     || (xsrc.top >= (int)src_surface->resource.height))
6538             {
6539                 TRACE("Nothing to be done after clipping.\n");
6540                 return WINED3D_OK;
6541             }
6542         }
6543     }
6544
6545     if (src_surface == dst_surface)
6546     {
6547         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6548         slock = dlock;
6549         src_format = dst_surface->resource.format;
6550         dst_format = src_format;
6551     }
6552     else
6553     {
6554         dst_format = dst_surface->resource.format;
6555         if (src_surface)
6556         {
6557             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6558             {
6559                 src_surface = surface_convert_format(src_surface, dst_format->id);
6560                 if (!src_surface)
6561                 {
6562                     /* The conv function writes a FIXME */
6563                     WARN("Cannot convert source surface format to dest format.\n");
6564                     goto release;
6565                 }
6566             }
6567             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6568             src_format = src_surface->resource.format;
6569         }
6570         else
6571         {
6572             src_format = dst_format;
6573         }
6574         if (dst_rect)
6575             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6576         else
6577             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6578     }
6579
6580     bpp = dst_surface->resource.format->byte_count;
6581     srcheight = xsrc.bottom - xsrc.top;
6582     srcwidth = xsrc.right - xsrc.left;
6583     dstheight = xdst.bottom - xdst.top;
6584     dstwidth = xdst.right - xdst.left;
6585     width = (xdst.right - xdst.left) * bpp;
6586
6587     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6588     {
6589         UINT row_block_count;
6590
6591         if (flags || src_surface == dst_surface)
6592         {
6593             FIXME("Only plain blits supported on compressed surfaces.\n");
6594             hr = E_NOTIMPL;
6595             goto release;
6596         }
6597
6598         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6599
6600         if (srcheight != dstheight || srcwidth != dstwidth)
6601         {
6602             WARN("Stretching not supported on compressed surfaces.\n");
6603             hr = WINED3DERR_INVALIDCALL;
6604             goto release;
6605         }
6606
6607         dbuf = dlock.pBits;
6608         sbuf = slock.pBits;
6609
6610         row_block_count = (dstwidth + dst_format->block_width - 1) / dst_format->block_width;
6611         for (y = 0; y < dstheight; y += dst_format->block_height)
6612         {
6613             memcpy(dbuf, sbuf, row_block_count * dst_format->block_byte_count);
6614             dbuf += dlock.Pitch;
6615             sbuf += slock.Pitch;
6616         }
6617
6618         goto release;
6619     }
6620
6621     if (dst_rect && src_surface != dst_surface)
6622         dbuf = dlock.pBits;
6623     else
6624         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6625
6626     /* First, all the 'source-less' blits */
6627     if (flags & WINEDDBLT_COLORFILL)
6628     {
6629         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6630         flags &= ~WINEDDBLT_COLORFILL;
6631     }
6632
6633     if (flags & WINEDDBLT_DEPTHFILL)
6634     {
6635         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6636     }
6637     if (flags & WINEDDBLT_ROP)
6638     {
6639         /* Catch some degenerate cases here. */
6640         switch (fx->dwROP)
6641         {
6642             case BLACKNESS:
6643                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6644                 break;
6645             case 0xAA0029: /* No-op */
6646                 break;
6647             case WHITENESS:
6648                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6649                 break;
6650             case SRCCOPY: /* Well, we do that below? */
6651                 break;
6652             default:
6653                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6654                 goto error;
6655         }
6656         flags &= ~WINEDDBLT_ROP;
6657     }
6658     if (flags & WINEDDBLT_DDROPS)
6659     {
6660         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6661     }
6662     /* Now the 'with source' blits. */
6663     if (src_surface)
6664     {
6665         const BYTE *sbase;
6666         int sx, xinc, sy, yinc;
6667
6668         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6669             goto release;
6670
6671         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6672                 && (srcwidth != dstwidth || srcheight != dstheight))
6673         {
6674             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6675             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6676         }
6677
6678         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6679         xinc = (srcwidth << 16) / dstwidth;
6680         yinc = (srcheight << 16) / dstheight;
6681
6682         if (!flags)
6683         {
6684             /* No effects, we can cheat here. */
6685             if (dstwidth == srcwidth)
6686             {
6687                 if (dstheight == srcheight)
6688                 {
6689                     /* No stretching in either direction. This needs to be as
6690                      * fast as possible. */
6691                     sbuf = sbase;
6692
6693                     /* Check for overlapping surfaces. */
6694                     if (src_surface != dst_surface || xdst.top < xsrc.top
6695                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6696                     {
6697                         /* No overlap, or dst above src, so copy from top downwards. */
6698                         for (y = 0; y < dstheight; ++y)
6699                         {
6700                             memcpy(dbuf, sbuf, width);
6701                             sbuf += slock.Pitch;
6702                             dbuf += dlock.Pitch;
6703                         }
6704                     }
6705                     else if (xdst.top > xsrc.top)
6706                     {
6707                         /* Copy from bottom upwards. */
6708                         sbuf += (slock.Pitch*dstheight);
6709                         dbuf += (dlock.Pitch*dstheight);
6710                         for (y = 0; y < dstheight; ++y)
6711                         {
6712                             sbuf -= slock.Pitch;
6713                             dbuf -= dlock.Pitch;
6714                             memcpy(dbuf, sbuf, width);
6715                         }
6716                     }
6717                     else
6718                     {
6719                         /* Src and dst overlapping on the same line, use memmove. */
6720                         for (y = 0; y < dstheight; ++y)
6721                         {
6722                             memmove(dbuf, sbuf, width);
6723                             sbuf += slock.Pitch;
6724                             dbuf += dlock.Pitch;
6725                         }
6726                     }
6727                 }
6728                 else
6729                 {
6730                     /* Stretching in y direction only. */
6731                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6732                     {
6733                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6734                         memcpy(dbuf, sbuf, width);
6735                         dbuf += dlock.Pitch;
6736                     }
6737                 }
6738             }
6739             else
6740             {
6741                 /* Stretching in X direction. */
6742                 int last_sy = -1;
6743                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6744                 {
6745                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6746
6747                     if ((sy >> 16) == (last_sy >> 16))
6748                     {
6749                         /* This source row is the same as last source row -
6750                          * Copy the already stretched row. */
6751                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6752                     }
6753                     else
6754                     {
6755 #define STRETCH_ROW(type) \
6756 do { \
6757     const type *s = (const type *)sbuf; \
6758     type *d = (type *)dbuf; \
6759     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6760         d[x] = s[sx >> 16]; \
6761 } while(0)
6762
6763                         switch(bpp)
6764                         {
6765                             case 1:
6766                                 STRETCH_ROW(BYTE);
6767                                 break;
6768                             case 2:
6769                                 STRETCH_ROW(WORD);
6770                                 break;
6771                             case 4:
6772                                 STRETCH_ROW(DWORD);
6773                                 break;
6774                             case 3:
6775                             {
6776                                 const BYTE *s;
6777                                 BYTE *d = dbuf;
6778                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6779                                 {
6780                                     DWORD pixel;
6781
6782                                     s = sbuf + 3 * (sx >> 16);
6783                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6784                                     d[0] = (pixel      ) & 0xff;
6785                                     d[1] = (pixel >>  8) & 0xff;
6786                                     d[2] = (pixel >> 16) & 0xff;
6787                                     d += 3;
6788                                 }
6789                                 break;
6790                             }
6791                             default:
6792                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6793                                 hr = WINED3DERR_NOTAVAILABLE;
6794                                 goto error;
6795                         }
6796 #undef STRETCH_ROW
6797                     }
6798                     dbuf += dlock.Pitch;
6799                     last_sy = sy;
6800                 }
6801             }
6802         }
6803         else
6804         {
6805             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6806             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6807             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6808             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6809             {
6810                 /* The color keying flags are checked for correctness in ddraw */
6811                 if (flags & WINEDDBLT_KEYSRC)
6812                 {
6813                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6814                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6815                 }
6816                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6817                 {
6818                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6819                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6820                 }
6821
6822                 if (flags & WINEDDBLT_KEYDEST)
6823                 {
6824                     /* Destination color keys are taken from the source surface! */
6825                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6826                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6827                 }
6828                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6829                 {
6830                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6831                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6832                 }
6833
6834                 if (bpp == 1)
6835                 {
6836                     keymask = 0xff;
6837                 }
6838                 else
6839                 {
6840                     keymask = src_format->red_mask
6841                             | src_format->green_mask
6842                             | src_format->blue_mask;
6843                 }
6844                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6845             }
6846
6847             if (flags & WINEDDBLT_DDFX)
6848             {
6849                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6850                 LONG tmpxy;
6851                 dTopLeft     = dbuf;
6852                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6853                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6854                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6855
6856                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6857                 {
6858                     /* I don't think we need to do anything about this flag */
6859                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6860                 }
6861                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6862                 {
6863                     tmp          = dTopRight;
6864                     dTopRight    = dTopLeft;
6865                     dTopLeft     = tmp;
6866                     tmp          = dBottomRight;
6867                     dBottomRight = dBottomLeft;
6868                     dBottomLeft  = tmp;
6869                     dstxinc = dstxinc * -1;
6870                 }
6871                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6872                 {
6873                     tmp          = dTopLeft;
6874                     dTopLeft     = dBottomLeft;
6875                     dBottomLeft  = tmp;
6876                     tmp          = dTopRight;
6877                     dTopRight    = dBottomRight;
6878                     dBottomRight = tmp;
6879                     dstyinc = dstyinc * -1;
6880                 }
6881                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6882                 {
6883                     /* I don't think we need to do anything about this flag */
6884                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6885                 }
6886                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6887                 {
6888                     tmp          = dBottomRight;
6889                     dBottomRight = dTopLeft;
6890                     dTopLeft     = tmp;
6891                     tmp          = dBottomLeft;
6892                     dBottomLeft  = dTopRight;
6893                     dTopRight    = tmp;
6894                     dstxinc = dstxinc * -1;
6895                     dstyinc = dstyinc * -1;
6896                 }
6897                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6898                 {
6899                     tmp          = dTopLeft;
6900                     dTopLeft     = dBottomLeft;
6901                     dBottomLeft  = dBottomRight;
6902                     dBottomRight = dTopRight;
6903                     dTopRight    = tmp;
6904                     tmpxy   = dstxinc;
6905                     dstxinc = dstyinc;
6906                     dstyinc = tmpxy;
6907                     dstxinc = dstxinc * -1;
6908                 }
6909                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6910                 {
6911                     tmp          = dTopLeft;
6912                     dTopLeft     = dTopRight;
6913                     dTopRight    = dBottomRight;
6914                     dBottomRight = dBottomLeft;
6915                     dBottomLeft  = tmp;
6916                     tmpxy   = dstxinc;
6917                     dstxinc = dstyinc;
6918                     dstyinc = tmpxy;
6919                     dstyinc = dstyinc * -1;
6920                 }
6921                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
6922                 {
6923                     /* I don't think we need to do anything about this flag */
6924                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
6925                 }
6926                 dbuf = dTopLeft;
6927                 flags &= ~(WINEDDBLT_DDFX);
6928             }
6929
6930 #define COPY_COLORKEY_FX(type) \
6931 do { \
6932     const type *s; \
6933     type *d = (type *)dbuf, *dx, tmp; \
6934     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
6935     { \
6936         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
6937         dx = d; \
6938         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6939         { \
6940             tmp = s[sx >> 16]; \
6941             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
6942                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
6943             { \
6944                 dx[0] = tmp; \
6945             } \
6946             dx = (type *)(((BYTE *)dx) + dstxinc); \
6947         } \
6948         d = (type *)(((BYTE *)d) + dstyinc); \
6949     } \
6950 } while(0)
6951
6952             switch (bpp)
6953             {
6954                 case 1:
6955                     COPY_COLORKEY_FX(BYTE);
6956                     break;
6957                 case 2:
6958                     COPY_COLORKEY_FX(WORD);
6959                     break;
6960                 case 4:
6961                     COPY_COLORKEY_FX(DWORD);
6962                     break;
6963                 case 3:
6964                 {
6965                     const BYTE *s;
6966                     BYTE *d = dbuf, *dx;
6967                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6968                     {
6969                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6970                         dx = d;
6971                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
6972                         {
6973                             DWORD pixel, dpixel = 0;
6974                             s = sbuf + 3 * (sx>>16);
6975                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6976                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
6977                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
6978                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
6979                             {
6980                                 dx[0] = (pixel      ) & 0xff;
6981                                 dx[1] = (pixel >>  8) & 0xff;
6982                                 dx[2] = (pixel >> 16) & 0xff;
6983                             }
6984                             dx += dstxinc;
6985                         }
6986                         d += dstyinc;
6987                     }
6988                     break;
6989                 }
6990                 default:
6991                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
6992                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
6993                     hr = WINED3DERR_NOTAVAILABLE;
6994                     goto error;
6995 #undef COPY_COLORKEY_FX
6996             }
6997         }
6998     }
6999
7000 error:
7001     if (flags && FIXME_ON(d3d_surface))
7002     {
7003         FIXME("\tUnsupported flags: %#x.\n", flags);
7004     }
7005
7006 release:
7007     wined3d_surface_unmap(dst_surface);
7008     if (src_surface && src_surface != dst_surface)
7009         wined3d_surface_unmap(src_surface);
7010     /* Release the converted surface, if any. */
7011     if (src_surface && src_surface != orig_src)
7012         wined3d_surface_decref(src_surface);
7013
7014     return hr;
7015 }
7016
7017 /* Do not call while under the GL lock. */
7018 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7019         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7020 {
7021     static const RECT src_rect;
7022     WINEDDBLTFX BltFx;
7023
7024     memset(&BltFx, 0, sizeof(BltFx));
7025     BltFx.dwSize = sizeof(BltFx);
7026     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7027     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7028             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7029 }
7030
7031 /* Do not call while under the GL lock. */
7032 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7033         struct wined3d_surface *surface, const RECT *rect, float depth)
7034 {
7035     FIXME("Depth filling not implemented by cpu_blit.\n");
7036     return WINED3DERR_INVALIDCALL;
7037 }
7038
7039 const struct blit_shader cpu_blit =  {
7040     cpu_blit_alloc,
7041     cpu_blit_free,
7042     cpu_blit_set,
7043     cpu_blit_unset,
7044     cpu_blit_supported,
7045     cpu_blit_color_fill,
7046     cpu_blit_depth_fill,
7047 };
7048
7049 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7050         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7051         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7052         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7053 {
7054     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7055     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7056     unsigned int resource_size;
7057     HRESULT hr;
7058
7059     if (multisample_quality > 0)
7060     {
7061         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7062         multisample_quality = 0;
7063     }
7064
7065     /* Quick lockable sanity check.
7066      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7067      * this function is too deep to need to care about things like this.
7068      * Levels need to be checked too, since they all affect what can be done. */
7069     switch (pool)
7070     {
7071         case WINED3DPOOL_SCRATCH:
7072             if (!lockable)
7073             {
7074                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7075                         "which are mutually exclusive, setting lockable to TRUE.\n");
7076                 lockable = TRUE;
7077             }
7078             break;
7079
7080         case WINED3DPOOL_SYSTEMMEM:
7081             if (!lockable)
7082                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7083             break;
7084
7085         case WINED3DPOOL_MANAGED:
7086             if (usage & WINED3DUSAGE_DYNAMIC)
7087                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7088             break;
7089
7090         case WINED3DPOOL_DEFAULT:
7091             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7092                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7093             break;
7094
7095         default:
7096             FIXME("Unknown pool %#x.\n", pool);
7097             break;
7098     };
7099
7100     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7101         FIXME("Trying to create a render target that isn't in the default pool.\n");
7102
7103     /* FIXME: Check that the format is supported by the device. */
7104
7105     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7106     if (!resource_size)
7107         return WINED3DERR_INVALIDCALL;
7108
7109     surface->surface_type = surface_type;
7110
7111     switch (surface_type)
7112     {
7113         case SURFACE_OPENGL:
7114             surface->surface_ops = &surface_ops;
7115             break;
7116
7117         case SURFACE_GDI:
7118             surface->surface_ops = &gdi_surface_ops;
7119             break;
7120
7121         default:
7122             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7123             return WINED3DERR_INVALIDCALL;
7124     }
7125
7126     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7127             multisample_type, multisample_quality, usage, pool, width, height, 1,
7128             resource_size, parent, parent_ops, &surface_resource_ops);
7129     if (FAILED(hr))
7130     {
7131         WARN("Failed to initialize resource, returning %#x.\n", hr);
7132         return hr;
7133     }
7134
7135     /* "Standalone" surface. */
7136     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7137
7138     surface->texture_level = level;
7139     list_init(&surface->overlays);
7140
7141     /* Flags */
7142     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7143     if (discard)
7144         surface->flags |= SFLAG_DISCARD;
7145     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7146         surface->flags |= SFLAG_LOCKABLE;
7147     /* I'm not sure if this qualifies as a hack or as an optimization. It
7148      * seems reasonable to assume that lockable render targets will get
7149      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7150      * creation. However, the other reason we want to do this is that several
7151      * ddraw applications access surface memory while the surface isn't
7152      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7153      * future locks prevents these from crashing. */
7154     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7155         surface->flags |= SFLAG_DYNLOCK;
7156
7157     /* Mark the texture as dirty so that it gets loaded first time around. */
7158     surface_add_dirty_rect(surface, NULL);
7159     list_init(&surface->renderbuffers);
7160
7161     TRACE("surface %p, memory %p, size %u\n",
7162             surface, surface->resource.allocatedMemory, surface->resource.size);
7163
7164     /* Call the private setup routine */
7165     hr = surface->surface_ops->surface_private_setup(surface);
7166     if (FAILED(hr))
7167     {
7168         ERR("Private setup failed, returning %#x\n", hr);
7169         surface->surface_ops->surface_cleanup(surface);
7170         return hr;
7171     }
7172
7173     return hr;
7174 }
7175
7176 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7177         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7178         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7179         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7180 {
7181     struct wined3d_surface *object;
7182     HRESULT hr;
7183
7184     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7185             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7186     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7187             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7188     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7189
7190     if (surface_type == SURFACE_OPENGL && !device->adapter)
7191     {
7192         ERR("OpenGL surfaces are not available without OpenGL.\n");
7193         return WINED3DERR_NOTAVAILABLE;
7194     }
7195
7196     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7197     if (!object)
7198     {
7199         ERR("Failed to allocate surface memory.\n");
7200         return WINED3DERR_OUTOFVIDEOMEMORY;
7201     }
7202
7203     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7204             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7205     if (FAILED(hr))
7206     {
7207         WARN("Failed to initialize surface, returning %#x.\n", hr);
7208         HeapFree(GetProcessHeap(), 0, object);
7209         return hr;
7210     }
7211
7212     TRACE("Created surface %p.\n", object);
7213     *surface = object;
7214
7215     return hr;
7216 }