d3d9/tests: Test partial block locks.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         /* Release the DC. */
101         SelectObject(surface->hDC, surface->dib.holdbitmap);
102         DeleteDC(surface->hDC);
103         /* Release the DIB section. */
104         DeleteObject(surface->dib.DIBsection);
105         surface->dib.bitmap_data = NULL;
106         surface->resource.allocatedMemory = NULL;
107     }
108
109     if (surface->flags & SFLAG_USERPTR)
110         wined3d_surface_set_mem(surface, NULL);
111     if (surface->overlay_dest)
112         list_remove(&surface->overlay_entry);
113
114     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
115     {
116         list_remove(&overlay->overlay_entry);
117         overlay->overlay_dest = NULL;
118     }
119
120     resource_cleanup(&surface->resource);
121 }
122
123 void surface_update_draw_binding(struct wined3d_surface *surface)
124 {
125     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
126         surface->draw_binding = SFLAG_INDRAWABLE;
127     else if (surface->resource.multisample_type)
128         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
129     else
130         surface->draw_binding = SFLAG_INTEXTURE;
131 }
132
133 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
134 {
135     TRACE("surface %p, container %p.\n", surface, container);
136
137     if (!container && type != WINED3D_CONTAINER_NONE)
138         ERR("Setting NULL container of type %#x.\n", type);
139
140     if (type == WINED3D_CONTAINER_SWAPCHAIN)
141     {
142         surface->get_drawable_size = get_drawable_size_swapchain;
143     }
144     else
145     {
146         switch (wined3d_settings.offscreen_rendering_mode)
147         {
148             case ORM_FBO:
149                 surface->get_drawable_size = get_drawable_size_fbo;
150                 break;
151
152             case ORM_BACKBUFFER:
153                 surface->get_drawable_size = get_drawable_size_backbuffer;
154                 break;
155
156             default:
157                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
158                 return;
159         }
160     }
161
162     surface->container.type = type;
163     surface->container.u.base = container;
164     surface_update_draw_binding(surface);
165 }
166
167 struct blt_info
168 {
169     GLenum binding;
170     GLenum bind_target;
171     enum tex_types tex_type;
172     GLfloat coords[4][3];
173 };
174
/* Rectangle with floating point edges: l(eft), t(op), r(ight), b(ottom).
 * cube_coords_float() fills it with edges rescaled as (edge * 2 / size) - 1. */
struct float_rect
{
    float l, t, r, b;
};
182
183 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
184 {
185     f->l = ((r->left * 2.0f) / w) - 1.0f;
186     f->t = ((r->top * 2.0f) / h) - 1.0f;
187     f->r = ((r->right * 2.0f) / w) - 1.0f;
188     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
189 }
190
191 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
192 {
193     GLfloat (*coords)[3] = info->coords;
194     struct float_rect f;
195
196     switch (target)
197     {
198         default:
199             FIXME("Unsupported texture target %#x\n", target);
200             /* Fall back to GL_TEXTURE_2D */
201         case GL_TEXTURE_2D:
202             info->binding = GL_TEXTURE_BINDING_2D;
203             info->bind_target = GL_TEXTURE_2D;
204             info->tex_type = tex_2d;
205             coords[0][0] = (float)rect->left / w;
206             coords[0][1] = (float)rect->top / h;
207             coords[0][2] = 0.0f;
208
209             coords[1][0] = (float)rect->right / w;
210             coords[1][1] = (float)rect->top / h;
211             coords[1][2] = 0.0f;
212
213             coords[2][0] = (float)rect->left / w;
214             coords[2][1] = (float)rect->bottom / h;
215             coords[2][2] = 0.0f;
216
217             coords[3][0] = (float)rect->right / w;
218             coords[3][1] = (float)rect->bottom / h;
219             coords[3][2] = 0.0f;
220             break;
221
222         case GL_TEXTURE_RECTANGLE_ARB:
223             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
224             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
225             info->tex_type = tex_rect;
226             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
227             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
228             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
229             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
230             break;
231
232         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
233             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
234             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
235             info->tex_type = tex_cube;
236             cube_coords_float(rect, w, h, &f);
237
238             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
239             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
240             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
241             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
242             break;
243
244         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
245             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
246             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
247             info->tex_type = tex_cube;
248             cube_coords_float(rect, w, h, &f);
249
250             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
251             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
252             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
253             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
254             break;
255
256         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
257             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
258             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
259             info->tex_type = tex_cube;
260             cube_coords_float(rect, w, h, &f);
261
262             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
263             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
264             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
265             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
266             break;
267
268         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
269             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
270             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
271             info->tex_type = tex_cube;
272             cube_coords_float(rect, w, h, &f);
273
274             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
275             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
276             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
277             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
278             break;
279
280         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
281             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
282             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
283             info->tex_type = tex_cube;
284             cube_coords_float(rect, w, h, &f);
285
286             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
287             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
288             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
289             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
290             break;
291
292         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
293             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
294             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
295             info->tex_type = tex_cube;
296             cube_coords_float(rect, w, h, &f);
297
298             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
299             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
300             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
301             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
302             break;
303     }
304 }
305
306 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
307 {
308     if (rect_in)
309         *rect_out = *rect_in;
310     else
311     {
312         rect_out->left = 0;
313         rect_out->top = 0;
314         rect_out->right = surface->resource.width;
315         rect_out->bottom = surface->resource.height;
316     }
317 }
318
319 /* GL locking and context activation is done by the caller */
320 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
321         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
322 {
323     struct blt_info info;
324
325     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
326
327     glEnable(info.bind_target);
328     checkGLcall("glEnable(bind_target)");
329
330     context_bind_texture(context, info.bind_target, src_surface->texture_name);
331
332     /* Filtering for StretchRect */
333     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
334             wined3d_gl_mag_filter(magLookup, Filter));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
337             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
338     checkGLcall("glTexParameteri");
339     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
340     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
341     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
342         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
343     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
344     checkGLcall("glTexEnvi");
345
346     /* Draw a quad */
347     glBegin(GL_TRIANGLE_STRIP);
348     glTexCoord3fv(info.coords[0]);
349     glVertex2i(dst_rect->left, dst_rect->top);
350
351     glTexCoord3fv(info.coords[1]);
352     glVertex2i(dst_rect->right, dst_rect->top);
353
354     glTexCoord3fv(info.coords[2]);
355     glVertex2i(dst_rect->left, dst_rect->bottom);
356
357     glTexCoord3fv(info.coords[3]);
358     glVertex2i(dst_rect->right, dst_rect->bottom);
359     glEnd();
360
361     /* Unbind the texture */
362     context_bind_texture(context, info.bind_target, 0);
363
364     /* We changed the filtering settings on the texture. Inform the
365      * container about this to get the filters reset properly next draw. */
366     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
367     {
368         struct wined3d_texture *texture = src_surface->container.u.texture;
369         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
370         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
371         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
372         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
373     }
374 }
375
376 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
377 {
378     const struct wined3d_format *format = surface->resource.format;
379     SYSTEM_INFO sysInfo;
380     BITMAPINFO *b_info;
381     int extraline = 0;
382     DWORD *masks;
383     UINT usage;
384     HDC dc;
385
386     TRACE("surface %p.\n", surface);
387
388     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
389     {
390         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
391         return WINED3DERR_INVALIDCALL;
392     }
393
394     switch (format->byte_count)
395     {
396         case 2:
397         case 4:
398             /* Allocate extra space to store the RGB bit masks. */
399             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
400             break;
401
402         case 3:
403             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
404             break;
405
406         default:
407             /* Allocate extra space for a palette. */
408             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
409                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
410             break;
411     }
412
413     if (!b_info)
414         return E_OUTOFMEMORY;
415
416     /* Some applications access the surface in via DWORDs, and do not take
417      * the necessary care at the end of the surface. So we need at least
418      * 4 extra bytes at the end of the surface. Check against the page size,
419      * if the last page used for the surface has at least 4 spare bytes we're
420      * safe, otherwise add an extra line to the DIB section. */
421     GetSystemInfo(&sysInfo);
422     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
423     {
424         extraline = 1;
425         TRACE("Adding an extra line to the DIB section.\n");
426     }
427
428     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
429     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
430     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
431     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
432     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
433             * wined3d_surface_get_pitch(surface);
434     b_info->bmiHeader.biPlanes = 1;
435     b_info->bmiHeader.biBitCount = format->byte_count * 8;
436
437     b_info->bmiHeader.biXPelsPerMeter = 0;
438     b_info->bmiHeader.biYPelsPerMeter = 0;
439     b_info->bmiHeader.biClrUsed = 0;
440     b_info->bmiHeader.biClrImportant = 0;
441
442     /* Get the bit masks */
443     masks = (DWORD *)b_info->bmiColors;
444     switch (surface->resource.format->id)
445     {
446         case WINED3DFMT_B8G8R8_UNORM:
447             usage = DIB_RGB_COLORS;
448             b_info->bmiHeader.biCompression = BI_RGB;
449             break;
450
451         case WINED3DFMT_B5G5R5X1_UNORM:
452         case WINED3DFMT_B5G5R5A1_UNORM:
453         case WINED3DFMT_B4G4R4A4_UNORM:
454         case WINED3DFMT_B4G4R4X4_UNORM:
455         case WINED3DFMT_B2G3R3_UNORM:
456         case WINED3DFMT_B2G3R3A8_UNORM:
457         case WINED3DFMT_R10G10B10A2_UNORM:
458         case WINED3DFMT_R8G8B8A8_UNORM:
459         case WINED3DFMT_R8G8B8X8_UNORM:
460         case WINED3DFMT_B10G10R10A2_UNORM:
461         case WINED3DFMT_B5G6R5_UNORM:
462         case WINED3DFMT_R16G16B16A16_UNORM:
463             usage = 0;
464             b_info->bmiHeader.biCompression = BI_BITFIELDS;
465             masks[0] = format->red_mask;
466             masks[1] = format->green_mask;
467             masks[2] = format->blue_mask;
468             break;
469
470         default:
471             /* Don't know palette */
472             b_info->bmiHeader.biCompression = BI_RGB;
473             usage = 0;
474             break;
475     }
476
477     if (!(dc = GetDC(0)))
478     {
479         HeapFree(GetProcessHeap(), 0, b_info);
480         return HRESULT_FROM_WIN32(GetLastError());
481     }
482
483     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
484             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
485             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
486     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
487     ReleaseDC(0, dc);
488
489     if (!surface->dib.DIBsection)
490     {
491         ERR("Failed to create DIB section.\n");
492         HeapFree(GetProcessHeap(), 0, b_info);
493         return HRESULT_FROM_WIN32(GetLastError());
494     }
495
496     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
497     /* Copy the existing surface to the dib section. */
498     if (surface->resource.allocatedMemory)
499     {
500         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
501                 surface->resource.height * wined3d_surface_get_pitch(surface));
502     }
503     else
504     {
505         /* This is to make maps read the GL texture although memory is allocated. */
506         surface->flags &= ~SFLAG_INSYSMEM;
507     }
508     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
509
510     HeapFree(GetProcessHeap(), 0, b_info);
511
512     /* Now allocate a DC. */
513     surface->hDC = CreateCompatibleDC(0);
514     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
515     TRACE("Using wined3d palette %p.\n", surface->palette);
516     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
517
518     surface->flags |= SFLAG_DIBSECTION;
519
520     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
521     surface->resource.heapMemory = NULL;
522
523     return WINED3D_OK;
524 }
525
526 static void surface_prepare_system_memory(struct wined3d_surface *surface)
527 {
528     struct wined3d_device *device = surface->resource.device;
529     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
530
531     TRACE("surface %p.\n", surface);
532
533     /* Performance optimization: Count how often a surface is locked, if it is
534      * locked regularly do not throw away the system memory copy. This avoids
535      * the need to download the surface from OpenGL all the time. The surface
536      * is still downloaded if the OpenGL texture is changed. */
537     if (!(surface->flags & SFLAG_DYNLOCK))
538     {
539         if (++surface->lockCount > MAXLOCKCOUNT)
540         {
541             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
542             surface->flags |= SFLAG_DYNLOCK;
543         }
544     }
545
546     /* Create a PBO for dynamically locked surfaces but don't do it for
547      * converted or NPOT surfaces. Also don't create a PBO for systemmem
548      * surfaces. */
549     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
550             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
551             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
552     {
553         struct wined3d_context *context;
554         GLenum error;
555
556         context = context_acquire(device, NULL);
557         ENTER_GL();
558
559         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
560         error = glGetError();
561         if (!surface->pbo || error != GL_NO_ERROR)
562             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
563
564         TRACE("Binding PBO %u.\n", surface->pbo);
565
566         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
567         checkGLcall("glBindBufferARB");
568
569         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
570                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
571         checkGLcall("glBufferDataARB");
572
573         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
574         checkGLcall("glBindBufferARB");
575
576         /* We don't need the system memory anymore and we can't even use it for PBOs. */
577         if (!(surface->flags & SFLAG_CLIENT))
578         {
579             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
580             surface->resource.heapMemory = NULL;
581         }
582         surface->resource.allocatedMemory = NULL;
583         surface->flags |= SFLAG_PBO;
584         LEAVE_GL();
585         context_release(context);
586     }
587     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
588     {
589         /* Whatever surface we have, make sure that there is memory allocated
590          * for the downloaded copy, or a PBO to map. */
591         if (!surface->resource.heapMemory)
592             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
593
594         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
595                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
596
597         if (surface->flags & SFLAG_INSYSMEM)
598             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
599     }
600 }
601
602 static void surface_evict_sysmem(struct wined3d_surface *surface)
603 {
604     if (surface->flags & SFLAG_DONOTFREE)
605         return;
606
607     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
608     surface->resource.allocatedMemory = NULL;
609     surface->resource.heapMemory = NULL;
610     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
611 }
612
613 /* Context activation is done by the caller. */
614 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
615         struct wined3d_context *context, BOOL srgb)
616 {
617     struct wined3d_device *device = surface->resource.device;
618     DWORD active_sampler;
619
620     /* We don't need a specific texture unit, but after binding the texture
621      * the current unit is dirty. Read the unit back instead of switching to
622      * 0, this avoids messing around with the state manager's GL states. The
623      * current texture unit should always be a valid one.
624      *
625      * To be more specific, this is tricky because we can implicitly be
626      * called from sampler() in state.c. This means we can't touch anything
627      * other than whatever happens to be the currently active texture, or we
628      * would risk marking already applied sampler states dirty again. */
629     active_sampler = device->rev_tex_unit_map[context->active_texture];
630
631     if (active_sampler != WINED3D_UNMAPPED_STAGE)
632         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
633     surface_bind(surface, context, srgb);
634 }
635
636 static void surface_force_reload(struct wined3d_surface *surface)
637 {
638     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
639 }
640
641 static void surface_release_client_storage(struct wined3d_surface *surface)
642 {
643     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
644
645     ENTER_GL();
646     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
647     if (surface->texture_name)
648     {
649         surface_bind_and_dirtify(surface, context, FALSE);
650         glTexImage2D(surface->texture_target, surface->texture_level,
651                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
652     }
653     if (surface->texture_name_srgb)
654     {
655         surface_bind_and_dirtify(surface, context, TRUE);
656         glTexImage2D(surface->texture_target, surface->texture_level,
657                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
658     }
659     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
660     LEAVE_GL();
661
662     context_release(context);
663
664     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
665     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
666     surface_force_reload(surface);
667 }
668
669 static HRESULT surface_private_setup(struct wined3d_surface *surface)
670 {
671     /* TODO: Check against the maximum texture sizes supported by the video card. */
672     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
673     unsigned int pow2Width, pow2Height;
674
675     TRACE("surface %p.\n", surface);
676
677     surface->texture_name = 0;
678     surface->texture_target = GL_TEXTURE_2D;
679
680     /* Non-power2 support */
681     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
682     {
683         pow2Width = surface->resource.width;
684         pow2Height = surface->resource.height;
685     }
686     else
687     {
688         /* Find the nearest pow2 match */
689         pow2Width = pow2Height = 1;
690         while (pow2Width < surface->resource.width)
691             pow2Width <<= 1;
692         while (pow2Height < surface->resource.height)
693             pow2Height <<= 1;
694     }
695     surface->pow2Width = pow2Width;
696     surface->pow2Height = pow2Height;
697
698     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
699     {
700         /* TODO: Add support for non power two compressed textures. */
701         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
702         {
703             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
704                   surface, surface->resource.width, surface->resource.height);
705             return WINED3DERR_NOTAVAILABLE;
706         }
707     }
708
709     if (pow2Width != surface->resource.width
710             || pow2Height != surface->resource.height)
711     {
712         surface->flags |= SFLAG_NONPOW2;
713     }
714
715     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
716             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
717     {
718         /* One of three options:
719          * 1: Do the same as we do with NPOT and scale the texture, (any
720          *    texture ops would require the texture to be scaled which is
721          *    potentially slow)
722          * 2: Set the texture to the maximum size (bad idea).
723          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
724          * 4: Create the surface, but allow it to be used only for DirectDraw
725          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
726          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
727          *    the render target. */
728         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
729         {
730             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
731             return WINED3DERR_NOTAVAILABLE;
732         }
733
734         /* We should never use this surface in combination with OpenGL! */
735         TRACE("Creating an oversized surface: %ux%u.\n",
736                 surface->pow2Width, surface->pow2Height);
737     }
738     else
739     {
740         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
741          * and EXT_PALETTED_TEXTURE is used in combination with texture
742          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
743          * EXT_PALETTED_TEXTURE doesn't work in combination with
744          * ARB_TEXTURE_RECTANGLE. */
745         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
746                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
747                 && gl_info->supported[EXT_PALETTED_TEXTURE]
748                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
749         {
750             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
751             surface->pow2Width = surface->resource.width;
752             surface->pow2Height = surface->resource.height;
753             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
754         }
755     }
756
757     switch (wined3d_settings.offscreen_rendering_mode)
758     {
759         case ORM_FBO:
760             surface->get_drawable_size = get_drawable_size_fbo;
761             break;
762
763         case ORM_BACKBUFFER:
764             surface->get_drawable_size = get_drawable_size_backbuffer;
765             break;
766
767         default:
768             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
769             return WINED3DERR_INVALIDCALL;
770     }
771
772     surface->flags |= SFLAG_INSYSMEM;
773
774     return WINED3D_OK;
775 }
776
777 static void surface_realize_palette(struct wined3d_surface *surface)
778 {
779     struct wined3d_palette *palette = surface->palette;
780
781     TRACE("surface %p.\n", surface);
782
783     if (!palette) return;
784
785     if (surface->resource.format->id == WINED3DFMT_P8_UINT
786             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
787     {
788         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
789         {
790             /* Make sure the texture is up to date. This call doesn't do
791              * anything if the texture is already up to date. */
792             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
793
794             /* We want to force a palette refresh, so mark the drawable as not being up to date */
795             if (!surface_is_offscreen(surface))
796                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
797         }
798         else
799         {
800             if (!(surface->flags & SFLAG_INSYSMEM))
801             {
802                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
803                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
804             }
805             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
806         }
807     }
808
809     if (surface->flags & SFLAG_DIBSECTION)
810     {
811         RGBQUAD col[256];
812         unsigned int i;
813
814         TRACE("Updating the DC's palette.\n");
815
816         for (i = 0; i < 256; ++i)
817         {
818             col[i].rgbRed   = palette->palents[i].peRed;
819             col[i].rgbGreen = palette->palents[i].peGreen;
820             col[i].rgbBlue  = palette->palents[i].peBlue;
821             col[i].rgbReserved = 0;
822         }
823         SetDIBColorTable(surface->hDC, 0, 256, col);
824     }
825
826     /* Propagate the changes to the drawable when we have a palette. */
827     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
828         surface_load_location(surface, surface->draw_binding, NULL);
829 }
830
831 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
832 {
833     HRESULT hr;
834
835     /* If there's no destination surface there is nothing to do. */
836     if (!surface->overlay_dest)
837         return WINED3D_OK;
838
839     /* Blt calls ModifyLocation on the dest surface, which in turn calls
840      * DrawOverlay to update the overlay. Prevent an endless recursion. */
841     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
842         return WINED3D_OK;
843
844     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
845     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
846             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
847     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
848
849     return hr;
850 }
851
852 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
853 {
854     struct wined3d_device *device = surface->resource.device;
855     const RECT *pass_rect = rect;
856
857     TRACE("surface %p, rect %s, flags %#x.\n",
858             surface, wine_dbgstr_rect(rect), flags);
859
860     if (flags & WINED3DLOCK_DISCARD)
861     {
862         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
863         surface_prepare_system_memory(surface);
864         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
865     }
866     else
867     {
868         /* surface_load_location() does not check if the rectangle specifies
869          * the full surface. Most callers don't need that, so do it here. */
870         if (rect && !rect->top && !rect->left
871                 && rect->right == surface->resource.width
872                 && rect->bottom == surface->resource.height)
873             pass_rect = NULL;
874
875         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
876                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
877                 || surface == device->fb.render_targets[0])))
878             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
879     }
880
881     if (surface->flags & SFLAG_PBO)
882     {
883         const struct wined3d_gl_info *gl_info;
884         struct wined3d_context *context;
885
886         context = context_acquire(device, NULL);
887         gl_info = context->gl_info;
888
889         ENTER_GL();
890         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
891         checkGLcall("glBindBufferARB");
892
893         /* This shouldn't happen but could occur if some other function
894          * didn't handle the PBO properly. */
895         if (surface->resource.allocatedMemory)
896             ERR("The surface already has PBO memory allocated.\n");
897
898         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
899         checkGLcall("glMapBufferARB");
900
901         /* Make sure the PBO isn't set anymore in order not to break non-PBO
902          * calls. */
903         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
904         checkGLcall("glBindBufferARB");
905
906         LEAVE_GL();
907         context_release(context);
908     }
909
910     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
911     {
912         if (!rect)
913             surface_add_dirty_rect(surface, NULL);
914         else
915         {
916             WINED3DBOX b;
917
918             b.Left = rect->left;
919             b.Top = rect->top;
920             b.Right = rect->right;
921             b.Bottom = rect->bottom;
922             b.Front = 0;
923             b.Back = 1;
924             surface_add_dirty_rect(surface, &b);
925         }
926     }
927 }
928
929 static void surface_unmap(struct wined3d_surface *surface)
930 {
931     struct wined3d_device *device = surface->resource.device;
932     BOOL fullsurface;
933
934     TRACE("surface %p.\n", surface);
935
936     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
937
938     if (surface->flags & SFLAG_PBO)
939     {
940         const struct wined3d_gl_info *gl_info;
941         struct wined3d_context *context;
942
943         TRACE("Freeing PBO memory.\n");
944
945         context = context_acquire(device, NULL);
946         gl_info = context->gl_info;
947
948         ENTER_GL();
949         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
950         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
951         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
952         checkGLcall("glUnmapBufferARB");
953         LEAVE_GL();
954         context_release(context);
955
956         surface->resource.allocatedMemory = NULL;
957     }
958
959     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
960
961     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
962     {
963         TRACE("Not dirtified, nothing to do.\n");
964         goto done;
965     }
966
967     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
968             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
969     {
970         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
971         {
972             static BOOL warned = FALSE;
973             if (!warned)
974             {
975                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
976                 warned = TRUE;
977             }
978             goto done;
979         }
980
981         if (!surface->dirtyRect.left && !surface->dirtyRect.top
982                 && surface->dirtyRect.right == surface->resource.width
983                 && surface->dirtyRect.bottom == surface->resource.height)
984         {
985             fullsurface = TRUE;
986         }
987         else
988         {
989             /* TODO: Proper partial rectangle tracking. */
990             fullsurface = FALSE;
991             surface->flags |= SFLAG_INSYSMEM;
992         }
993
994         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
995
996         /* Partial rectangle tracking is not commonly implemented, it is only
997          * done for render targets. INSYSMEM was set before to tell
998          * surface_load_location() where to read the rectangle from.
999          * Indrawable is set because all modifications from the partial
1000          * sysmem copy are written back to the drawable, thus the surface is
1001          * merged again in the drawable. The sysmem copy is not fully up to
1002          * date because only a subrectangle was read in Map(). */
1003         if (!fullsurface)
1004         {
1005             surface_modify_location(surface, surface->draw_binding, TRUE);
1006             surface_evict_sysmem(surface);
1007         }
1008
1009         surface->dirtyRect.left = surface->resource.width;
1010         surface->dirtyRect.top = surface->resource.height;
1011         surface->dirtyRect.right = 0;
1012         surface->dirtyRect.bottom = 0;
1013     }
1014     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1015     {
1016         FIXME("Depth / stencil buffer locking is not implemented.\n");
1017     }
1018
1019 done:
1020     /* Overlays have to be redrawn manually after changes with the GL implementation */
1021     if (surface->overlay_dest)
1022         surface_draw_overlay(surface);
1023 }
1024
1025 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1026 {
1027     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1028         return FALSE;
1029     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1030         return FALSE;
1031     return TRUE;
1032 }
1033
1034 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1035         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1036 {
1037     const struct wined3d_gl_info *gl_info;
1038     struct wined3d_context *context;
1039     DWORD src_mask, dst_mask;
1040     GLbitfield gl_mask;
1041
1042     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1043             device, src_surface, wine_dbgstr_rect(src_rect),
1044             dst_surface, wine_dbgstr_rect(dst_rect));
1045
1046     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1047     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1048
1049     if (src_mask != dst_mask)
1050     {
1051         ERR("Incompatible formats %s and %s.\n",
1052                 debug_d3dformat(src_surface->resource.format->id),
1053                 debug_d3dformat(dst_surface->resource.format->id));
1054         return;
1055     }
1056
1057     if (!src_mask)
1058     {
1059         ERR("Not a depth / stencil format: %s.\n",
1060                 debug_d3dformat(src_surface->resource.format->id));
1061         return;
1062     }
1063
1064     gl_mask = 0;
1065     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1066         gl_mask |= GL_DEPTH_BUFFER_BIT;
1067     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1068         gl_mask |= GL_STENCIL_BUFFER_BIT;
1069
1070     /* Make sure the locations are up-to-date. Loading the destination
1071      * surface isn't required if the entire surface is overwritten. */
1072     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1073     if (!surface_is_full_rect(dst_surface, dst_rect))
1074         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1075
1076     context = context_acquire(device, NULL);
1077     if (!context->valid)
1078     {
1079         context_release(context);
1080         WARN("Invalid context, skipping blit.\n");
1081         return;
1082     }
1083
1084     gl_info = context->gl_info;
1085
1086     ENTER_GL();
1087
1088     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1089     glReadBuffer(GL_NONE);
1090     checkGLcall("glReadBuffer()");
1091     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1092
1093     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1094     context_set_draw_buffer(context, GL_NONE);
1095     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1096
1097     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1098     {
1099         glDepthMask(GL_TRUE);
1100         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1101     }
1102     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1103     {
1104         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1105         {
1106             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1107             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1108         }
1109         glStencilMask(~0U);
1110         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1111     }
1112
1113     glDisable(GL_SCISSOR_TEST);
1114     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1115
1116     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1117             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1118     checkGLcall("glBlitFramebuffer()");
1119
1120     LEAVE_GL();
1121
1122     if (wined3d_settings.strict_draw_ordering)
1123         wglFlush(); /* Flush to ensure ordering across contexts. */
1124
1125     context_release(context);
1126 }
1127
1128 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1129  * Depth / stencil is not supported. */
1130 static void surface_blt_fbo(const struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1131         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1132         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1133 {
1134     const struct wined3d_gl_info *gl_info;
1135     struct wined3d_context *context;
1136     RECT src_rect, dst_rect;
1137     GLenum gl_filter;
1138     GLenum buffer;
1139
1140     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1141     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1142             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1143     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1144             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1145
1146     src_rect = *src_rect_in;
1147     dst_rect = *dst_rect_in;
1148
1149     switch (filter)
1150     {
1151         case WINED3DTEXF_LINEAR:
1152             gl_filter = GL_LINEAR;
1153             break;
1154
1155         default:
1156             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1157         case WINED3DTEXF_NONE:
1158         case WINED3DTEXF_POINT:
1159             gl_filter = GL_NEAREST;
1160             break;
1161     }
1162
1163     /* Resolve the source surface first if needed. */
1164     if (src_location == SFLAG_INRB_MULTISAMPLE
1165             && (src_surface->resource.format->id != dst_surface->resource.format->id
1166                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1167                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1168         src_location = SFLAG_INRB_RESOLVED;
1169
1170     /* Make sure the locations are up-to-date. Loading the destination
1171      * surface isn't required if the entire surface is overwritten. (And is
1172      * in fact harmful if we're being called by surface_load_location() with
1173      * the purpose of loading the destination surface.) */
1174     surface_load_location(src_surface, src_location, NULL);
1175     if (!surface_is_full_rect(dst_surface, &dst_rect))
1176         surface_load_location(dst_surface, dst_location, NULL);
1177
1178     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1179     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1180     else context = context_acquire(device, NULL);
1181
1182     if (!context->valid)
1183     {
1184         context_release(context);
1185         WARN("Invalid context, skipping blit.\n");
1186         return;
1187     }
1188
1189     gl_info = context->gl_info;
1190
1191     if (src_location == SFLAG_INDRAWABLE)
1192     {
1193         TRACE("Source surface %p is onscreen.\n", src_surface);
1194         buffer = surface_get_gl_buffer(src_surface);
1195         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1196     }
1197     else
1198     {
1199         TRACE("Source surface %p is offscreen.\n", src_surface);
1200         buffer = GL_COLOR_ATTACHMENT0;
1201     }
1202
1203     ENTER_GL();
1204     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1205     glReadBuffer(buffer);
1206     checkGLcall("glReadBuffer()");
1207     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1208     LEAVE_GL();
1209
1210     if (dst_location == SFLAG_INDRAWABLE)
1211     {
1212         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1213         buffer = surface_get_gl_buffer(dst_surface);
1214         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1215     }
1216     else
1217     {
1218         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1219         buffer = GL_COLOR_ATTACHMENT0;
1220     }
1221
1222     ENTER_GL();
1223     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1224     context_set_draw_buffer(context, buffer);
1225     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1226     context_invalidate_state(context, STATE_FRAMEBUFFER);
1227
1228     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1229     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1230     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1231     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1232     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1233
1234     glDisable(GL_SCISSOR_TEST);
1235     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1236
1237     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1238             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1239     checkGLcall("glBlitFramebuffer()");
1240
1241     LEAVE_GL();
1242
1243     if (wined3d_settings.strict_draw_ordering
1244             || (dst_location == SFLAG_INDRAWABLE
1245             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1246         wglFlush();
1247
1248     context_release(context);
1249 }
1250
1251 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1252         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1253         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1254 {
1255     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1256         return FALSE;
1257
1258     /* Source and/or destination need to be on the GL side */
1259     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1260         return FALSE;
1261
1262     switch (blit_op)
1263     {
1264         case WINED3D_BLIT_OP_COLOR_BLIT:
1265             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1266                 return FALSE;
1267             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1268                 return FALSE;
1269             break;
1270
1271         case WINED3D_BLIT_OP_DEPTH_BLIT:
1272             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1273                 return FALSE;
1274             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1275                 return FALSE;
1276             break;
1277
1278         default:
1279             return FALSE;
1280     }
1281
1282     if (!(src_format->id == dst_format->id
1283             || (is_identity_fixup(src_format->color_fixup)
1284             && is_identity_fixup(dst_format->color_fixup))))
1285         return FALSE;
1286
1287     return TRUE;
1288 }
1289
1290 /* This function checks if the primary render target uses the 8bit paletted format. */
1291 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1292 {
1293     if (device->fb.render_targets && device->fb.render_targets[0])
1294     {
1295         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1296         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1297                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1298             return TRUE;
1299     }
1300     return FALSE;
1301 }
1302
1303 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1304         DWORD color, WINED3DCOLORVALUE *float_color)
1305 {
1306     const struct wined3d_format *format = surface->resource.format;
1307     const struct wined3d_device *device = surface->resource.device;
1308
1309     switch (format->id)
1310     {
1311         case WINED3DFMT_P8_UINT:
1312             if (surface->palette)
1313             {
1314                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1315                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1316                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1317             }
1318             else
1319             {
1320                 float_color->r = 0.0f;
1321                 float_color->g = 0.0f;
1322                 float_color->b = 0.0f;
1323             }
1324             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1325             break;
1326
1327         case WINED3DFMT_B5G6R5_UNORM:
1328             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1329             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1330             float_color->b = (color & 0x1f) / 31.0f;
1331             float_color->a = 1.0f;
1332             break;
1333
1334         case WINED3DFMT_B8G8R8_UNORM:
1335         case WINED3DFMT_B8G8R8X8_UNORM:
1336             float_color->r = D3DCOLOR_R(color);
1337             float_color->g = D3DCOLOR_G(color);
1338             float_color->b = D3DCOLOR_B(color);
1339             float_color->a = 1.0f;
1340             break;
1341
1342         case WINED3DFMT_B8G8R8A8_UNORM:
1343             float_color->r = D3DCOLOR_R(color);
1344             float_color->g = D3DCOLOR_G(color);
1345             float_color->b = D3DCOLOR_B(color);
1346             float_color->a = D3DCOLOR_A(color);
1347             break;
1348
1349         default:
1350             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1351             return FALSE;
1352     }
1353
1354     return TRUE;
1355 }
1356
1357 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1358 {
1359     const struct wined3d_format *format = surface->resource.format;
1360
1361     switch (format->id)
1362     {
1363         case WINED3DFMT_S1_UINT_D15_UNORM:
1364             *float_depth = depth / (float)0x00007fff;
1365             break;
1366
1367         case WINED3DFMT_D16_UNORM:
1368             *float_depth = depth / (float)0x0000ffff;
1369             break;
1370
1371         case WINED3DFMT_D24_UNORM_S8_UINT:
1372         case WINED3DFMT_X8D24_UNORM:
1373             *float_depth = depth / (float)0x00ffffff;
1374             break;
1375
1376         case WINED3DFMT_D32_UNORM:
1377             *float_depth = depth / (float)0xffffffff;
1378             break;
1379
1380         default:
1381             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1382             return FALSE;
1383     }
1384
1385     return TRUE;
1386 }
1387
1388 /* Do not call while under the GL lock. */
1389 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1390 {
1391     const struct wined3d_resource *resource = &surface->resource;
1392     struct wined3d_device *device = resource->device;
1393     const struct blit_shader *blitter;
1394
1395     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1396             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1397     if (!blitter)
1398     {
1399         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1400         return WINED3DERR_INVALIDCALL;
1401     }
1402
1403     return blitter->depth_fill(device, surface, rect, depth);
1404 }
1405
1406 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1407         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1408 {
1409     struct wined3d_device *device = src_surface->resource.device;
1410
1411     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1412             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1413             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1414         return WINED3DERR_INVALIDCALL;
1415
1416     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1417
1418     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1419             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1420     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1421
1422     return WINED3D_OK;
1423 }
1424
1425 /* Do not call while under the GL lock. */
1426 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1427         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1428         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1429 {
1430     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1431     struct wined3d_device *device = dst_surface->resource.device;
1432     DWORD src_ds_flags, dst_ds_flags;
1433     RECT src_rect, dst_rect;
1434     BOOL scale, convert;
1435
1436     static const DWORD simple_blit = WINEDDBLT_ASYNC
1437             | WINEDDBLT_COLORFILL
1438             | WINEDDBLT_WAIT
1439             | WINEDDBLT_DEPTHFILL
1440             | WINEDDBLT_DONOTWAIT;
1441
1442     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1443             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1444             flags, fx, debug_d3dtexturefiltertype(filter));
1445     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1446
1447     if (fx)
1448     {
1449         TRACE("dwSize %#x.\n", fx->dwSize);
1450         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1451         TRACE("dwROP %#x.\n", fx->dwROP);
1452         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1453         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1454         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1455         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1456         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1457         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1458         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1459         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1460         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1461         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1462         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1463         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1464         TRACE("dwReserved %#x.\n", fx->dwReserved);
1465         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1466         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1467         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1468         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1469         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1470         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1471                 fx->ddckDestColorkey.dwColorSpaceLowValue,
1472                 fx->ddckDestColorkey.dwColorSpaceHighValue);
1473         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1474                 fx->ddckSrcColorkey.dwColorSpaceLowValue,
1475                 fx->ddckSrcColorkey.dwColorSpaceHighValue);
1476     }
1477
1478     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1479     {
1480         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1481         return WINEDDERR_SURFACEBUSY;
1482     }
1483
1484     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1485
1486     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1487             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1488             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1489             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1490             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1491     {
1492         /* The destination rect can be out of bounds on the condition
1493          * that a clipper is set for the surface. */
1494         if (dst_surface->clipper)
1495             FIXME("Blit clipping not implemented.\n");
1496         else
1497             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1498         return WINEDDERR_INVALIDRECT;
1499     }
1500
1501     if (src_surface)
1502     {
1503         surface_get_rect(src_surface, src_rect_in, &src_rect);
1504
1505         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1506                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1507                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1508                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1509                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1510         {
1511             WARN("Application gave us bad source rectangle for Blt.\n");
1512             return WINEDDERR_INVALIDRECT;
1513         }
1514     }
1515     else
1516     {
1517         memset(&src_rect, 0, sizeof(src_rect));
1518     }
1519
1520     if (!fx || !(fx->dwDDFX))
1521         flags &= ~WINEDDBLT_DDFX;
1522
1523     if (flags & WINEDDBLT_WAIT)
1524         flags &= ~WINEDDBLT_WAIT;
1525
1526     if (flags & WINEDDBLT_ASYNC)
1527     {
1528         static unsigned int once;
1529
1530         if (!once++)
1531             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1532         flags &= ~WINEDDBLT_ASYNC;
1533     }
1534
1535     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1536     if (flags & WINEDDBLT_DONOTWAIT)
1537     {
1538         static unsigned int once;
1539
1540         if (!once++)
1541             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1542         flags &= ~WINEDDBLT_DONOTWAIT;
1543     }
1544
1545     if (!device->d3d_initialized)
1546     {
1547         WARN("D3D not initialized, using fallback.\n");
1548         goto cpu;
1549     }
1550
1551     /* We want to avoid invalidating the sysmem location for converted
1552      * surfaces, since otherwise we'd have to convert the data back when
1553      * locking them. */
1554     if (dst_surface->flags & SFLAG_CONVERTED)
1555     {
1556         WARN("Converted surface, using CPU blit.\n");
1557         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1558     }
1559
1560     if (flags & ~simple_blit)
1561     {
1562         WARN("Using fallback for complex blit (%#x).\n", flags);
1563         goto fallback;
1564     }
1565
1566     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1567         src_swapchain = src_surface->container.u.swapchain;
1568     else
1569         src_swapchain = NULL;
1570
1571     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1572         dst_swapchain = dst_surface->container.u.swapchain;
1573     else
1574         dst_swapchain = NULL;
1575
1576     /* This isn't strictly needed. FBO blits for example could deal with
1577      * cross-swapchain blits by first downloading the source to a texture
1578      * before switching to the destination context. We just have this here to
1579      * not have to deal with the issue, since cross-swapchain blits should be
1580      * rare. */
1581     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1582     {
1583         FIXME("Using fallback for cross-swapchain blit.\n");
1584         goto fallback;
1585     }
1586
1587     scale = src_surface
1588             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1589             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1590     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1591
1592     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1593     if (src_surface)
1594         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1595     else
1596         src_ds_flags = 0;
1597
1598     if (src_ds_flags || dst_ds_flags)
1599     {
1600         if (flags & WINEDDBLT_DEPTHFILL)
1601         {
1602             float depth;
1603
1604             TRACE("Depth fill.\n");
1605
1606             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1607                 return WINED3DERR_INVALIDCALL;
1608
1609             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1610                 return WINED3D_OK;
1611         }
1612         else
1613         {
1614             /* Accessing depth / stencil surfaces is supposed to fail while in
1615              * a scene, except for fills, which seem to work. */
1616             if (device->inScene)
1617             {
1618                 WARN("Rejecting depth / stencil access while in scene.\n");
1619                 return WINED3DERR_INVALIDCALL;
1620             }
1621
1622             if (src_ds_flags != dst_ds_flags)
1623             {
1624                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1625                 return WINED3DERR_INVALIDCALL;
1626             }
1627
1628             if (src_rect.top || src_rect.left
1629                     || src_rect.bottom != src_surface->resource.height
1630                     || src_rect.right != src_surface->resource.width)
1631             {
1632                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1633                         wine_dbgstr_rect(&src_rect));
1634                 return WINED3DERR_INVALIDCALL;
1635             }
1636
1637             if (dst_rect.top || dst_rect.left
1638                     || dst_rect.bottom != dst_surface->resource.height
1639                     || dst_rect.right != dst_surface->resource.width)
1640             {
1641                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1642                         wine_dbgstr_rect(&src_rect));
1643                 return WINED3DERR_INVALIDCALL;
1644             }
1645
1646             if (scale)
1647             {
1648                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1649                 return WINED3DERR_INVALIDCALL;
1650             }
1651
1652             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1653                 return WINED3D_OK;
1654         }
1655     }
1656     else
1657     {
1658         /* In principle this would apply to depth blits as well, but we don't
1659          * implement those in the CPU blitter at the moment. */
1660         if ((dst_surface->flags & SFLAG_INSYSMEM)
1661                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1662         {
1663             if (scale)
1664                 TRACE("Not doing sysmem blit because of scaling.\n");
1665             else if (convert)
1666                 TRACE("Not doing sysmem blit because of format conversion.\n");
1667             else
1668                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1669         }
1670
1671         if (flags & WINEDDBLT_COLORFILL)
1672         {
1673             WINED3DCOLORVALUE color;
1674
1675             TRACE("Color fill.\n");
1676
1677             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1678                 goto fallback;
1679
1680             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1681                 return WINED3D_OK;
1682         }
1683         else
1684         {
1685             TRACE("Color blit.\n");
1686
1687             /* Upload */
1688             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1689             {
1690                 if (scale)
1691                     TRACE("Not doing upload because of scaling.\n");
1692                 else if (convert)
1693                     TRACE("Not doing upload because of format conversion.\n");
1694                 else
1695                 {
1696                     POINT dst_point = {dst_rect.left, dst_rect.top};
1697
1698                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1699                     {
1700                         if (!surface_is_offscreen(dst_surface))
1701                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1702                         return WINED3D_OK;
1703                     }
1704                 }
1705             }
1706
1707             /* Use present for back -> front blits. The idea behind this is
1708              * that present is potentially faster than a blit, in particular
1709              * when FBO blits aren't available. Some ddraw applications like
1710              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1711              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1712              * applications can't blit directly to the frontbuffer. */
1713             if (dst_swapchain && dst_swapchain->back_buffers
1714                     && dst_surface == dst_swapchain->front_buffer
1715                     && src_surface == dst_swapchain->back_buffers[0])
1716             {
1717                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->presentParms.SwapEffect;
1718
1719                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1720
1721                 /* Set the swap effect to COPY, we don't want the backbuffer
1722                  * to become undefined. */
1723                 dst_swapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
1724                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1725                 dst_swapchain->presentParms.SwapEffect = swap_effect;
1726
1727                 return WINED3D_OK;
1728             }
1729
1730             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1731                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1732                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1733             {
1734                 TRACE("Using FBO blit.\n");
1735
1736                 surface_blt_fbo(device, filter,
1737                         src_surface, src_surface->draw_binding, &src_rect,
1738                         dst_surface, dst_surface->draw_binding, &dst_rect);
1739                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1740                 return WINED3D_OK;
1741             }
1742
1743             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1744                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1745                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1746             {
1747                 TRACE("Using arbfp blit.\n");
1748
1749                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1750                     return WINED3D_OK;
1751             }
1752         }
1753     }
1754
1755 fallback:
1756
1757     /* Special cases for render targets. */
1758     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1759             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1760     {
1761         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1762                 src_surface, &src_rect, flags, fx, filter)))
1763             return WINED3D_OK;
1764     }
1765
1766 cpu:
1767
1768     /* For the rest call the X11 surface implementation. For render targets
1769      * this should be implemented OpenGL accelerated in BltOverride, other
1770      * blits are rather rare. */
1771     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1772 }
1773
1774 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1775         struct wined3d_surface *render_target)
1776 {
1777     TRACE("surface %p, render_target %p.\n", surface, render_target);
1778
1779     /* TODO: Check surface sizes, pools, etc. */
1780
1781     if (render_target->resource.multisample_type)
1782         return WINED3DERR_INVALIDCALL;
1783
1784     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1785 }
1786
1787 /* Context activation is done by the caller. */
1788 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1789 {
1790     if (!surface->resource.heapMemory)
1791     {
1792         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1793         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1794                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1795     }
1796
1797     ENTER_GL();
1798     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1799     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1800     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1801             surface->resource.size, surface->resource.allocatedMemory));
1802     checkGLcall("glGetBufferSubDataARB");
1803     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1804     checkGLcall("glDeleteBuffersARB");
1805     LEAVE_GL();
1806
1807     surface->pbo = 0;
1808     surface->flags &= ~SFLAG_PBO;
1809 }
1810
1811 /* Do not call while under the GL lock. */
1812 static void surface_unload(struct wined3d_resource *resource)
1813 {
1814     struct wined3d_surface *surface = surface_from_resource(resource);
1815     struct wined3d_renderbuffer_entry *entry, *entry2;
1816     struct wined3d_device *device = resource->device;
1817     const struct wined3d_gl_info *gl_info;
1818     struct wined3d_context *context;
1819
1820     TRACE("surface %p.\n", surface);
1821
1822     if (resource->pool == WINED3DPOOL_DEFAULT)
1823     {
1824         /* Default pool resources are supposed to be destroyed before Reset is called.
1825          * Implicit resources stay however. So this means we have an implicit render target
1826          * or depth stencil. The content may be destroyed, but we still have to tear down
1827          * opengl resources, so we cannot leave early.
1828          *
1829          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1830          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1831          * or the depth stencil into an FBO the texture or render buffer will be removed
1832          * and all flags get lost
1833          */
1834         surface_init_sysmem(surface);
1835         /* We also get here when the ddraw swapchain is destroyed, for example
1836          * for a mode switch. In this case this surface won't necessarily be
1837          * an implicit surface. We have to mark it lost so that the
1838          * application can restore it after the mode switch. */
1839         surface->flags |= SFLAG_LOST;
1840     }
1841     else
1842     {
1843         /* Load the surface into system memory */
1844         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1845         surface_modify_location(surface, surface->draw_binding, FALSE);
1846     }
1847     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1848     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1849     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1850
1851     context = context_acquire(device, NULL);
1852     gl_info = context->gl_info;
1853
1854     /* Destroy PBOs, but load them into real sysmem before */
1855     if (surface->flags & SFLAG_PBO)
1856         surface_remove_pbo(surface, gl_info);
1857
1858     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1859      * all application-created targets the application has to release the surface
1860      * before calling _Reset
1861      */
1862     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1863     {
1864         ENTER_GL();
1865         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1866         LEAVE_GL();
1867         list_remove(&entry->entry);
1868         HeapFree(GetProcessHeap(), 0, entry);
1869     }
1870     list_init(&surface->renderbuffers);
1871     surface->current_renderbuffer = NULL;
1872
1873     ENTER_GL();
1874
1875     /* If we're in a texture, the texture name belongs to the texture.
1876      * Otherwise, destroy it. */
1877     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1878     {
1879         glDeleteTextures(1, &surface->texture_name);
1880         surface->texture_name = 0;
1881         glDeleteTextures(1, &surface->texture_name_srgb);
1882         surface->texture_name_srgb = 0;
1883     }
1884     if (surface->rb_multisample)
1885     {
1886         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1887         surface->rb_multisample = 0;
1888     }
1889     if (surface->rb_resolved)
1890     {
1891         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1892         surface->rb_resolved = 0;
1893     }
1894
1895     LEAVE_GL();
1896
1897     context_release(context);
1898
1899     resource_unload(resource);
1900 }
1901
1902 static const struct wined3d_resource_ops surface_resource_ops =
1903 {
1904     surface_unload,
1905 };
1906
1907 static const struct wined3d_surface_ops surface_ops =
1908 {
1909     surface_private_setup,
1910     surface_realize_palette,
1911     surface_map,
1912     surface_unmap,
1913 };
1914
1915 /*****************************************************************************
1916  * Initializes the GDI surface, aka creates the DIB section we render to
1917  * The DIB section creation is done by calling GetDC, which will create the
1918  * section and releasing the dc to allow the app to use it. The dib section
1919  * will stay until the surface is released
1920  *
1921  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1922  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1923  * avoid confusion in the shared surface code.
1924  *
1925  * Returns:
1926  *  WINED3D_OK on success
1927  *  The return values of called methods on failure
1928  *
1929  *****************************************************************************/
1930 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1931 {
1932     HRESULT hr;
1933
1934     TRACE("surface %p.\n", surface);
1935
1936     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1937     {
1938         ERR("Overlays not yet supported by GDI surfaces.\n");
1939         return WINED3DERR_INVALIDCALL;
1940     }
1941
1942     /* Sysmem textures have memory already allocated - release it,
1943      * this avoids an unnecessary memcpy. */
1944     hr = surface_create_dib_section(surface);
1945     if (SUCCEEDED(hr))
1946     {
1947         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1948         surface->resource.heapMemory = NULL;
1949         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1950     }
1951
1952     /* We don't mind the nonpow2 stuff in GDI. */
1953     surface->pow2Width = surface->resource.width;
1954     surface->pow2Height = surface->resource.height;
1955
1956     return WINED3D_OK;
1957 }
1958
1959 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1960 {
1961     struct wined3d_palette *palette = surface->palette;
1962
1963     TRACE("surface %p.\n", surface);
1964
1965     if (!palette) return;
1966
1967     if (surface->flags & SFLAG_DIBSECTION)
1968     {
1969         RGBQUAD col[256];
1970         unsigned int i;
1971
1972         TRACE("Updating the DC's palette.\n");
1973
1974         for (i = 0; i < 256; ++i)
1975         {
1976             col[i].rgbRed = palette->palents[i].peRed;
1977             col[i].rgbGreen = palette->palents[i].peGreen;
1978             col[i].rgbBlue = palette->palents[i].peBlue;
1979             col[i].rgbReserved = 0;
1980         }
1981         SetDIBColorTable(surface->hDC, 0, 256, col);
1982     }
1983
1984     /* Update the image because of the palette change. Some games like e.g.
1985      * Red Alert call SetEntries a lot to implement fading. */
1986     /* Tell the swapchain to update the screen. */
1987     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1988     {
1989         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1990         if (surface == swapchain->front_buffer)
1991         {
1992             x11_copy_to_screen(swapchain, NULL);
1993         }
1994     }
1995 }
1996
1997 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
1998 {
1999     TRACE("surface %p, rect %s, flags %#x.\n",
2000             surface, wine_dbgstr_rect(rect), flags);
2001
2002     if (!surface->resource.allocatedMemory)
2003     {
2004         /* This happens on gdi surfaces if the application set a user pointer
2005          * and resets it. Recreate the DIB section. */
2006         surface_create_dib_section(surface);
2007         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2008     }
2009 }
2010
2011 static void gdi_surface_unmap(struct wined3d_surface *surface)
2012 {
2013     TRACE("surface %p.\n", surface);
2014
2015     /* Tell the swapchain to update the screen. */
2016     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2017     {
2018         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2019         if (surface == swapchain->front_buffer)
2020         {
2021             x11_copy_to_screen(swapchain, &surface->lockedRect);
2022         }
2023     }
2024
2025     memset(&surface->lockedRect, 0, sizeof(RECT));
2026 }
2027
2028 static const struct wined3d_surface_ops gdi_surface_ops =
2029 {
2030     gdi_surface_private_setup,
2031     gdi_surface_realize_palette,
2032     gdi_surface_map,
2033     gdi_surface_unmap,
2034 };
2035
2036 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2037 {
2038     GLuint *name;
2039     DWORD flag;
2040
2041     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2042
2043     if(srgb)
2044     {
2045         name = &surface->texture_name_srgb;
2046         flag = SFLAG_INSRGBTEX;
2047     }
2048     else
2049     {
2050         name = &surface->texture_name;
2051         flag = SFLAG_INTEXTURE;
2052     }
2053
2054     if (!*name && new_name)
2055     {
2056         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2057          * surface has no texture name yet. See if we can get rid of this. */
2058         if (surface->flags & flag)
2059             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2060         surface_modify_location(surface, flag, FALSE);
2061     }
2062
2063     *name = new_name;
2064     surface_force_reload(surface);
2065 }
2066
2067 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2068 {
2069     TRACE("surface %p, target %#x.\n", surface, target);
2070
2071     if (surface->texture_target != target)
2072     {
2073         if (target == GL_TEXTURE_RECTANGLE_ARB)
2074         {
2075             surface->flags &= ~SFLAG_NORMCOORD;
2076         }
2077         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2078         {
2079             surface->flags |= SFLAG_NORMCOORD;
2080         }
2081     }
2082     surface->texture_target = target;
2083     surface_force_reload(surface);
2084 }
2085
2086 /* Context activation is done by the caller. */
2087 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2088 {
2089     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2090
2091     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2092     {
2093         struct wined3d_texture *texture = surface->container.u.texture;
2094
2095         TRACE("Passing to container (%p).\n", texture);
2096         texture->texture_ops->texture_bind(texture, context, srgb);
2097     }
2098     else
2099     {
2100         if (surface->texture_level)
2101         {
2102             ERR("Standalone surface %p is non-zero texture level %u.\n",
2103                     surface, surface->texture_level);
2104         }
2105
2106         if (srgb)
2107             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2108
2109         ENTER_GL();
2110
2111         if (!surface->texture_name)
2112         {
2113             glGenTextures(1, &surface->texture_name);
2114             checkGLcall("glGenTextures");
2115
2116             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2117
2118             context_bind_texture(context, surface->texture_target, surface->texture_name);
2119             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2120             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2121             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2122             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2123             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2124             checkGLcall("glTexParameteri");
2125         }
2126         else
2127         {
2128             context_bind_texture(context, surface->texture_target, surface->texture_name);
2129         }
2130
2131         LEAVE_GL();
2132     }
2133 }
2134
2135 /* This call just downloads data, the caller is responsible for binding the
2136  * correct texture. */
2137 /* Context activation is done by the caller. */
2138 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2139 {
2140     const struct wined3d_format *format = surface->resource.format;
2141
2142     /* Only support read back of converted P8 surfaces. */
2143     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2144     {
2145         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2146         return;
2147     }
2148
2149     ENTER_GL();
2150
2151     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2152     {
2153         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2154                 surface, surface->texture_level, format->glFormat, format->glType,
2155                 surface->resource.allocatedMemory);
2156
2157         if (surface->flags & SFLAG_PBO)
2158         {
2159             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2160             checkGLcall("glBindBufferARB");
2161             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2162             checkGLcall("glGetCompressedTexImageARB");
2163             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2164             checkGLcall("glBindBufferARB");
2165         }
2166         else
2167         {
2168             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2169                     surface->texture_level, surface->resource.allocatedMemory));
2170             checkGLcall("glGetCompressedTexImageARB");
2171         }
2172
2173         LEAVE_GL();
2174     }
2175     else
2176     {
2177         void *mem;
2178         GLenum gl_format = format->glFormat;
2179         GLenum gl_type = format->glType;
2180         int src_pitch = 0;
2181         int dst_pitch = 0;
2182
2183         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2184         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2185         {
2186             gl_format = GL_ALPHA;
2187             gl_type = GL_UNSIGNED_BYTE;
2188         }
2189
2190         if (surface->flags & SFLAG_NONPOW2)
2191         {
2192             unsigned char alignment = surface->resource.device->surface_alignment;
2193             src_pitch = format->byte_count * surface->pow2Width;
2194             dst_pitch = wined3d_surface_get_pitch(surface);
2195             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2196             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2197         }
2198         else
2199         {
2200             mem = surface->resource.allocatedMemory;
2201         }
2202
2203         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2204                 surface, surface->texture_level, gl_format, gl_type, mem);
2205
2206         if (surface->flags & SFLAG_PBO)
2207         {
2208             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2209             checkGLcall("glBindBufferARB");
2210
2211             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2212             checkGLcall("glGetTexImage");
2213
2214             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2215             checkGLcall("glBindBufferARB");
2216         }
2217         else
2218         {
2219             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2220             checkGLcall("glGetTexImage");
2221         }
2222         LEAVE_GL();
2223
2224         if (surface->flags & SFLAG_NONPOW2)
2225         {
2226             const BYTE *src_data;
2227             BYTE *dst_data;
2228             UINT y;
2229             /*
2230              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2231              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2232              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2233              *
2234              * We're doing this...
2235              *
2236              * instead of boxing the texture :
2237              * |<-texture width ->|  -->pow2width|   /\
2238              * |111111111111111111|              |   |
2239              * |222 Texture 222222| boxed empty  | texture height
2240              * |3333 Data 33333333|              |   |
2241              * |444444444444444444|              |   \/
2242              * -----------------------------------   |
2243              * |     boxed  empty | boxed empty  | pow2height
2244              * |                  |              |   \/
2245              * -----------------------------------
2246              *
2247              *
2248              * we're repacking the data to the expected texture width
2249              *
2250              * |<-texture width ->|  -->pow2width|   /\
2251              * |111111111111111111222222222222222|   |
2252              * |222333333333333333333444444444444| texture height
2253              * |444444                           |   |
2254              * |                                 |   \/
2255              * |                                 |   |
2256              * |            empty                | pow2height
2257              * |                                 |   \/
2258              * -----------------------------------
2259              *
2260              * == is the same as
2261              *
2262              * |<-texture width ->|    /\
2263              * |111111111111111111|
2264              * |222222222222222222|texture height
2265              * |333333333333333333|
2266              * |444444444444444444|    \/
2267              * --------------------
2268              *
2269              * this also means that any references to allocatedMemory should work with the data as if were a
2270              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2271              *
2272              * internally the texture is still stored in a boxed format so any references to textureName will
2273              * get a boxed texture with width pow2width and not a texture of width resource.width.
2274              *
2275              * Performance should not be an issue, because applications normally do not lock the surfaces when
2276              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2277              * and doesn't have to be re-read. */
2278             src_data = mem;
2279             dst_data = surface->resource.allocatedMemory;
2280             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2281             for (y = 1; y < surface->resource.height; ++y)
2282             {
2283                 /* skip the first row */
2284                 src_data += src_pitch;
2285                 dst_data += dst_pitch;
2286                 memcpy(dst_data, src_data, dst_pitch);
2287             }
2288
2289             HeapFree(GetProcessHeap(), 0, mem);
2290         }
2291     }
2292
2293     /* Surface has now been downloaded */
2294     surface->flags |= SFLAG_INSYSMEM;
2295 }
2296
2297 /* This call just uploads data, the caller is responsible for binding the
2298  * correct texture. */
2299 /* Context activation is done by the caller. */
2300 static void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2301         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2302         BOOL srgb, const struct wined3d_bo_address *data)
2303 {
2304     UINT update_w = src_rect->right - src_rect->left;
2305     UINT update_h = src_rect->bottom - src_rect->top;
2306
2307     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2308             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2309             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2310
2311     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2312         update_h *= format->heightscale;
2313
2314     ENTER_GL();
2315
2316     if (data->buffer_object)
2317     {
2318         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2319         checkGLcall("glBindBufferARB");
2320     }
2321
2322     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2323     {
2324         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2325         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2326         const BYTE *addr = data->addr;
2327         GLenum internal;
2328
2329         addr += (src_rect->top / format->block_height) * src_pitch;
2330         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2331
2332         if (srgb)
2333             internal = format->glGammaInternal;
2334         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2335             internal = format->rtInternal;
2336         else
2337             internal = format->glInternal;
2338
2339         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2340                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2341                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2342
2343         if (row_length == src_pitch)
2344         {
2345             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2346                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2347         }
2348         else
2349         {
2350             UINT row, y;
2351
2352             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2353              * can't use the unpack row length like below. */
2354             for (row = 0, y = dst_point->y; row < row_count; ++row)
2355             {
2356                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2357                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2358                 y += format->block_height;
2359                 addr += src_pitch;
2360             }
2361         }
2362         checkGLcall("glCompressedTexSubImage2DARB");
2363     }
2364     else
2365     {
2366         const BYTE *addr = data->addr;
2367
2368         addr += src_rect->top * src_pitch;
2369         addr += src_rect->left * format->byte_count;
2370
2371         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2372                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2373                 update_w, update_h, format->glFormat, format->glType, addr);
2374
2375         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2376         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2377                 update_w, update_h, format->glFormat, format->glType, addr);
2378         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2379         checkGLcall("glTexSubImage2D");
2380     }
2381
2382     if (data->buffer_object)
2383     {
2384         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2385         checkGLcall("glBindBufferARB");
2386     }
2387
2388     LEAVE_GL();
2389
2390     if (wined3d_settings.strict_draw_ordering)
2391         wglFlush();
2392
2393     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2394     {
2395         struct wined3d_device *device = surface->resource.device;
2396         unsigned int i;
2397
2398         for (i = 0; i < device->context_count; ++i)
2399         {
2400             context_surface_update(device->contexts[i], surface);
2401         }
2402     }
2403 }
2404
2405 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2406         struct wined3d_surface *src_surface, const RECT *src_rect)
2407 {
2408     const struct wined3d_format *src_format;
2409     const struct wined3d_format *dst_format;
2410     const struct wined3d_gl_info *gl_info;
2411     struct wined3d_context *context;
2412     struct wined3d_bo_address data;
2413     struct wined3d_format format;
2414     UINT update_w, update_h;
2415     CONVERT_TYPES convert;
2416     UINT dst_w, dst_h;
2417     UINT src_w, src_h;
2418     UINT src_pitch;
2419     POINT p;
2420     RECT r;
2421
2422     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2423             dst_surface, wine_dbgstr_point(dst_point),
2424             src_surface, wine_dbgstr_rect(src_rect));
2425
2426     src_format = src_surface->resource.format;
2427     dst_format = dst_surface->resource.format;
2428
2429     if (src_format->id != dst_format->id)
2430     {
2431         WARN("Source and destination surfaces should have the same format.\n");
2432         return WINED3DERR_INVALIDCALL;
2433     }
2434
2435     if (!dst_point)
2436     {
2437         p.x = 0;
2438         p.y = 0;
2439         dst_point = &p;
2440     }
2441     else if (dst_point->x < 0 || dst_point->y < 0)
2442     {
2443         WARN("Invalid destination point.\n");
2444         return WINED3DERR_INVALIDCALL;
2445     }
2446
2447     if (!src_rect)
2448     {
2449         r.left = 0;
2450         r.top = 0;
2451         r.right = src_surface->resource.width;
2452         r.bottom = src_surface->resource.height;
2453         src_rect = &r;
2454     }
2455     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2456             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2457     {
2458         WARN("Invalid source rectangle.\n");
2459         return WINED3DERR_INVALIDCALL;
2460     }
2461
2462     src_w = src_surface->resource.width;
2463     src_h = src_surface->resource.height;
2464
2465     dst_w = dst_surface->resource.width;
2466     dst_h = dst_surface->resource.height;
2467
2468     update_w = src_rect->right - src_rect->left;
2469     update_h = src_rect->bottom - src_rect->top;
2470
2471     if (update_w > dst_w || dst_point->x > dst_w - update_w
2472             || update_h > dst_h || dst_point->y > dst_h - update_h)
2473     {
2474         WARN("Destination out of bounds.\n");
2475         return WINED3DERR_INVALIDCALL;
2476     }
2477
2478     /* NPOT block sizes would be silly. */
2479     if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED)
2480             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2481             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2482     {
2483         WARN("Update rect not block-aligned.\n");
2484         return WINED3DERR_INVALIDCALL;
2485     }
2486
2487     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2488     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2489     if (convert != NO_CONVERSION || format.convert)
2490     {
2491         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2492         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3DTEXF_POINT);
2493     }
2494
2495     context = context_acquire(dst_surface->resource.device, NULL);
2496     gl_info = context->gl_info;
2497
2498     /* Only load the surface for partial updates. For newly allocated texture
2499      * the texture wouldn't be the current location, and we'd upload zeroes
2500      * just to overwrite them again. */
2501     if (update_w == dst_w && update_h == dst_h)
2502         surface_prepare_texture(dst_surface, context, FALSE);
2503     else
2504         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2505     surface_bind(dst_surface, context, FALSE);
2506
2507     data.buffer_object = src_surface->pbo;
2508     data.addr = src_surface->resource.allocatedMemory;
2509     src_pitch = wined3d_surface_get_pitch(src_surface);
2510
2511     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2512
2513     invalidate_active_texture(dst_surface->resource.device, context);
2514
2515     context_release(context);
2516
2517     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2518     return WINED3D_OK;
2519 }
2520
2521 /* This call just allocates the texture, the caller is responsible for binding
2522  * the correct texture. */
2523 /* Context activation is done by the caller. */
2524 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2525         const struct wined3d_format *format, BOOL srgb)
2526 {
2527     BOOL enable_client_storage = FALSE;
2528     GLsizei width = surface->pow2Width;
2529     GLsizei height = surface->pow2Height;
2530     const BYTE *mem = NULL;
2531     GLenum internal;
2532
2533     if (srgb)
2534     {
2535         internal = format->glGammaInternal;
2536     }
2537     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2538     {
2539         internal = format->rtInternal;
2540     }
2541     else
2542     {
2543         internal = format->glInternal;
2544     }
2545
2546     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2547
2548     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2549             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2550             internal, width, height, format->glFormat, format->glType);
2551
2552     ENTER_GL();
2553
2554     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2555     {
2556         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2557                 || !surface->resource.allocatedMemory)
2558         {
2559             /* In some cases we want to disable client storage.
2560              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2561              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2562              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2563              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2564              */
2565             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2566             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2567             surface->flags &= ~SFLAG_CLIENT;
2568             enable_client_storage = TRUE;
2569         }
2570         else
2571         {
2572             surface->flags |= SFLAG_CLIENT;
2573
2574             /* Point OpenGL to our allocated texture memory. Do not use
2575              * resource.allocatedMemory here because it might point into a
2576              * PBO. Instead use heapMemory, but get the alignment right. */
2577             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2578                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2579         }
2580     }
2581
2582     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2583     {
2584         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2585                 internal, width, height, 0, surface->resource.size, mem));
2586         checkGLcall("glCompressedTexImage2DARB");
2587     }
2588     else
2589     {
2590         glTexImage2D(surface->texture_target, surface->texture_level,
2591                 internal, width, height, 0, format->glFormat, format->glType, mem);
2592         checkGLcall("glTexImage2D");
2593     }
2594
2595     if(enable_client_storage) {
2596         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2597         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2598     }
2599     LEAVE_GL();
2600 }
2601
2602 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2603  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2604 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2605 /* GL locking is done by the caller */
2606 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2607 {
2608     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2609     struct wined3d_renderbuffer_entry *entry;
2610     GLuint renderbuffer = 0;
2611     unsigned int src_width, src_height;
2612     unsigned int width, height;
2613
2614     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2615     {
2616         width = rt->pow2Width;
2617         height = rt->pow2Height;
2618     }
2619     else
2620     {
2621         width = surface->pow2Width;
2622         height = surface->pow2Height;
2623     }
2624
2625     src_width = surface->pow2Width;
2626     src_height = surface->pow2Height;
2627
2628     /* A depth stencil smaller than the render target is not valid */
2629     if (width > src_width || height > src_height) return;
2630
2631     /* Remove any renderbuffer set if the sizes match */
2632     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2633             || (width == src_width && height == src_height))
2634     {
2635         surface->current_renderbuffer = NULL;
2636         return;
2637     }
2638
2639     /* Look if we've already got a renderbuffer of the correct dimensions */
2640     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2641     {
2642         if (entry->width == width && entry->height == height)
2643         {
2644             renderbuffer = entry->id;
2645             surface->current_renderbuffer = entry;
2646             break;
2647         }
2648     }
2649
2650     if (!renderbuffer)
2651     {
2652         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2653         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2654         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2655                 surface->resource.format->glInternal, width, height);
2656
2657         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2658         entry->width = width;
2659         entry->height = height;
2660         entry->id = renderbuffer;
2661         list_add_head(&surface->renderbuffers, &entry->entry);
2662
2663         surface->current_renderbuffer = entry;
2664     }
2665
2666     checkGLcall("set_compatible_renderbuffer");
2667 }
2668
2669 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2670 {
2671     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2672
2673     TRACE("surface %p.\n", surface);
2674
2675     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2676     {
2677         ERR("Surface %p is not on a swapchain.\n", surface);
2678         return GL_NONE;
2679     }
2680
2681     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2682     {
2683         if (swapchain->render_to_fbo)
2684         {
2685             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2686             return GL_COLOR_ATTACHMENT0;
2687         }
2688         TRACE("Returning GL_BACK\n");
2689         return GL_BACK;
2690     }
2691     else if (surface == swapchain->front_buffer)
2692     {
2693         TRACE("Returning GL_FRONT\n");
2694         return GL_FRONT;
2695     }
2696
2697     FIXME("Higher back buffer, returning GL_BACK\n");
2698     return GL_BACK;
2699 }
2700
2701 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2702 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2703 {
2704     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2705
2706     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2707         /* No partial locking for textures yet. */
2708         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2709
2710     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2711     if (dirty_rect)
2712     {
2713         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2714         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2715         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2716         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2717     }
2718     else
2719     {
2720         surface->dirtyRect.left = 0;
2721         surface->dirtyRect.top = 0;
2722         surface->dirtyRect.right = surface->resource.width;
2723         surface->dirtyRect.bottom = surface->resource.height;
2724     }
2725
2726     /* if the container is a texture then mark it dirty. */
2727     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2728     {
2729         TRACE("Passing to container.\n");
2730         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2731     }
2732 }
2733
2734 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2735 {
2736     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2737     BOOL ck_changed;
2738
2739     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2740
2741     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2742     {
2743         ERR("Not supported on scratch surfaces.\n");
2744         return WINED3DERR_INVALIDCALL;
2745     }
2746
2747     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2748
2749     /* Reload if either the texture and sysmem have different ideas about the
2750      * color key, or the actual key values changed. */
2751     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2752             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2753             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2754     {
2755         TRACE("Reloading because of color keying\n");
2756         /* To perform the color key conversion we need a sysmem copy of
2757          * the surface. Make sure we have it. */
2758
2759         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2760         /* Make sure the texture is reloaded because of the color key change,
2761          * this kills performance though :( */
2762         /* TODO: This is not necessarily needed with hw palettized texture support. */
2763         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2764         /* Switching color keying on / off may change the internal format. */
2765         if (ck_changed)
2766             surface_force_reload(surface);
2767     }
2768     else if (!(surface->flags & flag))
2769     {
2770         TRACE("Reloading because surface is dirty.\n");
2771     }
2772     else
2773     {
2774         TRACE("surface is already in texture\n");
2775         return WINED3D_OK;
2776     }
2777
2778     /* No partial locking for textures yet. */
2779     surface_load_location(surface, flag, NULL);
2780     surface_evict_sysmem(surface);
2781
2782     return WINED3D_OK;
2783 }
2784
/* See also float_16_to_32() in wined3d_private.h */
/* Convert a 32-bit float to the bit pattern of a 16-bit half float (1 sign
 * bit, 5 exponent bits with excess 15, 10 mantissa bits).
 *
 * Zero (of either sign) yields 0x0000, NaN yields 0x7c01, and values too
 * large for a half float become infinity. Values too small for a normalized
 * half float are stored denormalized, down to zero. */
static inline unsigned short float_32_to_16(const float *in)
{
    /* The absolute value is scaled into [2^10, 2^11), so that its integer
     * part is the 10 bit mantissa plus the implicit leading one. */
    const float mantissa_min = 1024.0f;
    const float mantissa_max = 2048.0f;
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    if (tmp < mantissa_min)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < mantissa_min);
    }
    else if (tmp >= mantissa_max)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= mantissa_max);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding up may carry out of the mantissa. Renormalize, otherwise
     * masking with 0x3ff below would drop the carry and the result would be
     * half of the correct value. */
    if (mantissa == 2048)
    {
        mantissa = 1024;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2847
2848 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2849 {
2850     ULONG refcount;
2851
2852     TRACE("Surface %p, container %p of type %#x.\n",
2853             surface, surface->container.u.base, surface->container.type);
2854
2855     switch (surface->container.type)
2856     {
2857         case WINED3D_CONTAINER_TEXTURE:
2858             return wined3d_texture_incref(surface->container.u.texture);
2859
2860         case WINED3D_CONTAINER_SWAPCHAIN:
2861             return wined3d_swapchain_incref(surface->container.u.swapchain);
2862
2863         default:
2864             ERR("Unhandled container type %#x.\n", surface->container.type);
2865         case WINED3D_CONTAINER_NONE:
2866             break;
2867     }
2868
2869     refcount = InterlockedIncrement(&surface->resource.ref);
2870     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2871
2872     return refcount;
2873 }
2874
2875 /* Do not call while under the GL lock. */
2876 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2877 {
2878     ULONG refcount;
2879
2880     TRACE("Surface %p, container %p of type %#x.\n",
2881             surface, surface->container.u.base, surface->container.type);
2882
2883     switch (surface->container.type)
2884     {
2885         case WINED3D_CONTAINER_TEXTURE:
2886             return wined3d_texture_decref(surface->container.u.texture);
2887
2888         case WINED3D_CONTAINER_SWAPCHAIN:
2889             return wined3d_swapchain_decref(surface->container.u.swapchain);
2890
2891         default:
2892             ERR("Unhandled container type %#x.\n", surface->container.type);
2893         case WINED3D_CONTAINER_NONE:
2894             break;
2895     }
2896
2897     refcount = InterlockedDecrement(&surface->resource.ref);
2898     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2899
2900     if (!refcount)
2901     {
2902         surface_cleanup(surface);
2903         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2904
2905         TRACE("Destroyed surface %p.\n", surface);
2906         HeapFree(GetProcessHeap(), 0, surface);
2907     }
2908
2909     return refcount;
2910 }
2911
2912 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2913 {
2914     return resource_set_priority(&surface->resource, priority);
2915 }
2916
2917 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2918 {
2919     return resource_get_priority(&surface->resource);
2920 }
2921
2922 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2923 {
2924     TRACE("surface %p.\n", surface);
2925
2926     if (!surface->resource.device->d3d_initialized)
2927     {
2928         ERR("D3D not initialized.\n");
2929         return;
2930     }
2931
2932     surface_internal_preload(surface, SRGB_ANY);
2933 }
2934
2935 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2936 {
2937     TRACE("surface %p.\n", surface);
2938
2939     return surface->resource.parent;
2940 }
2941
2942 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2943 {
2944     TRACE("surface %p.\n", surface);
2945
2946     return &surface->resource;
2947 }
2948
2949 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2950 {
2951     TRACE("surface %p, flags %#x.\n", surface, flags);
2952
2953     switch (flags)
2954     {
2955         case WINEDDGBS_CANBLT:
2956         case WINEDDGBS_ISBLTDONE:
2957             return WINED3D_OK;
2958
2959         default:
2960             return WINED3DERR_INVALIDCALL;
2961     }
2962 }
2963
2964 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2965 {
2966     TRACE("surface %p, flags %#x.\n", surface, flags);
2967
2968     /* XXX: DDERR_INVALIDSURFACETYPE */
2969
2970     switch (flags)
2971     {
2972         case WINEDDGFS_CANFLIP:
2973         case WINEDDGFS_ISFLIPDONE:
2974             return WINED3D_OK;
2975
2976         default:
2977             return WINED3DERR_INVALIDCALL;
2978     }
2979 }
2980
2981 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2982 {
2983     TRACE("surface %p.\n", surface);
2984
2985     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2986     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2987 }
2988
2989 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2990 {
2991     TRACE("surface %p.\n", surface);
2992
2993     surface->flags &= ~SFLAG_LOST;
2994     return WINED3D_OK;
2995 }
2996
2997 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
2998 {
2999     TRACE("surface %p, palette %p.\n", surface, palette);
3000
3001     if (surface->palette == palette)
3002     {
3003         TRACE("Nop palette change.\n");
3004         return WINED3D_OK;
3005     }
3006
3007     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3008         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3009
3010     surface->palette = palette;
3011
3012     if (palette)
3013     {
3014         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3015             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3016
3017         surface->surface_ops->surface_realize_palette(surface);
3018     }
3019
3020     return WINED3D_OK;
3021 }
3022
3023 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3024         DWORD flags, const WINEDDCOLORKEY *color_key)
3025 {
3026     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3027
3028     if (flags & WINEDDCKEY_COLORSPACE)
3029     {
3030         FIXME(" colorkey value not supported (%08x) !\n", flags);
3031         return WINED3DERR_INVALIDCALL;
3032     }
3033
3034     /* Dirtify the surface, but only if a key was changed. */
3035     if (color_key)
3036     {
3037         switch (flags & ~WINEDDCKEY_COLORSPACE)
3038         {
3039             case WINEDDCKEY_DESTBLT:
3040                 surface->DestBltCKey = *color_key;
3041                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3042                 break;
3043
3044             case WINEDDCKEY_DESTOVERLAY:
3045                 surface->DestOverlayCKey = *color_key;
3046                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3047                 break;
3048
3049             case WINEDDCKEY_SRCOVERLAY:
3050                 surface->SrcOverlayCKey = *color_key;
3051                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3052                 break;
3053
3054             case WINEDDCKEY_SRCBLT:
3055                 surface->SrcBltCKey = *color_key;
3056                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3057                 break;
3058         }
3059     }
3060     else
3061     {
3062         switch (flags & ~WINEDDCKEY_COLORSPACE)
3063         {
3064             case WINEDDCKEY_DESTBLT:
3065                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3066                 break;
3067
3068             case WINEDDCKEY_DESTOVERLAY:
3069                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3070                 break;
3071
3072             case WINEDDCKEY_SRCOVERLAY:
3073                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3074                 break;
3075
3076             case WINEDDCKEY_SRCBLT:
3077                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3078                 break;
3079         }
3080     }
3081
3082     return WINED3D_OK;
3083 }
3084
3085 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3086 {
3087     TRACE("surface %p.\n", surface);
3088
3089     return surface->palette;
3090 }
3091
3092 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3093 {
3094     const struct wined3d_format *format = surface->resource.format;
3095     DWORD pitch;
3096
3097     TRACE("surface %p.\n", surface);
3098
3099     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3100     {
3101         /* Since compressed formats are block based, pitch means the amount of
3102          * bytes to the next row of block rather than the next row of pixels. */
3103         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3104         pitch = row_block_count * format->block_byte_count;
3105     }
3106     else
3107     {
3108         unsigned char alignment = surface->resource.device->surface_alignment;
3109         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3110         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3111     }
3112
3113     TRACE("Returning %u.\n", pitch);
3114
3115     return pitch;
3116 }
3117
3118 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3119 {
3120     TRACE("surface %p, mem %p.\n", surface, mem);
3121
3122     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3123     {
3124         WARN("Surface is locked or the DC is in use.\n");
3125         return WINED3DERR_INVALIDCALL;
3126     }
3127
3128     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3129     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3130     {
3131         ERR("Not supported on render targets.\n");
3132         return WINED3DERR_INVALIDCALL;
3133     }
3134
3135     if (mem && mem != surface->resource.allocatedMemory)
3136     {
3137         void *release = NULL;
3138
3139         /* Do I have to copy the old surface content? */
3140         if (surface->flags & SFLAG_DIBSECTION)
3141         {
3142             SelectObject(surface->hDC, surface->dib.holdbitmap);
3143             DeleteDC(surface->hDC);
3144             /* Release the DIB section. */
3145             DeleteObject(surface->dib.DIBsection);
3146             surface->dib.bitmap_data = NULL;
3147             surface->resource.allocatedMemory = NULL;
3148             surface->hDC = NULL;
3149             surface->flags &= ~SFLAG_DIBSECTION;
3150         }
3151         else if (!(surface->flags & SFLAG_USERPTR))
3152         {
3153             release = surface->resource.heapMemory;
3154             surface->resource.heapMemory = NULL;
3155         }
3156         surface->resource.allocatedMemory = mem;
3157         surface->flags |= SFLAG_USERPTR;
3158
3159         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3160         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3161
3162         /* For client textures OpenGL has to be notified. */
3163         if (surface->flags & SFLAG_CLIENT)
3164             surface_release_client_storage(surface);
3165
3166         /* Now free the old memory if any. */
3167         HeapFree(GetProcessHeap(), 0, release);
3168     }
3169     else if (surface->flags & SFLAG_USERPTR)
3170     {
3171         /* HeapMemory should be NULL already. */
3172         if (surface->resource.heapMemory)
3173             ERR("User pointer surface has heap memory allocated.\n");
3174
3175         if (!mem)
3176         {
3177             surface->resource.allocatedMemory = NULL;
3178             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3179
3180             if (surface->flags & SFLAG_CLIENT)
3181                 surface_release_client_storage(surface);
3182
3183             surface_prepare_system_memory(surface);
3184         }
3185
3186         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3187     }
3188
3189     return WINED3D_OK;
3190 }
3191
3192 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3193 {
3194     LONG w, h;
3195
3196     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3197
3198     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3199     {
3200         WARN("Not an overlay surface.\n");
3201         return WINEDDERR_NOTAOVERLAYSURFACE;
3202     }
3203
3204     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3205     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3206     surface->overlay_destrect.left = x;
3207     surface->overlay_destrect.top = y;
3208     surface->overlay_destrect.right = x + w;
3209     surface->overlay_destrect.bottom = y + h;
3210
3211     surface_draw_overlay(surface);
3212
3213     return WINED3D_OK;
3214 }
3215
3216 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3217 {
3218     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3219
3220     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3221     {
3222         TRACE("Not an overlay surface.\n");
3223         return WINEDDERR_NOTAOVERLAYSURFACE;
3224     }
3225
3226     if (!surface->overlay_dest)
3227     {
3228         TRACE("Overlay not visible.\n");
3229         *x = 0;
3230         *y = 0;
3231         return WINEDDERR_OVERLAYNOTVISIBLE;
3232     }
3233
3234     *x = surface->overlay_destrect.left;
3235     *y = surface->overlay_destrect.top;
3236
3237     TRACE("Returning position %d, %d.\n", *x, *y);
3238
3239     return WINED3D_OK;
3240 }
3241
3242 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3243         DWORD flags, struct wined3d_surface *ref)
3244 {
3245     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3246
3247     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3248     {
3249         TRACE("Not an overlay surface.\n");
3250         return WINEDDERR_NOTAOVERLAYSURFACE;
3251     }
3252
3253     return WINED3D_OK;
3254 }
3255
3256 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3257         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3258 {
3259     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3260             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3261
3262     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3263     {
3264         WARN("Not an overlay surface.\n");
3265         return WINEDDERR_NOTAOVERLAYSURFACE;
3266     }
3267     else if (!dst_surface)
3268     {
3269         WARN("Dest surface is NULL.\n");
3270         return WINED3DERR_INVALIDCALL;
3271     }
3272
3273     if (src_rect)
3274     {
3275         surface->overlay_srcrect = *src_rect;
3276     }
3277     else
3278     {
3279         surface->overlay_srcrect.left = 0;
3280         surface->overlay_srcrect.top = 0;
3281         surface->overlay_srcrect.right = surface->resource.width;
3282         surface->overlay_srcrect.bottom = surface->resource.height;
3283     }
3284
3285     if (dst_rect)
3286     {
3287         surface->overlay_destrect = *dst_rect;
3288     }
3289     else
3290     {
3291         surface->overlay_destrect.left = 0;
3292         surface->overlay_destrect.top = 0;
3293         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3294         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3295     }
3296
3297     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3298     {
3299         surface->overlay_dest = NULL;
3300         list_remove(&surface->overlay_entry);
3301     }
3302
3303     if (flags & WINEDDOVER_SHOW)
3304     {
3305         if (surface->overlay_dest != dst_surface)
3306         {
3307             surface->overlay_dest = dst_surface;
3308             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3309         }
3310     }
3311     else if (flags & WINEDDOVER_HIDE)
3312     {
3313         /* tests show that the rectangles are erased on hide */
3314         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3315         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3316         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3317         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3318         surface->overlay_dest = NULL;
3319     }
3320
3321     surface_draw_overlay(surface);
3322
3323     return WINED3D_OK;
3324 }
3325
3326 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3327 {
3328     TRACE("surface %p, clipper %p.\n", surface, clipper);
3329
3330     surface->clipper = clipper;
3331
3332     return WINED3D_OK;
3333 }
3334
3335 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3336 {
3337     TRACE("surface %p.\n", surface);
3338
3339     return surface->clipper;
3340 }
3341
3342 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3343 {
3344     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3345
3346     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3347
3348     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3349     {
3350         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3351         return WINED3DERR_INVALIDCALL;
3352     }
3353
3354     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3355             surface->pow2Width, surface->pow2Height);
3356     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3357     surface->resource.format = format;
3358
3359     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3360     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3361             format->glFormat, format->glInternal, format->glType);
3362
3363     return WINED3D_OK;
3364 }
3365
3366 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3367         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3368 {
3369     unsigned short *dst_s;
3370     const float *src_f;
3371     unsigned int x, y;
3372
3373     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3374
3375     for (y = 0; y < h; ++y)
3376     {
3377         src_f = (const float *)(src + y * pitch_in);
3378         dst_s = (unsigned short *) (dst + y * pitch_out);
3379         for (x = 0; x < w; ++x)
3380         {
3381             dst_s[x] = float_32_to_16(src_f + x);
3382         }
3383     }
3384 }
3385
3386 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3387         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3388 {
3389     static const unsigned char convert_5to8[] =
3390     {
3391         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3392         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3393         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3394         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3395     };
3396     static const unsigned char convert_6to8[] =
3397     {
3398         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3399         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3400         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3401         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3402         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3403         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3404         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3405         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3406     };
3407     unsigned int x, y;
3408
3409     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3410
3411     for (y = 0; y < h; ++y)
3412     {
3413         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3414         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3415         for (x = 0; x < w; ++x)
3416         {
3417             WORD pixel = src_line[x];
3418             dst_line[x] = 0xff000000
3419                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3420                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3421                     | convert_5to8[(pixel & 0x001f)];
3422         }
3423     }
3424 }
3425
3426 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3427  * in both cases we're just setting the X / Alpha channel to 0xff. */
3428 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3429         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3430 {
3431     unsigned int x, y;
3432
3433     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3434
3435     for (y = 0; y < h; ++y)
3436     {
3437         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3438         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3439
3440         for (x = 0; x < w; ++x)
3441         {
3442             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3443         }
3444     }
3445 }
3446
3447 static inline BYTE cliptobyte(int x)
3448 {
3449     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3450 }
3451
3452 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3453         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3454 {
3455     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3456     unsigned int x, y;
3457
3458     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3459
3460     for (y = 0; y < h; ++y)
3461     {
3462         const BYTE *src_line = src + y * pitch_in;
3463         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3464         for (x = 0; x < w; ++x)
3465         {
3466             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3467              *     C = Y - 16; D = U - 128; E = V - 128;
3468              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3469              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3470              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3471              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3472              * U and V are shared between the pixels. */
3473             if (!(x & 1)) /* For every even pixel, read new U and V. */
3474             {
3475                 d = (int) src_line[1] - 128;
3476                 e = (int) src_line[3] - 128;
3477                 r2 = 409 * e + 128;
3478                 g2 = - 100 * d - 208 * e + 128;
3479                 b2 = 516 * d + 128;
3480             }
3481             c2 = 298 * ((int) src_line[0] - 16);
3482             dst_line[x] = 0xff000000
3483                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3484                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3485                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3486                 /* Scale RGB values to 0..255 range,
3487                  * then clip them if still not in range (may be negative),
3488                  * then shift them within DWORD if necessary. */
3489             src_line += 2;
3490         }
3491     }
3492 }
3493
3494 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3495         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3496 {
3497     unsigned int x, y;
3498     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3499
3500     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3501
3502     for (y = 0; y < h; ++y)
3503     {
3504         const BYTE *src_line = src + y * pitch_in;
3505         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3506         for (x = 0; x < w; ++x)
3507         {
3508             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3509              *     C = Y - 16; D = U - 128; E = V - 128;
3510              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3511              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3512              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3513              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3514              * U and V are shared between the pixels. */
3515             if (!(x & 1)) /* For every even pixel, read new U and V. */
3516             {
3517                 d = (int) src_line[1] - 128;
3518                 e = (int) src_line[3] - 128;
3519                 r2 = 409 * e + 128;
3520                 g2 = - 100 * d - 208 * e + 128;
3521                 b2 = 516 * d + 128;
3522             }
3523             c2 = 298 * ((int) src_line[0] - 16);
3524             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3525                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3526                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3527                 /* Scale RGB values to 0..255 range,
3528                  * then clip them if still not in range (may be negative),
3529                  * then shift them within DWORD if necessary. */
3530             src_line += 2;
3531         }
3532     }
3533 }
3534
3535 struct d3dfmt_convertor_desc
3536 {
3537     enum wined3d_format_id from, to;
3538     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3539 };
3540
3541 static const struct d3dfmt_convertor_desc convertors[] =
3542 {
3543     {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
3544     {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
3545     {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3546     {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
3547     {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
3548     {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
3549 };
3550
3551 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3552         enum wined3d_format_id to)
3553 {
3554     unsigned int i;
3555
3556     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3557     {
3558         if (convertors[i].from == from && convertors[i].to == to)
3559             return &convertors[i];
3560     }
3561
3562     return NULL;
3563 }
3564
3565 /*****************************************************************************
3566  * surface_convert_format
3567  *
3568  * Creates a duplicate of a surface in a different format. Is used by Blt to
3569  * blit between surfaces with different formats.
3570  *
3571  * Parameters
3572  *  source: Source surface
3573  *  fmt: Requested destination format
3574  *
3575  *****************************************************************************/
3576 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3577 {
3578     const struct d3dfmt_convertor_desc *conv;
3579     WINED3DLOCKED_RECT lock_src, lock_dst;
3580     struct wined3d_surface *ret = NULL;
3581     HRESULT hr;
3582
3583     conv = find_convertor(source->resource.format->id, to_fmt);
3584     if (!conv)
3585     {
3586         FIXME("Cannot find a conversion function from format %s to %s.\n",
3587                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3588         return NULL;
3589     }
3590
3591     wined3d_surface_create(source->resource.device, source->resource.width,
3592             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3593             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3594             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3595     if (!ret)
3596     {
3597         ERR("Failed to create a destination surface for conversion.\n");
3598         return NULL;
3599     }
3600
3601     memset(&lock_src, 0, sizeof(lock_src));
3602     memset(&lock_dst, 0, sizeof(lock_dst));
3603
3604     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3605     if (FAILED(hr))
3606     {
3607         ERR("Failed to lock the source surface.\n");
3608         wined3d_surface_decref(ret);
3609         return NULL;
3610     }
3611     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3612     if (FAILED(hr))
3613     {
3614         ERR("Failed to lock the destination surface.\n");
3615         wined3d_surface_unmap(source);
3616         wined3d_surface_decref(ret);
3617         return NULL;
3618     }
3619
3620     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3621             source->resource.width, source->resource.height);
3622
3623     wined3d_surface_unmap(ret);
3624     wined3d_surface_unmap(source);
3625
3626     return ret;
3627 }
3628
3629 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3630         unsigned int bpp, UINT pitch, DWORD color)
3631 {
3632     BYTE *first;
3633     int x, y;
3634
3635     /* Do first row */
3636
3637 #define COLORFILL_ROW(type) \
3638 do { \
3639     type *d = (type *)buf; \
3640     for (x = 0; x < width; ++x) \
3641         d[x] = (type)color; \
3642 } while(0)
3643
3644     switch (bpp)
3645     {
3646         case 1:
3647             COLORFILL_ROW(BYTE);
3648             break;
3649
3650         case 2:
3651             COLORFILL_ROW(WORD);
3652             break;
3653
3654         case 3:
3655         {
3656             BYTE *d = buf;
3657             for (x = 0; x < width; ++x, d += 3)
3658             {
3659                 d[0] = (color      ) & 0xFF;
3660                 d[1] = (color >>  8) & 0xFF;
3661                 d[2] = (color >> 16) & 0xFF;
3662             }
3663             break;
3664         }
3665         case 4:
3666             COLORFILL_ROW(DWORD);
3667             break;
3668
3669         default:
3670             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3671             return WINED3DERR_NOTAVAILABLE;
3672     }
3673
3674 #undef COLORFILL_ROW
3675
3676     /* Now copy first row. */
3677     first = buf;
3678     for (y = 1; y < height; ++y)
3679     {
3680         buf += pitch;
3681         memcpy(buf, first, width * bpp);
3682     }
3683
3684     return WINED3D_OK;
3685 }
3686
3687 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3688 {
3689     TRACE("surface %p.\n", surface);
3690
3691     if (!(surface->flags & SFLAG_LOCKED))
3692     {
3693         WARN("Trying to unmap unmapped surface.\n");
3694         return WINEDDERR_NOTLOCKED;
3695     }
3696     surface->flags &= ~SFLAG_LOCKED;
3697
3698     surface->surface_ops->surface_unmap(surface);
3699
3700     return WINED3D_OK;
3701 }
3702
3703 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3704         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3705 {
3706     const struct wined3d_format *format = surface->resource.format;
3707
3708     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3709             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3710
3711     if (surface->flags & SFLAG_LOCKED)
3712     {
3713         WARN("Surface is already mapped.\n");
3714         return WINED3DERR_INVALIDCALL;
3715     }
3716     if ((format->flags & WINED3DFMT_FLAG_COMPRESSED)
3717             && rect && (rect->left || rect->top
3718             || rect->right != surface->resource.width
3719             || rect->bottom != surface->resource.height))
3720     {
3721         UINT width_mask = format->block_width - 1;
3722         UINT height_mask = format->block_height - 1;
3723
3724         if ((rect->left & width_mask) || (rect->right & width_mask)
3725                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3726         {
3727             switch (surface->resource.pool)
3728             {
3729                 case WINED3DPOOL_DEFAULT:
3730                     WARN("Partial block lock with WINED3DPOOL_DEFAULT\n");
3731                     return WINED3DERR_INVALIDCALL;
3732
3733                 default:
3734                     FIXME("Partial block lock with %s\n", debug_d3dpool(surface->resource.pool));
3735             }
3736         }
3737     }
3738
3739     surface->flags |= SFLAG_LOCKED;
3740
3741     if (!(surface->flags & SFLAG_LOCKABLE))
3742         WARN("Trying to lock unlockable surface.\n");
3743
3744     surface->surface_ops->surface_map(surface, rect, flags);
3745
3746     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3747
3748     if (!rect)
3749     {
3750         locked_rect->pBits = surface->resource.allocatedMemory;
3751         surface->lockedRect.left = 0;
3752         surface->lockedRect.top = 0;
3753         surface->lockedRect.right = surface->resource.width;
3754         surface->lockedRect.bottom = surface->resource.height;
3755     }
3756     else
3757     {
3758         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3759         {
3760             /* Compressed textures are block based, so calculate the offset of
3761              * the block that contains the top-left pixel of the locked rectangle. */
3762             locked_rect->pBits = surface->resource.allocatedMemory
3763                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3764                     + ((rect->left / format->block_width) * format->block_byte_count);
3765         }
3766         else
3767         {
3768             locked_rect->pBits = surface->resource.allocatedMemory
3769                     + (locked_rect->Pitch * rect->top)
3770                     + (rect->left * format->byte_count);
3771         }
3772         surface->lockedRect.left = rect->left;
3773         surface->lockedRect.top = rect->top;
3774         surface->lockedRect.right = rect->right;
3775         surface->lockedRect.bottom = rect->bottom;
3776     }
3777
3778     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3779     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3780
3781     return WINED3D_OK;
3782 }
3783
3784 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3785 {
3786     WINED3DLOCKED_RECT lock;
3787     HRESULT hr;
3788
3789     TRACE("surface %p, dc %p.\n", surface, dc);
3790
3791     if (surface->flags & SFLAG_USERPTR)
3792     {
3793         ERR("Not supported on surfaces with application-provided memory.\n");
3794         return WINEDDERR_NODC;
3795     }
3796
3797     /* Give more detailed info for ddraw. */
3798     if (surface->flags & SFLAG_DCINUSE)
3799         return WINEDDERR_DCALREADYCREATED;
3800
3801     /* Can't GetDC if the surface is locked. */
3802     if (surface->flags & SFLAG_LOCKED)
3803         return WINED3DERR_INVALIDCALL;
3804
3805     /* Create a DIB section if there isn't a dc yet. */
3806     if (!surface->hDC)
3807     {
3808         if (surface->flags & SFLAG_CLIENT)
3809         {
3810             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3811             surface_release_client_storage(surface);
3812         }
3813         hr = surface_create_dib_section(surface);
3814         if (FAILED(hr))
3815             return WINED3DERR_INVALIDCALL;
3816
3817         /* Use the DIB section from now on if we are not using a PBO. */
3818         if (!(surface->flags & SFLAG_PBO))
3819             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3820     }
3821
3822     /* Map the surface. */
3823     hr = wined3d_surface_map(surface, &lock, NULL, 0);
3824     if (FAILED(hr))
3825     {
3826         ERR("Map failed, hr %#x.\n", hr);
3827         return hr;
3828     }
3829
3830     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3831      * activates the allocatedMemory. */
3832     if (surface->flags & SFLAG_PBO)
3833         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3834
3835     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3836             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3837     {
3838         /* GetDC on palettized formats is unsupported in D3D9, and the method
3839          * is missing in D3D8, so this should only be used for DX <=7
3840          * surfaces (with non-device palettes). */
3841         const PALETTEENTRY *pal = NULL;
3842
3843         if (surface->palette)
3844         {
3845             pal = surface->palette->palents;
3846         }
3847         else
3848         {
3849             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3850             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3851
3852             if (dds_primary && dds_primary->palette)
3853                 pal = dds_primary->palette->palents;
3854         }
3855
3856         if (pal)
3857         {
3858             RGBQUAD col[256];
3859             unsigned int i;
3860
3861             for (i = 0; i < 256; ++i)
3862             {
3863                 col[i].rgbRed = pal[i].peRed;
3864                 col[i].rgbGreen = pal[i].peGreen;
3865                 col[i].rgbBlue = pal[i].peBlue;
3866                 col[i].rgbReserved = 0;
3867             }
3868             SetDIBColorTable(surface->hDC, 0, 256, col);
3869         }
3870     }
3871
3872     surface->flags |= SFLAG_DCINUSE;
3873
3874     *dc = surface->hDC;
3875     TRACE("Returning dc %p.\n", *dc);
3876
3877     return WINED3D_OK;
3878 }
3879
3880 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3881 {
3882     TRACE("surface %p, dc %p.\n", surface, dc);
3883
3884     if (!(surface->flags & SFLAG_DCINUSE))
3885         return WINEDDERR_NODC;
3886
3887     if (surface->hDC != dc)
3888     {
3889         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3890                 dc, surface->hDC);
3891         return WINEDDERR_NODC;
3892     }
3893
3894     /* Copy the contents of the DIB over to the PBO. */
3895     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3896         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3897
3898     /* We locked first, so unlock now. */
3899     wined3d_surface_unmap(surface);
3900
3901     surface->flags &= ~SFLAG_DCINUSE;
3902
3903     return WINED3D_OK;
3904 }
3905
3906 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3907 {
3908     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3909
3910     if (flags)
3911     {
3912         static UINT once;
3913         if (!once++)
3914             FIXME("Ignoring flags %#x.\n", flags);
3915         else
3916             WARN("Ignoring flags %#x.\n", flags);
3917     }
3918
3919     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3920     {
3921         ERR("Not supported on swapchain surfaces.\n");
3922         return WINEDDERR_NOTFLIPPABLE;
3923     }
3924
3925     /* Flipping is only supported on render targets and overlays. */
3926     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3927     {
3928         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3929         return WINEDDERR_NOTFLIPPABLE;
3930     }
3931
3932     flip_surface(surface, override);
3933
3934     /* Update overlays if they're visible. */
3935     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3936         return surface_draw_overlay(surface);
3937
3938     return WINED3D_OK;
3939 }
3940
3941 /* Do not call while under the GL lock. */
3942 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3943 {
3944     struct wined3d_device *device = surface->resource.device;
3945
3946     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3947
3948     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3949     {
3950         struct wined3d_texture *texture = surface->container.u.texture;
3951
3952         TRACE("Passing to container (%p).\n", texture);
3953         texture->texture_ops->texture_preload(texture, srgb);
3954     }
3955     else
3956     {
3957         struct wined3d_context *context;
3958
3959         TRACE("(%p) : About to load surface\n", surface);
3960
3961         /* TODO: Use already acquired context when possible. */
3962         context = context_acquire(device, NULL);
3963
3964         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3965
3966         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3967         {
3968             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3969             GLclampf tmp;
3970             tmp = 0.9f;
3971             ENTER_GL();
3972             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3973             LEAVE_GL();
3974         }
3975
3976         context_release(context);
3977     }
3978 }
3979
3980 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3981 {
3982     if (!surface->resource.allocatedMemory)
3983     {
3984         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3985                 surface->resource.size + RESOURCE_ALIGNMENT);
3986         if (!surface->resource.heapMemory)
3987         {
3988             ERR("Out of memory\n");
3989             return FALSE;
3990         }
3991         surface->resource.allocatedMemory =
3992             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
3993     }
3994     else
3995     {
3996         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
3997     }
3998
3999     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
4000
4001     return TRUE;
4002 }
4003
4004 /* Read the framebuffer back into the surface */
4005 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4006 {
4007     struct wined3d_device *device = surface->resource.device;
4008     const struct wined3d_gl_info *gl_info;
4009     struct wined3d_context *context;
4010     BYTE *mem;
4011     GLint fmt;
4012     GLint type;
4013     BYTE *row, *top, *bottom;
4014     int i;
4015     BOOL bpp;
4016     RECT local_rect;
4017     BOOL srcIsUpsideDown;
4018     GLint rowLen = 0;
4019     GLint skipPix = 0;
4020     GLint skipRow = 0;
4021
4022     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
4023         static BOOL warned = FALSE;
4024         if(!warned) {
4025             ERR("The application tries to lock the render target, but render target locking is disabled\n");
4026             warned = TRUE;
4027         }
4028         return;
4029     }
4030
4031     context = context_acquire(device, surface);
4032     context_apply_blit_state(context, device);
4033     gl_info = context->gl_info;
4034
4035     ENTER_GL();
4036
4037     /* Select the correct read buffer, and give some debug output.
4038      * There is no need to keep track of the current read buffer or reset it, every part of the code
4039      * that reads sets the read buffer as desired.
4040      */
4041     if (surface_is_offscreen(surface))
4042     {
4043         /* Mapping the primary render target which is not on a swapchain.
4044          * Read from the back buffer. */
4045         TRACE("Mapping offscreen render target.\n");
4046         glReadBuffer(device->offscreenBuffer);
4047         srcIsUpsideDown = TRUE;
4048     }
4049     else
4050     {
4051         /* Onscreen surfaces are always part of a swapchain */
4052         GLenum buffer = surface_get_gl_buffer(surface);
4053         TRACE("Mapping %#x buffer.\n", buffer);
4054         glReadBuffer(buffer);
4055         checkGLcall("glReadBuffer");
4056         srcIsUpsideDown = FALSE;
4057     }
4058
4059     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4060     if (!rect)
4061     {
4062         local_rect.left = 0;
4063         local_rect.top = 0;
4064         local_rect.right = surface->resource.width;
4065         local_rect.bottom = surface->resource.height;
4066     }
4067     else
4068     {
4069         local_rect = *rect;
4070     }
4071     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4072
4073     switch (surface->resource.format->id)
4074     {
4075         case WINED3DFMT_P8_UINT:
4076         {
4077             if (primary_render_target_is_p8(device))
4078             {
4079                 /* In case of P8 render targets the index is stored in the alpha component */
4080                 fmt = GL_ALPHA;
4081                 type = GL_UNSIGNED_BYTE;
4082                 mem = dest;
4083                 bpp = surface->resource.format->byte_count;
4084             }
4085             else
4086             {
4087                 /* GL can't return palettized data, so read ARGB pixels into a
4088                  * separate block of memory and convert them into palettized format
4089                  * in software. Slow, but if the app means to use palettized render
4090                  * targets and locks it...
4091                  *
4092                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4093                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4094                  * for the color channels when palettizing the colors.
4095                  */
4096                 fmt = GL_RGB;
4097                 type = GL_UNSIGNED_BYTE;
4098                 pitch *= 3;
4099                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4100                 if (!mem)
4101                 {
4102                     ERR("Out of memory\n");
4103                     LEAVE_GL();
4104                     return;
4105                 }
4106                 bpp = surface->resource.format->byte_count * 3;
4107             }
4108         }
4109         break;
4110
4111         default:
4112             mem = dest;
4113             fmt = surface->resource.format->glFormat;
4114             type = surface->resource.format->glType;
4115             bpp = surface->resource.format->byte_count;
4116     }
4117
4118     if (surface->flags & SFLAG_PBO)
4119     {
4120         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4121         checkGLcall("glBindBufferARB");
4122         if (mem)
4123         {
4124             ERR("mem not null for pbo -- unexpected\n");
4125             mem = NULL;
4126         }
4127     }
4128
4129     /* Save old pixel store pack state */
4130     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4131     checkGLcall("glGetIntegerv");
4132     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4133     checkGLcall("glGetIntegerv");
4134     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4135     checkGLcall("glGetIntegerv");
4136
4137     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4138     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4139     checkGLcall("glPixelStorei");
4140     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4141     checkGLcall("glPixelStorei");
4142     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4143     checkGLcall("glPixelStorei");
4144
4145     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4146             local_rect.right - local_rect.left,
4147             local_rect.bottom - local_rect.top,
4148             fmt, type, mem);
4149     checkGLcall("glReadPixels");
4150
4151     /* Reset previous pixel store pack state */
4152     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4153     checkGLcall("glPixelStorei");
4154     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4155     checkGLcall("glPixelStorei");
4156     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4157     checkGLcall("glPixelStorei");
4158
4159     if (surface->flags & SFLAG_PBO)
4160     {
4161         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4162         checkGLcall("glBindBufferARB");
4163
4164         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4165          * to get a pointer to it and perform the flipping in software. This is a lot
4166          * faster than calling glReadPixels for each line. In case we want more speed
4167          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4168         if (!srcIsUpsideDown)
4169         {
4170             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4171             checkGLcall("glBindBufferARB");
4172
4173             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4174             checkGLcall("glMapBufferARB");
4175         }
4176     }
4177
4178     /* TODO: Merge this with the palettization loop below for P8 targets */
4179     if(!srcIsUpsideDown) {
4180         UINT len, off;
4181         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4182             Flip the lines in software */
4183         len = (local_rect.right - local_rect.left) * bpp;
4184         off = local_rect.left * bpp;
4185
4186         row = HeapAlloc(GetProcessHeap(), 0, len);
4187         if(!row) {
4188             ERR("Out of memory\n");
4189             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4190                 HeapFree(GetProcessHeap(), 0, mem);
4191             LEAVE_GL();
4192             return;
4193         }
4194
4195         top = mem + pitch * local_rect.top;
4196         bottom = mem + pitch * (local_rect.bottom - 1);
4197         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4198             memcpy(row, top + off, len);
4199             memcpy(top + off, bottom + off, len);
4200             memcpy(bottom + off, row, len);
4201             top += pitch;
4202             bottom -= pitch;
4203         }
4204         HeapFree(GetProcessHeap(), 0, row);
4205
4206         /* Unmap the temp PBO buffer */
4207         if (surface->flags & SFLAG_PBO)
4208         {
4209             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4210             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4211         }
4212     }
4213
4214     LEAVE_GL();
4215     context_release(context);
4216
4217     /* For P8 textures we need to perform an inverse palette lookup. This is
4218      * done by searching for a palette index which matches the RGB value.
4219      * Note this isn't guaranteed to work when there are multiple entries for
4220      * the same color but we have no choice. In case of P8 render targets,
4221      * the index is stored in the alpha component so no conversion is needed. */
4222     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4223     {
4224         const PALETTEENTRY *pal = NULL;
4225         DWORD width = pitch / 3;
4226         int x, y, c;
4227
4228         if (surface->palette)
4229         {
4230             pal = surface->palette->palents;
4231         }
4232         else
4233         {
4234             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4235             HeapFree(GetProcessHeap(), 0, mem);
4236             return;
4237         }
4238
4239         for(y = local_rect.top; y < local_rect.bottom; y++) {
4240             for(x = local_rect.left; x < local_rect.right; x++) {
4241                 /*                      start              lines            pixels      */
4242                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4243                 const BYTE *green = blue  + 1;
4244                 const BYTE *red = green + 1;
4245
4246                 for(c = 0; c < 256; c++) {
4247                     if(*red   == pal[c].peRed   &&
4248                        *green == pal[c].peGreen &&
4249                        *blue  == pal[c].peBlue)
4250                     {
4251                         *((BYTE *) dest + y * width + x) = c;
4252                         break;
4253                     }
4254                 }
4255             }
4256         }
4257         HeapFree(GetProcessHeap(), 0, mem);
4258     }
4259 }
4260
4261 /* Read the framebuffer contents into a texture. Note that this function
4262  * doesn't do any kind of flipping. Using this on an onscreen surface will
4263  * result in a flipped D3D texture. */
4264 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4265 {
4266     struct wined3d_device *device = surface->resource.device;
4267     struct wined3d_context *context;
4268
4269     context = context_acquire(device, surface);
4270     device_invalidate_state(device, STATE_FRAMEBUFFER);
4271
4272     surface_prepare_texture(surface, context, srgb);
4273     surface_bind_and_dirtify(surface, context, srgb);
4274
4275     TRACE("Reading back offscreen render target %p.\n", surface);
4276
4277     ENTER_GL();
4278
4279     if (surface_is_offscreen(surface))
4280         glReadBuffer(device->offscreenBuffer);
4281     else
4282         glReadBuffer(surface_get_gl_buffer(surface));
4283     checkGLcall("glReadBuffer");
4284
4285     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4286             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4287     checkGLcall("glCopyTexSubImage2D");
4288
4289     LEAVE_GL();
4290
4291     context_release(context);
4292 }
4293
4294 /* Context activation is done by the caller. */
4295 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4296         struct wined3d_context *context, BOOL srgb)
4297 {
4298     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4299     CONVERT_TYPES convert;
4300     struct wined3d_format format;
4301
4302     if (surface->flags & alloc_flag) return;
4303
4304     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4305     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4306     else surface->flags &= ~SFLAG_CONVERTED;
4307
4308     surface_bind_and_dirtify(surface, context, srgb);
4309     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4310     surface->flags |= alloc_flag;
4311 }
4312
4313 /* Context activation is done by the caller. */
4314 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4315 {
4316     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4317     {
4318         struct wined3d_texture *texture = surface->container.u.texture;
4319         UINT sub_count = texture->level_count * texture->layer_count;
4320         UINT i;
4321
4322         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4323
4324         for (i = 0; i < sub_count; ++i)
4325         {
4326             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4327             surface_prepare_texture_internal(s, context, srgb);
4328         }
4329
4330         return;
4331     }
4332
4333     surface_prepare_texture_internal(surface, context, srgb);
4334 }
4335
4336 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4337 {
4338     if (multisample)
4339     {
4340         if (surface->rb_multisample)
4341             return;
4342
4343         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4344         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4345         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4346                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4347         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4348     }
4349     else
4350     {
4351         if (surface->rb_resolved)
4352             return;
4353
4354         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4355         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4356         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4357                 surface->pow2Width, surface->pow2Height);
4358         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4359     }
4360 }
4361
4362 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4363         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4364 {
4365     struct wined3d_device *device = surface->resource.device;
4366     UINT pitch = wined3d_surface_get_pitch(surface);
4367     const struct wined3d_gl_info *gl_info;
4368     struct wined3d_context *context;
4369     RECT local_rect;
4370     UINT w, h;
4371
4372     surface_get_rect(surface, rect, &local_rect);
4373
4374     mem += local_rect.top * pitch + local_rect.left * bpp;
4375     w = local_rect.right - local_rect.left;
4376     h = local_rect.bottom - local_rect.top;
4377
4378     /* Activate the correct context for the render target */
4379     context = context_acquire(device, surface);
4380     context_apply_blit_state(context, device);
4381     gl_info = context->gl_info;
4382
4383     ENTER_GL();
4384
4385     if (!surface_is_offscreen(surface))
4386     {
4387         GLenum buffer = surface_get_gl_buffer(surface);
4388         TRACE("Unlocking %#x buffer.\n", buffer);
4389         context_set_draw_buffer(context, buffer);
4390
4391         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4392         glPixelZoom(1.0f, -1.0f);
4393     }
4394     else
4395     {
4396         /* Primary offscreen render target */
4397         TRACE("Offscreen render target.\n");
4398         context_set_draw_buffer(context, device->offscreenBuffer);
4399
4400         glPixelZoom(1.0f, 1.0f);
4401     }
4402
4403     glRasterPos3i(local_rect.left, local_rect.top, 1);
4404     checkGLcall("glRasterPos3i");
4405
4406     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4407     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4408
4409     if (surface->flags & SFLAG_PBO)
4410     {
4411         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4412         checkGLcall("glBindBufferARB");
4413     }
4414
4415     glDrawPixels(w, h, fmt, type, mem);
4416     checkGLcall("glDrawPixels");
4417
4418     if (surface->flags & SFLAG_PBO)
4419     {
4420         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4421         checkGLcall("glBindBufferARB");
4422     }
4423
4424     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4425     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4426
4427     LEAVE_GL();
4428
4429     if (wined3d_settings.strict_draw_ordering
4430             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4431             && surface->container.u.swapchain->front_buffer == surface))
4432         wglFlush();
4433
4434     context_release(context);
4435 }
4436
4437 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4438         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4439 {
4440     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4441     const struct wined3d_device *device = surface->resource.device;
4442     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4443     BOOL blit_supported = FALSE;
4444
4445     /* Copy the default values from the surface. Below we might perform fixups */
4446     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4447     *format = *surface->resource.format;
4448     *convert = NO_CONVERSION;
4449
4450     /* Ok, now look if we have to do any conversion */
4451     switch (surface->resource.format->id)
4452     {
4453         case WINED3DFMT_P8_UINT:
4454             /* Below the call to blit_supported is disabled for Wine 1.2
4455              * because the function isn't operating correctly yet. At the
4456              * moment 8-bit blits are handled in software and if certain GL
4457              * extensions are around, surface conversion is performed at
4458              * upload time. The blit_supported call recognizes it as a
4459              * destination fixup. This type of upload 'fixup' and 8-bit to
4460              * 8-bit blits need to be handled by the blit_shader.
4461              * TODO: get rid of this #if 0. */
4462 #if 0
4463             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4464                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4465                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4466 #endif
4467             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4468
4469             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4470              * texturing. Further also use conversion in case of color keying.
4471              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4472              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4473              * conflicts with this.
4474              */
4475             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4476                     || colorkey_active || !use_texturing)
4477             {
4478                 format->glFormat = GL_RGBA;
4479                 format->glInternal = GL_RGBA;
4480                 format->glType = GL_UNSIGNED_BYTE;
4481                 format->conv_byte_count = 4;
4482                 if (colorkey_active)
4483                     *convert = CONVERT_PALETTED_CK;
4484                 else
4485                     *convert = CONVERT_PALETTED;
4486             }
4487             break;
4488
4489         case WINED3DFMT_B2G3R3_UNORM:
4490             /* **********************
4491                 GL_UNSIGNED_BYTE_3_3_2
4492                 ********************** */
4493             if (colorkey_active) {
4494                 /* This texture format will never be used.. So do not care about color keying
4495                     up until the point in time it will be needed :-) */
4496                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4497             }
4498             break;
4499
4500         case WINED3DFMT_B5G6R5_UNORM:
4501             if (colorkey_active)
4502             {
4503                 *convert = CONVERT_CK_565;
4504                 format->glFormat = GL_RGBA;
4505                 format->glInternal = GL_RGB5_A1;
4506                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4507                 format->conv_byte_count = 2;
4508             }
4509             break;
4510
4511         case WINED3DFMT_B5G5R5X1_UNORM:
4512             if (colorkey_active)
4513             {
4514                 *convert = CONVERT_CK_5551;
4515                 format->glFormat = GL_BGRA;
4516                 format->glInternal = GL_RGB5_A1;
4517                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4518                 format->conv_byte_count = 2;
4519             }
4520             break;
4521
4522         case WINED3DFMT_B8G8R8_UNORM:
4523             if (colorkey_active)
4524             {
4525                 *convert = CONVERT_CK_RGB24;
4526                 format->glFormat = GL_RGBA;
4527                 format->glInternal = GL_RGBA8;
4528                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4529                 format->conv_byte_count = 4;
4530             }
4531             break;
4532
4533         case WINED3DFMT_B8G8R8X8_UNORM:
4534             if (colorkey_active)
4535             {
4536                 *convert = CONVERT_RGB32_888;
4537                 format->glFormat = GL_RGBA;
4538                 format->glInternal = GL_RGBA8;
4539                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4540                 format->conv_byte_count = 4;
4541             }
4542             break;
4543
4544         default:
4545             break;
4546     }
4547
4548     return WINED3D_OK;
4549 }
4550
4551 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4552 {
4553     const struct wined3d_device *device = surface->resource.device;
4554     const struct wined3d_palette *pal = surface->palette;
4555     BOOL index_in_alpha = FALSE;
4556     unsigned int i;
4557
4558     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4559      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4560      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4561      * duplicate entries. Store the color key in the unused alpha component to speed the
4562      * download up and to make conversion unneeded. */
4563     index_in_alpha = primary_render_target_is_p8(device);
4564
4565     if (!pal)
4566     {
4567         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4568         if (index_in_alpha)
4569         {
4570             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4571              * there's no palette at this time. */
4572             for (i = 0; i < 256; i++) table[i][3] = i;
4573         }
4574     }
4575     else
4576     {
4577         TRACE("Using surface palette %p\n", pal);
4578         /* Get the surface's palette */
4579         for (i = 0; i < 256; ++i)
4580         {
4581             table[i][0] = pal->palents[i].peRed;
4582             table[i][1] = pal->palents[i].peGreen;
4583             table[i][2] = pal->palents[i].peBlue;
4584
4585             /* When index_in_alpha is set the palette index is stored in the
4586              * alpha component. In case of a readback we can then read
4587              * GL_ALPHA. Color keying is handled in BltOverride using a
4588              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4589              * color key itself is passed to glAlphaFunc in other cases the
4590              * alpha component of pixels that should be masked away is set to 0. */
4591             if (index_in_alpha)
4592             {
4593                 table[i][3] = i;
4594             }
4595             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4596                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4597             {
4598                 table[i][3] = 0x00;
4599             }
4600             else if (pal->flags & WINEDDPCAPS_ALPHA)
4601             {
4602                 table[i][3] = pal->palents[i].peFlags;
4603             }
4604             else
4605             {
4606                 table[i][3] = 0xFF;
4607             }
4608         }
4609     }
4610 }
4611
4612 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4613         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4614 {
4615     const BYTE *source;
4616     BYTE *dest;
4617     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4618
4619     switch (convert) {
4620         case NO_CONVERSION:
4621         {
4622             memcpy(dst, src, pitch * height);
4623             break;
4624         }
4625         case CONVERT_PALETTED:
4626         case CONVERT_PALETTED_CK:
4627         {
4628             BYTE table[256][4];
4629             unsigned int x, y;
4630
4631             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4632
4633             for (y = 0; y < height; y++)
4634             {
4635                 source = src + pitch * y;
4636                 dest = dst + outpitch * y;
4637                 /* This is an 1 bpp format, using the width here is fine */
4638                 for (x = 0; x < width; x++) {
4639                     BYTE color = *source++;
4640                     *dest++ = table[color][0];
4641                     *dest++ = table[color][1];
4642                     *dest++ = table[color][2];
4643                     *dest++ = table[color][3];
4644                 }
4645             }
4646         }
4647         break;
4648
4649         case CONVERT_CK_565:
4650         {
4651             /* Converting the 565 format in 5551 packed to emulate color-keying.
4652
4653               Note : in all these conversion, it would be best to average the averaging
4654                       pixels to get the color of the pixel that will be color-keyed to
4655                       prevent 'color bleeding'. This will be done later on if ever it is
4656                       too visible.
4657
4658               Note2: Nvidia documents say that their driver does not support alpha + color keying
4659                      on the same surface and disables color keying in such a case
4660             */
4661             unsigned int x, y;
4662             const WORD *Source;
4663             WORD *Dest;
4664
4665             TRACE("Color keyed 565\n");
4666
4667             for (y = 0; y < height; y++) {
4668                 Source = (const WORD *)(src + y * pitch);
4669                 Dest = (WORD *) (dst + y * outpitch);
4670                 for (x = 0; x < width; x++ ) {
4671                     WORD color = *Source++;
4672                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4673                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4674                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4675                         *Dest |= 0x0001;
4676                     Dest++;
4677                 }
4678             }
4679         }
4680         break;
4681
4682         case CONVERT_CK_5551:
4683         {
4684             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4685             unsigned int x, y;
4686             const WORD *Source;
4687             WORD *Dest;
4688             TRACE("Color keyed 5551\n");
4689             for (y = 0; y < height; y++) {
4690                 Source = (const WORD *)(src + y * pitch);
4691                 Dest = (WORD *) (dst + y * outpitch);
4692                 for (x = 0; x < width; x++ ) {
4693                     WORD color = *Source++;
4694                     *Dest = color;
4695                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4696                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4697                         *Dest |= (1 << 15);
4698                     else
4699                         *Dest &= ~(1 << 15);
4700                     Dest++;
4701                 }
4702             }
4703         }
4704         break;
4705
4706         case CONVERT_CK_RGB24:
4707         {
4708             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4709             unsigned int x, y;
4710             for (y = 0; y < height; y++)
4711             {
4712                 source = src + pitch * y;
4713                 dest = dst + outpitch * y;
4714                 for (x = 0; x < width; x++) {
4715                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4716                     DWORD dstcolor = color << 8;
4717                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4718                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4719                         dstcolor |= 0xff;
4720                     *(DWORD*)dest = dstcolor;
4721                     source += 3;
4722                     dest += 4;
4723                 }
4724             }
4725         }
4726         break;
4727
4728         case CONVERT_RGB32_888:
4729         {
4730             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4731             unsigned int x, y;
4732             for (y = 0; y < height; y++)
4733             {
4734                 source = src + pitch * y;
4735                 dest = dst + outpitch * y;
4736                 for (x = 0; x < width; x++) {
4737                     DWORD color = 0xffffff & *(const DWORD*)source;
4738                     DWORD dstcolor = color << 8;
4739                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4740                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4741                         dstcolor |= 0xff;
4742                     *(DWORD*)dest = dstcolor;
4743                     source += 4;
4744                     dest += 4;
4745                 }
4746             }
4747         }
4748         break;
4749
4750         default:
4751             ERR("Unsupported conversion type %#x.\n", convert);
4752     }
4753     return WINED3D_OK;
4754 }
4755
4756 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4757 {
4758     /* Flip the surface contents */
4759     /* Flip the DC */
4760     {
4761         HDC tmp;
4762         tmp = front->hDC;
4763         front->hDC = back->hDC;
4764         back->hDC = tmp;
4765     }
4766
4767     /* Flip the DIBsection */
4768     {
4769         HBITMAP tmp;
4770         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4771         tmp = front->dib.DIBsection;
4772         front->dib.DIBsection = back->dib.DIBsection;
4773         back->dib.DIBsection = tmp;
4774
4775         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4776         else front->flags &= ~SFLAG_DIBSECTION;
4777         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4778         else back->flags &= ~SFLAG_DIBSECTION;
4779     }
4780
4781     /* Flip the surface data */
4782     {
4783         void* tmp;
4784
4785         tmp = front->dib.bitmap_data;
4786         front->dib.bitmap_data = back->dib.bitmap_data;
4787         back->dib.bitmap_data = tmp;
4788
4789         tmp = front->resource.allocatedMemory;
4790         front->resource.allocatedMemory = back->resource.allocatedMemory;
4791         back->resource.allocatedMemory = tmp;
4792
4793         tmp = front->resource.heapMemory;
4794         front->resource.heapMemory = back->resource.heapMemory;
4795         back->resource.heapMemory = tmp;
4796     }
4797
4798     /* Flip the PBO */
4799     {
4800         GLuint tmp_pbo = front->pbo;
4801         front->pbo = back->pbo;
4802         back->pbo = tmp_pbo;
4803     }
4804
4805     /* client_memory should not be different, but just in case */
4806     {
4807         BOOL tmp;
4808         tmp = front->dib.client_memory;
4809         front->dib.client_memory = back->dib.client_memory;
4810         back->dib.client_memory = tmp;
4811     }
4812
4813     /* Flip the opengl texture */
4814     {
4815         GLuint tmp;
4816
4817         tmp = back->texture_name;
4818         back->texture_name = front->texture_name;
4819         front->texture_name = tmp;
4820
4821         tmp = back->texture_name_srgb;
4822         back->texture_name_srgb = front->texture_name_srgb;
4823         front->texture_name_srgb = tmp;
4824
4825         tmp = back->rb_multisample;
4826         back->rb_multisample = front->rb_multisample;
4827         front->rb_multisample = tmp;
4828
4829         tmp = back->rb_resolved;
4830         back->rb_resolved = front->rb_resolved;
4831         front->rb_resolved = tmp;
4832
4833         resource_unload(&back->resource);
4834         resource_unload(&front->resource);
4835     }
4836
4837     {
4838         DWORD tmp_flags = back->flags;
4839         back->flags = front->flags;
4840         front->flags = tmp_flags;
4841     }
4842 }
4843
4844 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4845  * pixel copy calls. */
4846 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4847         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4848 {
4849     struct wined3d_device *device = dst_surface->resource.device;
4850     float xrel, yrel;
4851     UINT row;
4852     struct wined3d_context *context;
4853     BOOL upsidedown = FALSE;
4854     RECT dst_rect = *dst_rect_in;
4855
4856     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4857      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4858      */
4859     if(dst_rect.top > dst_rect.bottom) {
4860         UINT tmp = dst_rect.bottom;
4861         dst_rect.bottom = dst_rect.top;
4862         dst_rect.top = tmp;
4863         upsidedown = TRUE;
4864     }
4865
4866     context = context_acquire(device, src_surface);
4867     context_apply_blit_state(context, device);
4868     surface_internal_preload(dst_surface, SRGB_RGB);
4869     ENTER_GL();
4870
4871     /* Bind the target texture */
4872     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4873     if (surface_is_offscreen(src_surface))
4874     {
4875         TRACE("Reading from an offscreen target\n");
4876         upsidedown = !upsidedown;
4877         glReadBuffer(device->offscreenBuffer);
4878     }
4879     else
4880     {
4881         glReadBuffer(surface_get_gl_buffer(src_surface));
4882     }
4883     checkGLcall("glReadBuffer");
4884
4885     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4886     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4887
4888     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4889     {
4890         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4891
4892         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4893             ERR("Texture filtering not supported in direct blit\n");
4894         }
4895     }
4896     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4897             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4898     {
4899         ERR("Texture filtering not supported in direct blit\n");
4900     }
4901
4902     if (upsidedown
4903             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4904             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4905     {
4906         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4907
4908         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4909                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4910                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4911                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4912     }
4913     else
4914     {
4915         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4916         /* I have to process this row by row to swap the image,
4917          * otherwise it would be upside down, so stretching in y direction
4918          * doesn't cost extra time
4919          *
4920          * However, stretching in x direction can be avoided if not necessary
4921          */
4922         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4923             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4924             {
4925                 /* Well, that stuff works, but it's very slow.
4926                  * find a better way instead
4927                  */
4928                 UINT col;
4929
4930                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4931                 {
4932                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4933                             dst_rect.left + col /* x offset */, row /* y offset */,
4934                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4935                 }
4936             }
4937             else
4938             {
4939                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4940                         dst_rect.left /* x offset */, row /* y offset */,
4941                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4942             }
4943         }
4944     }
4945     checkGLcall("glCopyTexSubImage2D");
4946
4947     LEAVE_GL();
4948     context_release(context);
4949
4950     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4951      * path is never entered
4952      */
4953     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4954 }
4955
4956 /* Uses the hardware to stretch and flip the image.
      *
      * The whole source framebuffer (fbwidth x fbheight) is first copied into a
      * texture: the source surface's own texture, or a temporary "backup" texture
      * when the source is offscreen and ORM_FBO is the offscreen rendering mode.
      * A swapchain surface other than the first back buffer is additionally read
      * from GL_FRONT into a second temporary texture. The bound texture is then
      * drawn as a quad of the destination size at the origin of drawBuffer, and
      * the result is copied into dst_surface's texture at dst_rect.left / top
      * with glCopyTexSubImage2D(). If drawBuffer is GL_BACK, the saved copy is
      * drawn over it again afterwards. Temporary textures are deleted, and
      * dst_surface is marked SFLAG_INTEXTURE before returning.
      *
      * src_rect is the area of src_surface to read. dst_rect_in is the area of
      * dst_surface to write; top > bottom requests a vertical flip. Filter
      * selects the min / mag filter set on the first framebuffer copy. */
4957 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4958         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4959 {
4960     struct wined3d_device *device = dst_surface->resource.device;
4961     struct wined3d_swapchain *src_swapchain = NULL;
4962     GLuint src, backup = 0;
4963     float left, right, top, bottom; /* Texture coordinates */
4964     UINT fbwidth = src_surface->resource.width;
4965     UINT fbheight = src_surface->resource.height;
4966     struct wined3d_context *context;
4967     GLenum drawBuffer = GL_BACK;
4968     GLenum texture_target;
4969     BOOL noBackBufferBackup;
4970     BOOL src_offscreen;
4971     BOOL upsidedown = FALSE;
4972     RECT dst_rect = *dst_rect_in;
4973
4974     TRACE("Using hwstretch blit\n");
4975     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4976     context = context_acquire(device, src_surface);
4977     context_apply_blit_state(context, device);
4978     surface_internal_preload(dst_surface, SRGB_RGB);
4979
4980     src_offscreen = surface_is_offscreen(src_surface);
         /* With an offscreen source and ORM_FBO the source surface's texture is
          * not used for the framebuffer copy; a temporary texture is generated
          * below instead. */
4981     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4982     if (!noBackBufferBackup && !src_surface->texture_name)
4983     {
4984         /* The source surface has no GL texture name yet. Preload it,
              * presumably so that there is a texture to copy the framebuffer
              * into below. */
4985         surface_internal_preload(src_surface, SRGB_RGB);
4986     }
4987     ENTER_GL();
4988
4989     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4990      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4991      */
4992     if (context->aux_buffers >= 2)
4993     {
4994         /* Got more than one aux buffer? Use the 2nd aux buffer */
4995         drawBuffer = GL_AUX1;
4996     }
4997     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
4998     {
4999         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5000         drawBuffer = GL_AUX0;
5001     }
5002
5003     if(noBackBufferBackup) {
             /* NOTE(review): no glTexImage2D() call for "backup" is visible in
              * this function before it is used as the glCopyTexSubImage2D()
              * target below - confirm that its storage is defined. */
5004         glGenTextures(1, &backup);
5005         checkGLcall("glGenTextures");
5006         context_bind_texture(context, GL_TEXTURE_2D, backup);
5007         texture_target = GL_TEXTURE_2D;
5008     } else {
5009         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5010          * we are reading from the back buffer, the backup can be used as source texture
5011          */
5012         texture_target = src_surface->texture_target;
5013         context_bind_texture(context, texture_target, src_surface->texture_name);
5014         glEnable(texture_target);
5015         checkGLcall("glEnable(texture_target)");
5016
5017         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5018         src_surface->flags &= ~SFLAG_INTEXTURE;
5019     }
5020
5021     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5022      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5023      */
5024     if(dst_rect.top > dst_rect.bottom) {
5025         UINT tmp = dst_rect.bottom;
5026         dst_rect.bottom = dst_rect.top;
5027         dst_rect.top = tmp;
5028         upsidedown = TRUE;
5029     }
5030
5031     if (src_offscreen)
5032     {
5033         TRACE("Reading from an offscreen target\n");
5034         upsidedown = !upsidedown;
5035         glReadBuffer(device->offscreenBuffer);
5036     }
5037     else
5038     {
5039         glReadBuffer(surface_get_gl_buffer(src_surface));
5040     }
5041
5042     /* TODO: Only back up the part that will be overwritten */
5043     glCopyTexSubImage2D(texture_target, 0,
5044                         0, 0 /* read offsets */,
5045                         0, 0,
5046                         fbwidth,
5047                         fbheight);
5048
5049     checkGLcall("glCopyTexSubImage2D");
5050
5051     /* No issue with overriding these - the sampler is dirty due to blit usage */
5052     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5053             wined3d_gl_mag_filter(magLookup, Filter));
5054     checkGLcall("glTexParameteri");
5055     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5056             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5057     checkGLcall("glTexParameteri");
5058
         /* Pick the texture to draw from. A surface without a swapchain, or the
          * first back buffer, uses the copy made above. Any other swapchain
          * surface is read from GL_FRONT into a new temporary texture. */
5059     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5060         src_swapchain = src_surface->container.u.swapchain;
5061     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5062     {
5063         src = backup ? backup : src_surface->texture_name;
5064     }
5065     else
5066     {
5067         glReadBuffer(GL_FRONT);
5068         checkGLcall("glReadBuffer(GL_FRONT)");
5069
5070         glGenTextures(1, &src);
5071         checkGLcall("glGenTextures(1, &src)");
5072         context_bind_texture(context, GL_TEXTURE_2D, src);
5073
5074         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5075          * out for power of 2 sizes
5076          */
5077         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5078                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5079         checkGLcall("glTexImage2D");
5080         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5081                             0, 0 /* read offsets */,
5082                             0, 0,
5083                             fbwidth,
5084                             fbheight);
5085
5086         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5087         checkGLcall("glTexParameteri");
5088         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5089         checkGLcall("glTexParameteri");
5090
5091         glReadBuffer(GL_BACK);
5092         checkGLcall("glReadBuffer(GL_BACK)");
5093
5094         if(texture_target != GL_TEXTURE_2D) {
5095             glDisable(texture_target);
5096             glEnable(GL_TEXTURE_2D);
5097             texture_target = GL_TEXTURE_2D;
5098         }
5099     }
5100     checkGLcall("glEnd and previous");
5101
         /* Texture coordinates of the source rectangle. The vertical ones are
          * measured from the other edge of the surface (height - y); which of
          * the two becomes "top" depends on the upsidedown flag. */
5102     left = src_rect->left;
5103     right = src_rect->right;
5104
5105     if (!upsidedown)
5106     {
5107         top = src_surface->resource.height - src_rect->top;
5108         bottom = src_surface->resource.height - src_rect->bottom;
5109     }
5110     else
5111     {
5112         top = src_surface->resource.height - src_rect->bottom;
5113         bottom = src_surface->resource.height - src_rect->top;
5114     }
5115
         /* With SFLAG_NORMCOORD set the coordinates are scaled by the
          * power-of-two texture size. */
5116     if (src_surface->flags & SFLAG_NORMCOORD)
5117     {
5118         left /= src_surface->pow2Width;
5119         right /= src_surface->pow2Width;
5120         top /= src_surface->pow2Height;
5121         bottom /= src_surface->pow2Height;
5122     }
5123
5124     /* draw the source texture stretched and upside down. The correct surface is bound already */
5125     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5126     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5127
5128     context_set_draw_buffer(context, drawBuffer);
5129     glReadBuffer(drawBuffer);
5130
         /* The quad has the size of the destination rectangle and is placed at
          * the origin of drawBuffer. */
5131     glBegin(GL_QUADS);
5132         /* bottom left */
5133         glTexCoord2f(left, bottom);
5134         glVertex2i(0, 0);
5135
5136         /* top left */
5137         glTexCoord2f(left, top);
5138         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5139
5140         /* top right */
5141         glTexCoord2f(right, top);
5142         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5143
5144         /* bottom right */
5145         glTexCoord2f(right, bottom);
5146         glVertex2i(dst_rect.right - dst_rect.left, 0);
5147     glEnd();
5148     checkGLcall("glEnd and previous");
5149
5150     if (texture_target != dst_surface->texture_target)
5151     {
5152         glDisable(texture_target);
5153         glEnable(dst_surface->texture_target);
5154         texture_target = dst_surface->texture_target;
5155     }
5156
5157     /* Now read the stretched and upside down image into the destination texture */
5158     context_bind_texture(context, texture_target, dst_surface->texture_name);
5159     glCopyTexSubImage2D(texture_target,
5160                         0,
5161                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5162                         0, 0, /* We blitted the image to the origin */
5163                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5164     checkGLcall("glCopyTexSubImage2D");
5165
         /* The aux buffers need no restoring; only GL_BACK gets the saved
          * framebuffer copy drawn over the quad again. */
5166     if(drawBuffer == GL_BACK) {
5167         /* Write the back buffer backup back */
5168         if(backup) {
5169             if(texture_target != GL_TEXTURE_2D) {
5170                 glDisable(texture_target);
5171                 glEnable(GL_TEXTURE_2D);
5172                 texture_target = GL_TEXTURE_2D;
5173             }
5174             context_bind_texture(context, GL_TEXTURE_2D, backup);
5175         }
5176         else
5177         {
5178             if (texture_target != src_surface->texture_target)
5179             {
5180                 glDisable(texture_target);
5181                 glEnable(src_surface->texture_target);
5182                 texture_target = src_surface->texture_target;
5183             }
5184             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5185         }
5186
5187         glBegin(GL_QUADS);
5188             /* top left */
5189             glTexCoord2f(0.0f, 0.0f);
5190             glVertex2i(0, fbheight);
5191
5192             /* bottom left */
5193             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5194             glVertex2i(0, 0);
5195
5196             /* bottom right */
5197             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5198                     (float)fbheight / (float)src_surface->pow2Height);
5199             glVertex2i(fbwidth, 0);
5200
5201             /* top right */
5202             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5203             glVertex2i(fbwidth, fbheight);
5204         glEnd();
5205     }
5206     glDisable(texture_target);
5207     checkGLcall("glDisable(texture_target)");
5208
5209     /* Cleanup: delete the temporary textures generated in this function. */
5210     if (src != src_surface->texture_name && src != backup)
5211     {
5212         glDeleteTextures(1, &src);
5213         checkGLcall("glDeleteTextures(1, &src)");
5214     }
5215     if(backup) {
5216         glDeleteTextures(1, &backup);
5217         checkGLcall("glDeleteTextures(1, &backup)");
5218     }
5219
5220     LEAVE_GL();
5221
5222     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5223
5224     context_release(context);
5225
5226     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5227      * path is never entered
5228      */
5229     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5230 }
5231
5232 /* Front buffer coordinates are always full screen coordinates, but our GL
5233  * drawable is limited to the window's client area. The sysmem and texture
5234  * copies do have the full screen size. Note that GL has a bottom-left
5235  * origin, while D3D has a top-left origin. */
5236 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5237 {
5238     UINT drawable_height;
5239
5240     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5241             && surface == surface->container.u.swapchain->front_buffer)
5242     {
5243         POINT offset = {0, 0};
5244         RECT windowsize;
5245
5246         ScreenToClient(window, &offset);
5247         OffsetRect(rect, offset.x, offset.y);
5248
5249         GetClientRect(window, &windowsize);
5250         drawable_height = windowsize.bottom - windowsize.top;
5251     }
5252     else
5253     {
5254         drawable_height = surface->resource.height;
5255     }
5256
5257     rect->top = drawable_height - rect->top;
5258     rect->bottom = drawable_height - rect->bottom;
5259 }
5260
5261 static void surface_blt_to_drawable(const struct wined3d_device *device,
5262         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5263         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5264         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5265 {
5266     struct wined3d_context *context;
5267     RECT src_rect, dst_rect;
5268
5269     src_rect = *src_rect_in;
5270     dst_rect = *dst_rect_in;
5271
5272     /* Make sure the surface is up-to-date. This should probably use
5273      * surface_load_location() and worry about the destination surface too,
5274      * unless we're overwriting it completely. */
5275     surface_internal_preload(src_surface, SRGB_RGB);
5276
5277     /* Activate the destination context, set it up for blitting */
5278     context = context_acquire(device, dst_surface);
5279     context_apply_blit_state(context, device);
5280
5281     if (!surface_is_offscreen(dst_surface))
5282         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5283
5284     device->blitter->set_shader(device->blit_priv, context, src_surface);
5285
5286     ENTER_GL();
5287
5288     if (color_key)
5289     {
5290         glEnable(GL_ALPHA_TEST);
5291         checkGLcall("glEnable(GL_ALPHA_TEST)");
5292
5293         /* When the primary render target uses P8, the alpha component
5294          * contains the palette index. Which means that the colorkey is one of
5295          * the palette entries. In other cases pixels that should be masked
5296          * away have alpha set to 0. */
5297         if (primary_render_target_is_p8(device))
5298             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5299         else
5300             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5301         checkGLcall("glAlphaFunc");
5302     }
5303     else
5304     {
5305         glDisable(GL_ALPHA_TEST);
5306         checkGLcall("glDisable(GL_ALPHA_TEST)");
5307     }
5308
5309     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5310
5311     if (color_key)
5312     {
5313         glDisable(GL_ALPHA_TEST);
5314         checkGLcall("glDisable(GL_ALPHA_TEST)");
5315     }
5316
5317     LEAVE_GL();
5318
5319     /* Leave the opengl state valid for blitting */
5320     device->blitter->unset_shader(context->gl_info);
5321
5322     if (wined3d_settings.strict_draw_ordering
5323             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5324             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5325         wglFlush(); /* Flush to ensure ordering across contexts. */
5326
5327     context_release(context);
5328 }
5329
5330 /* Do not call while under the GL lock. */
5331 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5332 {
5333     struct wined3d_device *device = s->resource.device;
5334     const struct blit_shader *blitter;
5335
5336     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5337             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5338     if (!blitter)
5339     {
5340         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5341         return WINED3DERR_INVALIDCALL;
5342     }
5343
5344     return blitter->color_fill(device, s, rect, color);
5345 }
5346
5347 /* Do not call while under the GL lock. */
5348 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5349         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5350         WINED3DTEXTUREFILTERTYPE Filter)
5351 {
5352     struct wined3d_device *device = dst_surface->resource.device;
5353     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5354     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5355
5356     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5357             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5358             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5359
5360     /* Get the swapchain. One of the surfaces has to be a primary surface */
5361     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5362     {
5363         WARN("Destination is in sysmem, rejecting gl blt\n");
5364         return WINED3DERR_INVALIDCALL;
5365     }
5366
5367     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5368         dstSwapchain = dst_surface->container.u.swapchain;
5369
5370     if (src_surface)
5371     {
5372         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5373         {
5374             WARN("Src is in sysmem, rejecting gl blt\n");
5375             return WINED3DERR_INVALIDCALL;
5376         }
5377
5378         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5379             srcSwapchain = src_surface->container.u.swapchain;
5380     }
5381
5382     /* Early sort out of cases where no render target is used */
5383     if (!dstSwapchain && !srcSwapchain
5384             && src_surface != device->fb.render_targets[0]
5385             && dst_surface != device->fb.render_targets[0])
5386     {
5387         TRACE("No surface is render target, not using hardware blit.\n");
5388         return WINED3DERR_INVALIDCALL;
5389     }
5390
5391     /* No destination color keying supported */
5392     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5393     {
5394         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5395         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5396         return WINED3DERR_INVALIDCALL;
5397     }
5398
5399     if (dstSwapchain && dstSwapchain == srcSwapchain)
5400     {
5401         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5402         return WINED3DERR_INVALIDCALL;
5403     }
5404
5405     if (dstSwapchain && srcSwapchain)
5406     {
5407         FIXME("Implement hardware blit between two different swapchains\n");
5408         return WINED3DERR_INVALIDCALL;
5409     }
5410
5411     if (dstSwapchain)
5412     {
5413         /* Handled with regular texture -> swapchain blit */
5414         if (src_surface == device->fb.render_targets[0])
5415             TRACE("Blit from active render target to a swapchain\n");
5416     }
5417     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5418     {
5419         FIXME("Implement blit from a swapchain to the active render target\n");
5420         return WINED3DERR_INVALIDCALL;
5421     }
5422
5423     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5424     {
5425         /* Blit from render target to texture */
5426         BOOL stretchx;
5427
5428         /* P8 read back is not implemented */
5429         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5430                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5431         {
5432             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5433             return WINED3DERR_INVALIDCALL;
5434         }
5435
5436         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5437         {
5438             TRACE("Color keying not supported by frame buffer to texture blit\n");
5439             return WINED3DERR_INVALIDCALL;
5440             /* Destination color key is checked above */
5441         }
5442
5443         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5444             stretchx = TRUE;
5445         else
5446             stretchx = FALSE;
5447
5448         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5449          * flip the image nor scale it.
5450          *
5451          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5452          * -> If the app wants a image width an unscaled width, copy it line per line
5453          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5454          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5455          *    back buffer. This is slower than reading line per line, thus not used for flipping
5456          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5457          *    pixel by pixel. */
5458         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5459                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5460         {
5461             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5462             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5463         } else {
5464             TRACE("Using hardware stretching to flip / stretch the texture\n");
5465             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5466         }
5467
5468         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5469         {
5470             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5471             dst_surface->resource.allocatedMemory = NULL;
5472             dst_surface->resource.heapMemory = NULL;
5473         }
5474         else
5475         {
5476             dst_surface->flags &= ~SFLAG_INSYSMEM;
5477         }
5478
5479         return WINED3D_OK;
5480     }
5481     else if (src_surface)
5482     {
5483         /* Blit from offscreen surface to render target */
5484         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5485         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5486
5487         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5488
5489         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5490                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5491                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5492         {
5493             FIXME("Unsupported blit operation falling back to software\n");
5494             return WINED3DERR_INVALIDCALL;
5495         }
5496
5497         /* Color keying: Check if we have to do a color keyed blt,
5498          * and if not check if a color key is activated.
5499          *
5500          * Just modify the color keying parameters in the surface and restore them afterwards
5501          * The surface keeps track of the color key last used to load the opengl surface.
5502          * PreLoad will catch the change to the flags and color key and reload if necessary.
5503          */
5504         if (flags & WINEDDBLT_KEYSRC)
5505         {
5506             /* Use color key from surface */
5507         }
5508         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5509         {
5510             /* Use color key from DDBltFx */
5511             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5512             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5513         }
5514         else
5515         {
5516             /* Do not use color key */
5517             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5518         }
5519
5520         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5521                 src_surface, src_rect, dst_surface, dst_rect);
5522
5523         /* Restore the color key parameters */
5524         src_surface->CKeyFlags = oldCKeyFlags;
5525         src_surface->SrcBltCKey = oldBltCKey;
5526
5527         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5528
5529         return WINED3D_OK;
5530     }
5531
5532     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5533     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5534     return WINED3DERR_INVALIDCALL;
5535 }
5536
5537 /* GL locking is done by the caller */
5538 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5539         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5540 {
5541     struct wined3d_device *device = surface->resource.device;
5542     const struct wined3d_gl_info *gl_info = context->gl_info;
5543     GLint compare_mode = GL_NONE;
5544     struct blt_info info;
5545     GLint old_binding = 0;
5546     RECT rect;
5547
5548     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5549
5550     glDisable(GL_CULL_FACE);
5551     glDisable(GL_BLEND);
5552     glDisable(GL_ALPHA_TEST);
5553     glDisable(GL_SCISSOR_TEST);
5554     glDisable(GL_STENCIL_TEST);
5555     glEnable(GL_DEPTH_TEST);
5556     glDepthFunc(GL_ALWAYS);
5557     glDepthMask(GL_TRUE);
5558     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5559     glViewport(x, y, w, h);
5560
5561     SetRect(&rect, 0, h, w, 0);
5562     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5563     context_active_texture(context, context->gl_info, 0);
5564     glGetIntegerv(info.binding, &old_binding);
5565     glBindTexture(info.bind_target, texture);
5566     if (gl_info->supported[ARB_SHADOW])
5567     {
5568         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5569         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5570     }
5571
5572     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5573             gl_info, info.tex_type, &surface->ds_current_size);
5574
5575     glBegin(GL_TRIANGLE_STRIP);
5576     glTexCoord3fv(info.coords[0]);
5577     glVertex2f(-1.0f, -1.0f);
5578     glTexCoord3fv(info.coords[1]);
5579     glVertex2f(1.0f, -1.0f);
5580     glTexCoord3fv(info.coords[2]);
5581     glVertex2f(-1.0f, 1.0f);
5582     glTexCoord3fv(info.coords[3]);
5583     glVertex2f(1.0f, 1.0f);
5584     glEnd();
5585
5586     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5587     glBindTexture(info.bind_target, old_binding);
5588
5589     glPopAttrib();
5590
5591     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5592 }
5593
5594 void surface_modify_ds_location(struct wined3d_surface *surface,
5595         DWORD location, UINT w, UINT h)
5596 {
5597     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5598
5599     if (location & ~SFLAG_DS_LOCATIONS)
5600         FIXME("Invalid location (%#x) specified.\n", location);
5601
5602     surface->ds_current_size.cx = w;
5603     surface->ds_current_size.cy = h;
5604     surface->flags &= ~SFLAG_DS_LOCATIONS;
5605     surface->flags |= location;
5606 }
5607
5608 /* Context activation is done by the caller. */
5609 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5610 {
5611     struct wined3d_device *device = surface->resource.device;
5612     GLsizei w, h;
5613
5614     TRACE("surface %p, new location %#x.\n", surface, location);
5615
5616     /* TODO: Make this work for modes other than FBO */
5617     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5618
5619     if (!(surface->flags & location))
5620     {
5621         w = surface->ds_current_size.cx;
5622         h = surface->ds_current_size.cy;
5623         surface->ds_current_size.cx = 0;
5624         surface->ds_current_size.cy = 0;
5625     }
5626     else
5627     {
5628         w = surface->resource.width;
5629         h = surface->resource.height;
5630     }
5631
5632     if (surface->ds_current_size.cx == surface->resource.width
5633             && surface->ds_current_size.cy == surface->resource.height)
5634     {
5635         TRACE("Location (%#x) is already up to date.\n", location);
5636         return;
5637     }
5638
5639     if (surface->current_renderbuffer)
5640     {
5641         FIXME("Not supported with fixed up depth stencil.\n");
5642         return;
5643     }
5644
5645     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5646     {
5647         /* This mostly happens when a depth / stencil is used without being
5648          * cleared first. In principle we could upload from sysmem, or
5649          * explicitly clear before first usage. For the moment there don't
5650          * appear to be a lot of applications depending on this, so a FIXME
5651          * should do. */
5652         FIXME("No up to date depth stencil location.\n");
5653         surface->flags |= location;
5654         surface->ds_current_size.cx = surface->resource.width;
5655         surface->ds_current_size.cy = surface->resource.height;
5656         return;
5657     }
5658
5659     if (location == SFLAG_DS_OFFSCREEN)
5660     {
5661         GLint old_binding = 0;
5662         GLenum bind_target;
5663
5664         /* The render target is allowed to be smaller than the depth/stencil
5665          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5666          * than the offscreen surface. Don't overwrite the offscreen surface
5667          * with undefined data. */
5668         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5669         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5670
5671         TRACE("Copying onscreen depth buffer to depth texture.\n");
5672
5673         ENTER_GL();
5674
5675         if (!device->depth_blt_texture)
5676         {
5677             glGenTextures(1, &device->depth_blt_texture);
5678         }
5679
5680         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5681          * directly on the FBO texture. That's because we need to flip. */
5682         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5683                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5684         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5685         {
5686             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5687             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5688         }
5689         else
5690         {
5691             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5692             bind_target = GL_TEXTURE_2D;
5693         }
5694         glBindTexture(bind_target, device->depth_blt_texture);
5695         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5696          * internal format, because the internal format might include stencil
5697          * data. In principle we should copy stencil data as well, but unless
5698          * the driver supports stencil export it's hard to do, and doesn't
5699          * seem to be needed in practice. If the hardware doesn't support
5700          * writing stencil data, the glCopyTexImage2D() call might trigger
5701          * software fallbacks. */
5702         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5703         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5704         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5705         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5706         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5707         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5708         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5709         glBindTexture(bind_target, old_binding);
5710
5711         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5712                 NULL, surface, SFLAG_INTEXTURE);
5713         context_set_draw_buffer(context, GL_NONE);
5714         glReadBuffer(GL_NONE);
5715
5716         /* Do the actual blit */
5717         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5718         checkGLcall("depth_blt");
5719
5720         context_invalidate_state(context, STATE_FRAMEBUFFER);
5721
5722         LEAVE_GL();
5723
5724         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5725     }
5726     else if (location == SFLAG_DS_ONSCREEN)
5727     {
5728         TRACE("Copying depth texture to onscreen depth buffer.\n");
5729
5730         ENTER_GL();
5731
5732         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5733                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5734         surface_depth_blt(surface, context, surface->texture_name,
5735                 0, surface->pow2Height - h, w, h, surface->texture_target);
5736         checkGLcall("depth_blt");
5737
5738         context_invalidate_state(context, STATE_FRAMEBUFFER);
5739
5740         LEAVE_GL();
5741
5742         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5743     }
5744     else
5745     {
5746         ERR("Invalid location (%#x) specified.\n", location);
5747     }
5748
5749     surface->flags |= location;
5750     surface->ds_current_size.cx = surface->resource.width;
5751     surface->ds_current_size.cy = surface->resource.height;
5752 }
5753
5754 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5755 {
5756     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5757     struct wined3d_surface *overlay;
5758
5759     TRACE("surface %p, location %s, persistent %#x.\n",
5760             surface, debug_surflocation(location), persistent);
5761
5762     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5763             && (location & SFLAG_INDRAWABLE))
5764         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5765
5766     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5767             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5768         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5769
5770     if (persistent)
5771     {
5772         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5773                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5774         {
5775             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5776             {
5777                 TRACE("Passing to container.\n");
5778                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5779             }
5780         }
5781         surface->flags &= ~SFLAG_LOCATIONS;
5782         surface->flags |= location;
5783
5784         /* Redraw emulated overlays, if any */
5785         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5786         {
5787             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5788             {
5789                 surface_draw_overlay(overlay);
5790             }
5791         }
5792     }
5793     else
5794     {
5795         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5796         {
5797             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5798             {
5799                 TRACE("Passing to container\n");
5800                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5801             }
5802         }
5803         surface->flags &= ~location;
5804     }
5805
5806     if (!(surface->flags & SFLAG_LOCATIONS))
5807     {
5808         ERR("Surface %p does not have any up to date location.\n", surface);
5809     }
5810 }
5811
5812 static DWORD resource_access_from_location(DWORD location)
5813 {
5814     switch (location)
5815     {
5816         case SFLAG_INSYSMEM:
5817             return WINED3D_RESOURCE_ACCESS_CPU;
5818
5819         case SFLAG_INDRAWABLE:
5820         case SFLAG_INSRGBTEX:
5821         case SFLAG_INTEXTURE:
5822         case SFLAG_INRB_MULTISAMPLE:
5823         case SFLAG_INRB_RESOLVED:
5824             return WINED3D_RESOURCE_ACCESS_GPU;
5825
5826         default:
5827             FIXME("Unhandled location %#x.\n", location);
5828             return 0;
5829     }
5830 }
5831
5832 static void surface_load_sysmem(struct wined3d_surface *surface,
5833         const struct wined3d_gl_info *gl_info, const RECT *rect)
5834 {
5835     surface_prepare_system_memory(surface);
5836
5837     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5838         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5839
5840     /* Download the surface to system memory. */
5841     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5842     {
5843         struct wined3d_device *device = surface->resource.device;
5844         struct wined3d_context *context;
5845
5846         /* TODO: Use already acquired context when possible. */
5847         context = context_acquire(device, NULL);
5848
5849         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5850         surface_download_data(surface, gl_info);
5851
5852         context_release(context);
5853
5854         return;
5855     }
5856
5857     if (surface->flags & SFLAG_INDRAWABLE)
5858     {
5859         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5860                 wined3d_surface_get_pitch(surface));
5861         return;
5862     }
5863
5864     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5865             surface, surface->flags & SFLAG_LOCATIONS);
5866 }
5867
5868 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5869         const struct wined3d_gl_info *gl_info, const RECT *rect)
5870 {
5871     struct wined3d_device *device = surface->resource.device;
5872     struct wined3d_format format;
5873     CONVERT_TYPES convert;
5874     UINT byte_count;
5875     BYTE *mem;
5876
5877     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5878     {
5879         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5880         return WINED3DERR_INVALIDCALL;
5881     }
5882
5883     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5884         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5885
5886     if (surface->flags & SFLAG_INTEXTURE)
5887     {
5888         RECT r;
5889
5890         surface_get_rect(surface, rect, &r);
5891         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5892
5893         return WINED3D_OK;
5894     }
5895
5896     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5897     {
5898         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5899          * path through sysmem. */
5900         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5901     }
5902
5903     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5904
5905     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5906      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5907      * called. */
5908     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5909     {
5910         struct wined3d_context *context;
5911
5912         TRACE("Removing the pbo attached to surface %p.\n", surface);
5913
5914         /* TODO: Use already acquired context when possible. */
5915         context = context_acquire(device, NULL);
5916
5917         surface_remove_pbo(surface, gl_info);
5918
5919         context_release(context);
5920     }
5921
5922     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5923     {
5924         UINT height = surface->resource.height;
5925         UINT width = surface->resource.width;
5926         UINT src_pitch, dst_pitch;
5927
5928         byte_count = format.conv_byte_count;
5929         src_pitch = wined3d_surface_get_pitch(surface);
5930
5931         /* Stick to the alignment for the converted surface too, makes it
5932          * easier to load the surface. */
5933         dst_pitch = width * byte_count;
5934         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5935
5936         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5937         {
5938             ERR("Out of memory (%u).\n", dst_pitch * height);
5939             return E_OUTOFMEMORY;
5940         }
5941
5942         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5943                 src_pitch, width, height, dst_pitch, convert, surface);
5944
5945         surface->flags |= SFLAG_CONVERTED;
5946     }
5947     else
5948     {
5949         surface->flags &= ~SFLAG_CONVERTED;
5950         mem = surface->resource.allocatedMemory;
5951         byte_count = format.byte_count;
5952     }
5953
5954     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5955
5956     /* Don't delete PBO memory. */
5957     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5958         HeapFree(GetProcessHeap(), 0, mem);
5959
5960     return WINED3D_OK;
5961 }
5962
5963 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5964         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5965 {
5966     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5967     struct wined3d_device *device = surface->resource.device;
5968     struct wined3d_context *context;
5969     UINT width, src_pitch, dst_pitch;
5970     struct wined3d_bo_address data;
5971     struct wined3d_format format;
5972     POINT dst_point = {0, 0};
5973     CONVERT_TYPES convert;
5974     BYTE *mem;
5975
5976     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5977             && surface_is_offscreen(surface)
5978             && (surface->flags & SFLAG_INDRAWABLE))
5979     {
5980         surface_load_fb_texture(surface, srgb);
5981
5982         return WINED3D_OK;
5983     }
5984
5985     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5986             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
5987             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5988                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5989                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5990     {
5991         if (srgb)
5992             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
5993                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
5994         else
5995             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
5996                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
5997
5998         return WINED3D_OK;
5999     }
6000
6001     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6002             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6003             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6004                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6005                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6006     {
6007         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6008         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6009         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6010
6011         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
6012                 &rect, surface, dst_location, &rect);
6013
6014         return WINED3D_OK;
6015     }
6016
6017     /* Upload from system memory */
6018
6019     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6020             TRUE /* We will use textures */, &format, &convert);
6021
6022     if (srgb)
6023     {
6024         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6025         {
6026             /* Performance warning... */
6027             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6028             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6029         }
6030     }
6031     else
6032     {
6033         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6034         {
6035             /* Performance warning... */
6036             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6037             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6038         }
6039     }
6040
6041     if (!(surface->flags & SFLAG_INSYSMEM))
6042     {
6043         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6044         /* Lets hope we get it from somewhere... */
6045         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6046     }
6047
6048     /* TODO: Use already acquired context when possible. */
6049     context = context_acquire(device, NULL);
6050
6051     surface_prepare_texture(surface, context, srgb);
6052     surface_bind_and_dirtify(surface, context, srgb);
6053
6054     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6055     {
6056         surface->flags |= SFLAG_GLCKEY;
6057         surface->glCKey = surface->SrcBltCKey;
6058     }
6059     else surface->flags &= ~SFLAG_GLCKEY;
6060
6061     width = surface->resource.width;
6062     src_pitch = wined3d_surface_get_pitch(surface);
6063
6064     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6065      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6066      * called. */
6067     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6068     {
6069         TRACE("Removing the pbo attached to surface %p.\n", surface);
6070         surface_remove_pbo(surface, gl_info);
6071     }
6072
6073     if (format.convert)
6074     {
6075         /* This code is entered for texture formats which need a fixup. */
6076         UINT height = surface->resource.height;
6077
6078         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6079         dst_pitch = width * format.conv_byte_count;
6080         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6081
6082         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6083         {
6084             ERR("Out of memory (%u).\n", dst_pitch * height);
6085             context_release(context);
6086             return E_OUTOFMEMORY;
6087         }
6088         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6089     }
6090     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6091     {
6092         /* This code is only entered for color keying fixups */
6093         UINT height = surface->resource.height;
6094
6095         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6096         dst_pitch = width * format.conv_byte_count;
6097         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6098
6099         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6100         {
6101             ERR("Out of memory (%u).\n", dst_pitch * height);
6102             context_release(context);
6103             return E_OUTOFMEMORY;
6104         }
6105         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6106                 width, height, dst_pitch, convert, surface);
6107     }
6108     else
6109     {
6110         mem = surface->resource.allocatedMemory;
6111     }
6112
6113     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6114     data.addr = mem;
6115     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6116
6117     context_release(context);
6118
6119     /* Don't delete PBO memory. */
6120     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6121         HeapFree(GetProcessHeap(), 0, mem);
6122
6123     return WINED3D_OK;
6124 }
6125
6126 static void surface_multisample_resolve(struct wined3d_surface *surface)
6127 {
6128     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6129
6130     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6131         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6132
6133     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6134             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6135 }
6136
6137 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6138 {
6139     struct wined3d_device *device = surface->resource.device;
6140     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6141     HRESULT hr;
6142
6143     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6144
6145     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6146     {
6147         if (location == SFLAG_INTEXTURE)
6148         {
6149             struct wined3d_context *context = context_acquire(device, NULL);
6150             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6151             context_release(context);
6152             return WINED3D_OK;
6153         }
6154         else
6155         {
6156             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6157             return WINED3DERR_INVALIDCALL;
6158         }
6159     }
6160
6161     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6162         location = SFLAG_INTEXTURE;
6163
6164     if (surface->flags & location)
6165     {
6166         TRACE("Location already up to date.\n");
6167         return WINED3D_OK;
6168     }
6169
6170     if (WARN_ON(d3d_surface))
6171     {
6172         DWORD required_access = resource_access_from_location(location);
6173         if ((surface->resource.access_flags & required_access) != required_access)
6174             WARN("Operation requires %#x access, but surface only has %#x.\n",
6175                     required_access, surface->resource.access_flags);
6176     }
6177
6178     if (!(surface->flags & SFLAG_LOCATIONS))
6179     {
6180         ERR("Surface %p does not have any up to date location.\n", surface);
6181         surface->flags |= SFLAG_LOST;
6182         return WINED3DERR_DEVICELOST;
6183     }
6184
6185     switch (location)
6186     {
6187         case SFLAG_INSYSMEM:
6188             surface_load_sysmem(surface, gl_info, rect);
6189             break;
6190
6191         case SFLAG_INDRAWABLE:
6192             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6193                 return hr;
6194             break;
6195
6196         case SFLAG_INRB_RESOLVED:
6197             surface_multisample_resolve(surface);
6198             break;
6199
6200         case SFLAG_INTEXTURE:
6201         case SFLAG_INSRGBTEX:
6202             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6203                 return hr;
6204             break;
6205
6206         default:
6207             ERR("Don't know how to handle location %#x.\n", location);
6208             break;
6209     }
6210
6211     if (!rect)
6212     {
6213         surface->flags |= location;
6214
6215         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6216             surface_evict_sysmem(surface);
6217     }
6218
6219     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6220             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6221     {
6222         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6223     }
6224
6225     return WINED3D_OK;
6226 }
6227
6228 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6229 {
6230     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6231
6232     /* Not on a swapchain - must be offscreen */
6233     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6234
6235     /* The front buffer is always onscreen */
6236     if (surface == swapchain->front_buffer) return FALSE;
6237
6238     /* If the swapchain is rendered to an FBO, the backbuffer is
6239      * offscreen, otherwise onscreen */
6240     return swapchain->render_to_fbo;
6241 }
6242
6243 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* Blitter release hook for the fixed function blitter. It has nothing to
 * free.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6246
6247 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6248 /* Context activation is done by the caller. */
6249 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6250 {
6251     BYTE table[256][4];
6252     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6253
6254     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6255
6256     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6257     ENTER_GL();
6258     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6259     LEAVE_GL();
6260 }
6261
6262 /* Context activation is done by the caller. */
6263 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6264 {
6265     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6266
6267     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6268      * else the surface is converted in software at upload time in LoadLocation.
6269      */
6270     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6271         ffp_blit_p8_upload_palette(surface, context->gl_info);
6272
6273     ENTER_GL();
6274     glEnable(surface->texture_target);
6275     checkGLcall("glEnable(surface->texture_target)");
6276     LEAVE_GL();
6277     return WINED3D_OK;
6278 }
6279
6280 /* Context activation is done by the caller. */
6281 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6282 {
6283     ENTER_GL();
6284     glDisable(GL_TEXTURE_2D);
6285     checkGLcall("glDisable(GL_TEXTURE_2D)");
6286     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6287     {
6288         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6289         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6290     }
6291     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6292     {
6293         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6294         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6295     }
6296     LEAVE_GL();
6297 }
6298
6299 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6300         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6301         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6302 {
6303     enum complex_fixup src_fixup;
6304
6305     switch (blit_op)
6306     {
6307         case WINED3D_BLIT_OP_COLOR_BLIT:
6308             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6309                 return FALSE;
6310
6311             src_fixup = get_complex_fixup(src_format->color_fixup);
6312             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6313             {
6314                 TRACE("Checking support for fixup:\n");
6315                 dump_color_fixup_desc(src_format->color_fixup);
6316             }
6317
6318             if (!is_identity_fixup(dst_format->color_fixup))
6319             {
6320                 TRACE("Destination fixups are not supported\n");
6321                 return FALSE;
6322             }
6323
6324             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6325             {
6326                 TRACE("P8 fixup supported\n");
6327                 return TRUE;
6328             }
6329
6330             /* We only support identity conversions. */
6331             if (is_identity_fixup(src_format->color_fixup))
6332             {
6333                 TRACE("[OK]\n");
6334                 return TRUE;
6335             }
6336
6337             TRACE("[FAILED]\n");
6338             return FALSE;
6339
6340         case WINED3D_BLIT_OP_COLOR_FILL:
6341             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6342                 return FALSE;
6343
6344             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6345             {
6346                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6347                     return FALSE;
6348             }
6349             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6350             {
6351                 TRACE("Color fill not supported\n");
6352                 return FALSE;
6353             }
6354
6355             /* FIXME: We should reject color fills on formats with fixups,
6356              * but this would break P8 color fills for example. */
6357
6358             return TRUE;
6359
6360         case WINED3D_BLIT_OP_DEPTH_FILL:
6361             return TRUE;
6362
6363         default:
6364             TRACE("Unsupported blit_op=%d\n", blit_op);
6365             return FALSE;
6366     }
6367 }
6368
6369 /* Do not call while under the GL lock. */
6370 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6371         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6372 {
6373     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6374     struct wined3d_fb_state fb = {&dst_surface, NULL};
6375
6376     return device_clear_render_targets(device, 1, &fb,
6377             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6378 }
6379
6380 /* Do not call while under the GL lock. */
6381 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6382         struct wined3d_surface *surface, const RECT *rect, float depth)
6383 {
6384     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6385     struct wined3d_fb_state fb = {NULL, surface};
6386
6387     return device_clear_render_targets(device, 0, &fb,
6388             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6389 }
6390
6391 const struct blit_shader ffp_blit =  {
6392     ffp_blit_alloc,
6393     ffp_blit_free,
6394     ffp_blit_set,
6395     ffp_blit_unset,
6396     ffp_blit_supported,
6397     ffp_blit_color_fill,
6398     ffp_blit_depth_fill,
6399 };
6400
6401 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6402 {
6403     return WINED3D_OK;
6404 }
6405
/* Counterpart of cpu_blit_alloc(); there is nothing to release.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
    /* Intentionally empty. */
}
6410
6411 /* Context activation is done by the caller. */
6412 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
6413 {
6414     return WINED3D_OK;
6415 }
6416
/* Counterpart of cpu_blit_set(); there is no state to restore.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
    /* Intentionally empty. */
}
6421
6422 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6423         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6424         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6425 {
6426     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6427     {
6428         return TRUE;
6429     }
6430
6431     return FALSE;
6432 }
6433
6434 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6435         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6436         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6437 {
6438     UINT row_block_count;
6439     const BYTE *src_row;
6440     BYTE *dst_row;
6441     UINT x, y;
6442
6443     src_row = src_data;
6444     dst_row = dst_data;
6445
6446     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6447
6448     if (!flags)
6449     {
6450         for (y = 0; y < update_h; y += format->block_height)
6451         {
6452             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6453             src_row += src_pitch;
6454             dst_row += dst_pitch;
6455         }
6456
6457         return WINED3D_OK;
6458     }
6459
6460     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6461     {
6462         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6463
6464         switch (format->id)
6465         {
6466             case WINED3DFMT_DXT1:
6467                 for (y = 0; y < update_h; y += format->block_height)
6468                 {
6469                     struct block
6470                     {
6471                         WORD color[2];
6472                         BYTE control_row[4];
6473                     };
6474
6475                     const struct block *s = (const struct block *)src_row;
6476                     struct block *d = (struct block *)dst_row;
6477
6478                     for (x = 0; x < row_block_count; ++x)
6479                     {
6480                         d[x].color[0] = s[x].color[0];
6481                         d[x].color[1] = s[x].color[1];
6482                         d[x].control_row[0] = s[x].control_row[3];
6483                         d[x].control_row[1] = s[x].control_row[2];
6484                         d[x].control_row[2] = s[x].control_row[1];
6485                         d[x].control_row[3] = s[x].control_row[0];
6486                     }
6487                     src_row -= src_pitch;
6488                     dst_row += dst_pitch;
6489                 }
6490                 return WINED3D_OK;
6491
6492             case WINED3DFMT_DXT3:
6493                 for (y = 0; y < update_h; y += format->block_height)
6494                 {
6495                     struct block
6496                     {
6497                         WORD alpha_row[4];
6498                         WORD color[2];
6499                         BYTE control_row[4];
6500                     };
6501
6502                     const struct block *s = (const struct block *)src_row;
6503                     struct block *d = (struct block *)dst_row;
6504
6505                     for (x = 0; x < row_block_count; ++x)
6506                     {
6507                         d[x].alpha_row[0] = s[x].alpha_row[3];
6508                         d[x].alpha_row[1] = s[x].alpha_row[2];
6509                         d[x].alpha_row[2] = s[x].alpha_row[1];
6510                         d[x].alpha_row[3] = s[x].alpha_row[0];
6511                         d[x].color[0] = s[x].color[0];
6512                         d[x].color[1] = s[x].color[1];
6513                         d[x].control_row[0] = s[x].control_row[3];
6514                         d[x].control_row[1] = s[x].control_row[2];
6515                         d[x].control_row[2] = s[x].control_row[1];
6516                         d[x].control_row[3] = s[x].control_row[0];
6517                     }
6518                     src_row -= src_pitch;
6519                     dst_row += dst_pitch;
6520                 }
6521                 return WINED3D_OK;
6522
6523             default:
6524                 FIXME("Compressed flip not implemented for format %s.\n",
6525                         debug_d3dformat(format->id));
6526                 return E_NOTIMPL;
6527         }
6528     }
6529
6530     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6531             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6532
6533     return E_NOTIMPL;
6534 }
6535
6536 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6537         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6538         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6539 {
6540     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6541     const struct wined3d_format *src_format, *dst_format;
6542     struct wined3d_surface *orig_src = src_surface;
6543     WINED3DLOCKED_RECT dlock, slock;
6544     HRESULT hr = WINED3D_OK;
6545     const BYTE *sbuf;
6546     RECT xdst,xsrc;
6547     BYTE *dbuf;
6548     int x, y;
6549
6550     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6551             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6552             flags, fx, debug_d3dtexturefiltertype(filter));
6553
6554     xsrc = *src_rect;
6555
6556     if (!src_surface)
6557     {
6558         RECT full_rect;
6559
6560         full_rect.left = 0;
6561         full_rect.top = 0;
6562         full_rect.right = dst_surface->resource.width;
6563         full_rect.bottom = dst_surface->resource.height;
6564         IntersectRect(&xdst, &full_rect, dst_rect);
6565     }
6566     else
6567     {
6568         BOOL clip_horiz, clip_vert;
6569
6570         xdst = *dst_rect;
6571         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6572         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6573
6574         if (clip_vert || clip_horiz)
6575         {
6576             /* Now check if this is a special case or not... */
6577             if ((flags & WINEDDBLT_DDFX)
6578                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6579                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6580             {
6581                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6582                 return WINED3D_OK;
6583             }
6584
6585             if (clip_horiz)
6586             {
6587                 if (xdst.left < 0)
6588                 {
6589                     xsrc.left -= xdst.left;
6590                     xdst.left = 0;
6591                 }
6592                 if (xdst.right > dst_surface->resource.width)
6593                 {
6594                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6595                     xdst.right = (int)dst_surface->resource.width;
6596                 }
6597             }
6598
6599             if (clip_vert)
6600             {
6601                 if (xdst.top < 0)
6602                 {
6603                     xsrc.top -= xdst.top;
6604                     xdst.top = 0;
6605                 }
6606                 if (xdst.bottom > dst_surface->resource.height)
6607                 {
6608                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6609                     xdst.bottom = (int)dst_surface->resource.height;
6610                 }
6611             }
6612
6613             /* And check if after clipping something is still to be done... */
6614             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6615                     || (xdst.left >= (int)dst_surface->resource.width)
6616                     || (xdst.top >= (int)dst_surface->resource.height)
6617                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6618                     || (xsrc.left >= (int)src_surface->resource.width)
6619                     || (xsrc.top >= (int)src_surface->resource.height))
6620             {
6621                 TRACE("Nothing to be done after clipping.\n");
6622                 return WINED3D_OK;
6623             }
6624         }
6625     }
6626
6627     if (src_surface == dst_surface)
6628     {
6629         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6630         slock = dlock;
6631         src_format = dst_surface->resource.format;
6632         dst_format = src_format;
6633     }
6634     else
6635     {
6636         dst_format = dst_surface->resource.format;
6637         if (src_surface)
6638         {
6639             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6640             {
6641                 src_surface = surface_convert_format(src_surface, dst_format->id);
6642                 if (!src_surface)
6643                 {
6644                     /* The conv function writes a FIXME */
6645                     WARN("Cannot convert source surface format to dest format.\n");
6646                     goto release;
6647                 }
6648             }
6649             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6650             src_format = src_surface->resource.format;
6651         }
6652         else
6653         {
6654             src_format = dst_format;
6655         }
6656         if (dst_rect)
6657             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6658         else
6659             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6660     }
6661
6662     bpp = dst_surface->resource.format->byte_count;
6663     srcheight = xsrc.bottom - xsrc.top;
6664     srcwidth = xsrc.right - xsrc.left;
6665     dstheight = xdst.bottom - xdst.top;
6666     dstwidth = xdst.right - xdst.left;
6667     width = (xdst.right - xdst.left) * bpp;
6668
6669     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6670     {
6671         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6672
6673         if (src_surface == dst_surface)
6674         {
6675             FIXME("Only plain blits supported on compressed surfaces.\n");
6676             hr = E_NOTIMPL;
6677             goto release;
6678         }
6679
6680         if (srcheight != dstheight || srcwidth != dstwidth)
6681         {
6682             WARN("Stretching not supported on compressed surfaces.\n");
6683             hr = WINED3DERR_INVALIDCALL;
6684             goto release;
6685         }
6686
6687         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6688         {
6689             WARN("Rectangle not block-aligned.\n");
6690             hr = WINED3DERR_INVALIDCALL;
6691             goto release;
6692         }
6693
6694         hr = surface_cpu_blt_compressed(slock.pBits, dlock.pBits,
6695                 slock.Pitch, dlock.Pitch, dstwidth, dstheight,
6696                 src_format, flags, fx);
6697         goto release;
6698     }
6699
6700     if (dst_rect && src_surface != dst_surface)
6701         dbuf = dlock.pBits;
6702     else
6703         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6704
6705     /* First, all the 'source-less' blits */
6706     if (flags & WINEDDBLT_COLORFILL)
6707     {
6708         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6709         flags &= ~WINEDDBLT_COLORFILL;
6710     }
6711
6712     if (flags & WINEDDBLT_DEPTHFILL)
6713     {
6714         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6715     }
6716     if (flags & WINEDDBLT_ROP)
6717     {
6718         /* Catch some degenerate cases here. */
6719         switch (fx->dwROP)
6720         {
6721             case BLACKNESS:
6722                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6723                 break;
6724             case 0xAA0029: /* No-op */
6725                 break;
6726             case WHITENESS:
6727                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6728                 break;
6729             case SRCCOPY: /* Well, we do that below? */
6730                 break;
6731             default:
6732                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6733                 goto error;
6734         }
6735         flags &= ~WINEDDBLT_ROP;
6736     }
6737     if (flags & WINEDDBLT_DDROPS)
6738     {
6739         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6740     }
6741     /* Now the 'with source' blits. */
6742     if (src_surface)
6743     {
6744         const BYTE *sbase;
6745         int sx, xinc, sy, yinc;
6746
6747         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6748             goto release;
6749
6750         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6751                 && (srcwidth != dstwidth || srcheight != dstheight))
6752         {
6753             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6754             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6755         }
6756
6757         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6758         xinc = (srcwidth << 16) / dstwidth;
6759         yinc = (srcheight << 16) / dstheight;
6760
6761         if (!flags)
6762         {
6763             /* No effects, we can cheat here. */
6764             if (dstwidth == srcwidth)
6765             {
6766                 if (dstheight == srcheight)
6767                 {
6768                     /* No stretching in either direction. This needs to be as
6769                      * fast as possible. */
6770                     sbuf = sbase;
6771
6772                     /* Check for overlapping surfaces. */
6773                     if (src_surface != dst_surface || xdst.top < xsrc.top
6774                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6775                     {
6776                         /* No overlap, or dst above src, so copy from top downwards. */
6777                         for (y = 0; y < dstheight; ++y)
6778                         {
6779                             memcpy(dbuf, sbuf, width);
6780                             sbuf += slock.Pitch;
6781                             dbuf += dlock.Pitch;
6782                         }
6783                     }
6784                     else if (xdst.top > xsrc.top)
6785                     {
6786                         /* Copy from bottom upwards. */
6787                         sbuf += (slock.Pitch*dstheight);
6788                         dbuf += (dlock.Pitch*dstheight);
6789                         for (y = 0; y < dstheight; ++y)
6790                         {
6791                             sbuf -= slock.Pitch;
6792                             dbuf -= dlock.Pitch;
6793                             memcpy(dbuf, sbuf, width);
6794                         }
6795                     }
6796                     else
6797                     {
6798                         /* Src and dst overlapping on the same line, use memmove. */
6799                         for (y = 0; y < dstheight; ++y)
6800                         {
6801                             memmove(dbuf, sbuf, width);
6802                             sbuf += slock.Pitch;
6803                             dbuf += dlock.Pitch;
6804                         }
6805                     }
6806                 }
6807                 else
6808                 {
6809                     /* Stretching in y direction only. */
6810                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6811                     {
6812                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6813                         memcpy(dbuf, sbuf, width);
6814                         dbuf += dlock.Pitch;
6815                     }
6816                 }
6817             }
6818             else
6819             {
6820                 /* Stretching in X direction. */
6821                 int last_sy = -1;
6822                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6823                 {
6824                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6825
6826                     if ((sy >> 16) == (last_sy >> 16))
6827                     {
6828                         /* This source row is the same as last source row -
6829                          * Copy the already stretched row. */
6830                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6831                     }
6832                     else
6833                     {
6834 #define STRETCH_ROW(type) \
6835 do { \
6836     const type *s = (const type *)sbuf; \
6837     type *d = (type *)dbuf; \
6838     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6839         d[x] = s[sx >> 16]; \
6840 } while(0)
6841
6842                         switch(bpp)
6843                         {
6844                             case 1:
6845                                 STRETCH_ROW(BYTE);
6846                                 break;
6847                             case 2:
6848                                 STRETCH_ROW(WORD);
6849                                 break;
6850                             case 4:
6851                                 STRETCH_ROW(DWORD);
6852                                 break;
6853                             case 3:
6854                             {
6855                                 const BYTE *s;
6856                                 BYTE *d = dbuf;
6857                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6858                                 {
6859                                     DWORD pixel;
6860
6861                                     s = sbuf + 3 * (sx >> 16);
6862                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6863                                     d[0] = (pixel      ) & 0xff;
6864                                     d[1] = (pixel >>  8) & 0xff;
6865                                     d[2] = (pixel >> 16) & 0xff;
6866                                     d += 3;
6867                                 }
6868                                 break;
6869                             }
6870                             default:
6871                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6872                                 hr = WINED3DERR_NOTAVAILABLE;
6873                                 goto error;
6874                         }
6875 #undef STRETCH_ROW
6876                     }
6877                     dbuf += dlock.Pitch;
6878                     last_sy = sy;
6879                 }
6880             }
6881         }
6882         else
6883         {
6884             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6885             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6886             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6887             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6888             {
6889                 /* The color keying flags are checked for correctness in ddraw */
6890                 if (flags & WINEDDBLT_KEYSRC)
6891                 {
6892                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6893                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6894                 }
6895                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6896                 {
6897                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6898                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6899                 }
6900
6901                 if (flags & WINEDDBLT_KEYDEST)
6902                 {
6903                     /* Destination color keys are taken from the source surface! */
6904                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6905                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6906                 }
6907                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6908                 {
6909                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6910                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6911                 }
6912
6913                 if (bpp == 1)
6914                 {
6915                     keymask = 0xff;
6916                 }
6917                 else
6918                 {
6919                     keymask = src_format->red_mask
6920                             | src_format->green_mask
6921                             | src_format->blue_mask;
6922                 }
6923                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6924             }
6925
6926             if (flags & WINEDDBLT_DDFX)
6927             {
6928                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6929                 LONG tmpxy;
6930                 dTopLeft     = dbuf;
6931                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6932                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6933                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6934
6935                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6936                 {
6937                     /* I don't think we need to do anything about this flag */
6938                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6939                 }
6940                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6941                 {
6942                     tmp          = dTopRight;
6943                     dTopRight    = dTopLeft;
6944                     dTopLeft     = tmp;
6945                     tmp          = dBottomRight;
6946                     dBottomRight = dBottomLeft;
6947                     dBottomLeft  = tmp;
6948                     dstxinc = dstxinc * -1;
6949                 }
6950                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6951                 {
6952                     tmp          = dTopLeft;
6953                     dTopLeft     = dBottomLeft;
6954                     dBottomLeft  = tmp;
6955                     tmp          = dTopRight;
6956                     dTopRight    = dBottomRight;
6957                     dBottomRight = tmp;
6958                     dstyinc = dstyinc * -1;
6959                 }
6960                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6961                 {
6962                     /* I don't think we need to do anything about this flag */
6963                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6964                 }
6965                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6966                 {
6967                     tmp          = dBottomRight;
6968                     dBottomRight = dTopLeft;
6969                     dTopLeft     = tmp;
6970                     tmp          = dBottomLeft;
6971                     dBottomLeft  = dTopRight;
6972                     dTopRight    = tmp;
6973                     dstxinc = dstxinc * -1;
6974                     dstyinc = dstyinc * -1;
6975                 }
6976                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6977                 {
6978                     tmp          = dTopLeft;
6979                     dTopLeft     = dBottomLeft;
6980                     dBottomLeft  = dBottomRight;
6981                     dBottomRight = dTopRight;
6982                     dTopRight    = tmp;
6983                     tmpxy   = dstxinc;
6984                     dstxinc = dstyinc;
6985                     dstyinc = tmpxy;
6986                     dstxinc = dstxinc * -1;
6987                 }
6988                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6989                 {
6990                     tmp          = dTopLeft;
6991                     dTopLeft     = dTopRight;
6992                     dTopRight    = dBottomRight;
6993                     dBottomRight = dBottomLeft;
6994                     dBottomLeft  = tmp;
6995                     tmpxy   = dstxinc;
6996                     dstxinc = dstyinc;
6997                     dstyinc = tmpxy;
6998                     dstyinc = dstyinc * -1;
6999                 }
7000                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
7001                 {
7002                     /* I don't think we need to do anything about this flag */
7003                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
7004                 }
7005                 dbuf = dTopLeft;
7006                 flags &= ~(WINEDDBLT_DDFX);
7007             }
7008
7009 #define COPY_COLORKEY_FX(type) \
7010 do { \
7011     const type *s; \
7012     type *d = (type *)dbuf, *dx, tmp; \
7013     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7014     { \
7015         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
7016         dx = d; \
7017         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7018         { \
7019             tmp = s[sx >> 16]; \
7020             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7021                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7022             { \
7023                 dx[0] = tmp; \
7024             } \
7025             dx = (type *)(((BYTE *)dx) + dstxinc); \
7026         } \
7027         d = (type *)(((BYTE *)d) + dstyinc); \
7028     } \
7029 } while(0)
7030
7031             switch (bpp)
7032             {
7033                 case 1:
7034                     COPY_COLORKEY_FX(BYTE);
7035                     break;
7036                 case 2:
7037                     COPY_COLORKEY_FX(WORD);
7038                     break;
7039                 case 4:
7040                     COPY_COLORKEY_FX(DWORD);
7041                     break;
7042                 case 3:
7043                 {
7044                     const BYTE *s;
7045                     BYTE *d = dbuf, *dx;
7046                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7047                     {
7048                         sbuf = sbase + (sy >> 16) * slock.Pitch;
7049                         dx = d;
7050                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7051                         {
7052                             DWORD pixel, dpixel = 0;
7053                             s = sbuf + 3 * (sx>>16);
7054                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7055                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7056                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7057                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7058                             {
7059                                 dx[0] = (pixel      ) & 0xff;
7060                                 dx[1] = (pixel >>  8) & 0xff;
7061                                 dx[2] = (pixel >> 16) & 0xff;
7062                             }
7063                             dx += dstxinc;
7064                         }
7065                         d += dstyinc;
7066                     }
7067                     break;
7068                 }
7069                 default:
7070                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7071                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7072                     hr = WINED3DERR_NOTAVAILABLE;
7073                     goto error;
7074 #undef COPY_COLORKEY_FX
7075             }
7076         }
7077     }
7078
7079 error:
7080     if (flags && FIXME_ON(d3d_surface))
7081     {
7082         FIXME("\tUnsupported flags: %#x.\n", flags);
7083     }
7084
7085 release:
7086     wined3d_surface_unmap(dst_surface);
7087     if (src_surface && src_surface != dst_surface)
7088         wined3d_surface_unmap(src_surface);
7089     /* Release the converted surface, if any. */
7090     if (src_surface && src_surface != orig_src)
7091         wined3d_surface_decref(src_surface);
7092
7093     return hr;
7094 }
7095
7096 /* Do not call while under the GL lock. */
7097 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7098         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7099 {
7100     static const RECT src_rect;
7101     WINEDDBLTFX BltFx;
7102
7103     memset(&BltFx, 0, sizeof(BltFx));
7104     BltFx.dwSize = sizeof(BltFx);
7105     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7106     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7107             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7108 }
7109
7110 /* Do not call while under the GL lock. */
7111 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7112         struct wined3d_surface *surface, const RECT *rect, float depth)
7113 {
7114     FIXME("Depth filling not implemented by cpu_blit.\n");
7115     return WINED3DERR_INVALIDCALL;
7116 }
7117
7118 const struct blit_shader cpu_blit =  {
7119     cpu_blit_alloc,
7120     cpu_blit_free,
7121     cpu_blit_set,
7122     cpu_blit_unset,
7123     cpu_blit_supported,
7124     cpu_blit_color_fill,
7125     cpu_blit_depth_fill,
7126 };
7127
7128 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7129         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7130         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7131         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7132 {
7133     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7134     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7135     unsigned int resource_size;
7136     HRESULT hr;
7137
7138     if (multisample_quality > 0)
7139     {
7140         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7141         multisample_quality = 0;
7142     }
7143
7144     /* Quick lockable sanity check.
7145      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7146      * this function is too deep to need to care about things like this.
7147      * Levels need to be checked too, since they all affect what can be done. */
7148     switch (pool)
7149     {
7150         case WINED3DPOOL_SCRATCH:
7151             if (!lockable)
7152             {
7153                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7154                         "which are mutually exclusive, setting lockable to TRUE.\n");
7155                 lockable = TRUE;
7156             }
7157             break;
7158
7159         case WINED3DPOOL_SYSTEMMEM:
7160             if (!lockable)
7161                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7162             break;
7163
7164         case WINED3DPOOL_MANAGED:
7165             if (usage & WINED3DUSAGE_DYNAMIC)
7166                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7167             break;
7168
7169         case WINED3DPOOL_DEFAULT:
7170             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7171                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7172             break;
7173
7174         default:
7175             FIXME("Unknown pool %#x.\n", pool);
7176             break;
7177     };
7178
7179     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7180         FIXME("Trying to create a render target that isn't in the default pool.\n");
7181
7182     /* FIXME: Check that the format is supported by the device. */
7183
7184     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7185     if (!resource_size)
7186         return WINED3DERR_INVALIDCALL;
7187
7188     surface->surface_type = surface_type;
7189
7190     switch (surface_type)
7191     {
7192         case SURFACE_OPENGL:
7193             surface->surface_ops = &surface_ops;
7194             break;
7195
7196         case SURFACE_GDI:
7197             surface->surface_ops = &gdi_surface_ops;
7198             break;
7199
7200         default:
7201             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7202             return WINED3DERR_INVALIDCALL;
7203     }
7204
7205     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7206             multisample_type, multisample_quality, usage, pool, width, height, 1,
7207             resource_size, parent, parent_ops, &surface_resource_ops);
7208     if (FAILED(hr))
7209     {
7210         WARN("Failed to initialize resource, returning %#x.\n", hr);
7211         return hr;
7212     }
7213
7214     /* "Standalone" surface. */
7215     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7216
7217     surface->texture_level = level;
7218     list_init(&surface->overlays);
7219
7220     /* Flags */
7221     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7222     if (discard)
7223         surface->flags |= SFLAG_DISCARD;
7224     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7225         surface->flags |= SFLAG_LOCKABLE;
7226     /* I'm not sure if this qualifies as a hack or as an optimization. It
7227      * seems reasonable to assume that lockable render targets will get
7228      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7229      * creation. However, the other reason we want to do this is that several
7230      * ddraw applications access surface memory while the surface isn't
7231      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7232      * future locks prevents these from crashing. */
7233     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7234         surface->flags |= SFLAG_DYNLOCK;
7235
7236     /* Mark the texture as dirty so that it gets loaded first time around. */
7237     surface_add_dirty_rect(surface, NULL);
7238     list_init(&surface->renderbuffers);
7239
7240     TRACE("surface %p, memory %p, size %u\n",
7241             surface, surface->resource.allocatedMemory, surface->resource.size);
7242
7243     /* Call the private setup routine */
7244     hr = surface->surface_ops->surface_private_setup(surface);
7245     if (FAILED(hr))
7246     {
7247         ERR("Private setup failed, returning %#x\n", hr);
7248         surface_cleanup(surface);
7249         return hr;
7250     }
7251
7252     /* Similar to lockable rendertargets above, creating the DIB section
7253      * during surface initialization prevents the sysmem pointer from changing
7254      * after a wined3d_surface_getdc() call. */
7255     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7256             && SUCCEEDED(surface_create_dib_section(surface)))
7257     {
7258         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7259         surface->resource.heapMemory = NULL;
7260         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7261     }
7262
7263     return hr;
7264 }
7265
7266 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7267         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7268         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7269         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7270 {
7271     struct wined3d_surface *object;
7272     HRESULT hr;
7273
7274     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7275             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7276     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7277             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7278     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7279
7280     if (surface_type == SURFACE_OPENGL && !device->adapter)
7281     {
7282         ERR("OpenGL surfaces are not available without OpenGL.\n");
7283         return WINED3DERR_NOTAVAILABLE;
7284     }
7285
7286     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7287     if (!object)
7288     {
7289         ERR("Failed to allocate surface memory.\n");
7290         return WINED3DERR_OUTOFVIDEOMEMORY;
7291     }
7292
7293     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7294             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7295     if (FAILED(hr))
7296     {
7297         WARN("Failed to initialize surface, returning %#x.\n", hr);
7298         HeapFree(GetProcessHeap(), 0, object);
7299         return hr;
7300     }
7301
7302     TRACE("Created surface %p.\n", object);
7303     *surface = object;
7304
7305     return hr;
7306 }