wined3d: Remove surface_preload from wined3d_surface_ops.
[wine] / dlls / wined3d / surface.c
1 /*
2  * Copyright 1997-2000 Marcus Meissner
3  * Copyright 1998-2000 Lionel Ulmer
4  * Copyright 2000-2001 TransGaming Technologies Inc.
5  * Copyright 2002-2005 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006-2011 Stefan Dösinger for CodeWeavers
10  * Copyright 2007-2008 Henri Verbeet
11  * Copyright 2006-2008 Roderick Colenbrander
12  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
13  *
14  * This library is free software; you can redistribute it and/or
15  * modify it under the terms of the GNU Lesser General Public
16  * License as published by the Free Software Foundation; either
17  * version 2.1 of the License, or (at your option) any later version.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
27  */
28
29 #include "config.h"
30 #include "wine/port.h"
31 #include "wined3d_private.h"
32
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
34 WINE_DECLARE_DEBUG_CHANNEL(d3d);
35
36 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
37         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
38         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
39 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
40         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *fx,
41         WINED3DTEXTUREFILTERTYPE filter);
42
43 static void surface_cleanup(struct wined3d_surface *surface)
44 {
45     struct wined3d_surface *overlay, *cur;
46
47     TRACE("surface %p.\n", surface);
48
49     if (surface->texture_name || (surface->flags & SFLAG_PBO)
50              || surface->rb_multisample || surface->rb_resolved
51              || !list_empty(&surface->renderbuffers))
52     {
53         struct wined3d_renderbuffer_entry *entry, *entry2;
54         const struct wined3d_gl_info *gl_info;
55         struct wined3d_context *context;
56
57         context = context_acquire(surface->resource.device, NULL);
58         gl_info = context->gl_info;
59
60         ENTER_GL();
61
62         if (surface->texture_name)
63         {
64             TRACE("Deleting texture %u.\n", surface->texture_name);
65             glDeleteTextures(1, &surface->texture_name);
66         }
67
68         if (surface->flags & SFLAG_PBO)
69         {
70             TRACE("Deleting PBO %u.\n", surface->pbo);
71             GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
72         }
73
74         if (surface->rb_multisample)
75         {
76             TRACE("Deleting multisample renderbuffer %u.\n", surface->rb_multisample);
77             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
78         }
79
80         if (surface->rb_resolved)
81         {
82             TRACE("Deleting resolved renderbuffer %u.\n", surface->rb_resolved);
83             gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
84         }
85
86         LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
87         {
88             TRACE("Deleting renderbuffer %u.\n", entry->id);
89             gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
90             HeapFree(GetProcessHeap(), 0, entry);
91         }
92
93         LEAVE_GL();
94
95         context_release(context);
96     }
97
98     if (surface->flags & SFLAG_DIBSECTION)
99     {
100         /* Release the DC. */
101         SelectObject(surface->hDC, surface->dib.holdbitmap);
102         DeleteDC(surface->hDC);
103         /* Release the DIB section. */
104         DeleteObject(surface->dib.DIBsection);
105         surface->dib.bitmap_data = NULL;
106         surface->resource.allocatedMemory = NULL;
107     }
108
109     if (surface->flags & SFLAG_USERPTR)
110         wined3d_surface_set_mem(surface, NULL);
111     if (surface->overlay_dest)
112         list_remove(&surface->overlay_entry);
113
114     LIST_FOR_EACH_ENTRY_SAFE(overlay, cur, &surface->overlays, struct wined3d_surface, overlay_entry)
115     {
116         list_remove(&overlay->overlay_entry);
117         overlay->overlay_dest = NULL;
118     }
119
120     resource_cleanup(&surface->resource);
121 }
122
123 void surface_update_draw_binding(struct wined3d_surface *surface)
124 {
125     if (!surface_is_offscreen(surface) || wined3d_settings.offscreen_rendering_mode != ORM_FBO)
126         surface->draw_binding = SFLAG_INDRAWABLE;
127     else if (surface->resource.multisample_type)
128         surface->draw_binding = SFLAG_INRB_MULTISAMPLE;
129     else
130         surface->draw_binding = SFLAG_INTEXTURE;
131 }
132
133 void surface_set_container(struct wined3d_surface *surface, enum wined3d_container_type type, void *container)
134 {
135     TRACE("surface %p, container %p.\n", surface, container);
136
137     if (!container && type != WINED3D_CONTAINER_NONE)
138         ERR("Setting NULL container of type %#x.\n", type);
139
140     if (type == WINED3D_CONTAINER_SWAPCHAIN)
141     {
142         surface->get_drawable_size = get_drawable_size_swapchain;
143     }
144     else
145     {
146         switch (wined3d_settings.offscreen_rendering_mode)
147         {
148             case ORM_FBO:
149                 surface->get_drawable_size = get_drawable_size_fbo;
150                 break;
151
152             case ORM_BACKBUFFER:
153                 surface->get_drawable_size = get_drawable_size_backbuffer;
154                 break;
155
156             default:
157                 ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
158                 return;
159         }
160     }
161
162     surface->container.type = type;
163     surface->container.u.base = container;
164     surface_update_draw_binding(surface);
165 }
166
167 struct blt_info
168 {
169     GLenum binding;
170     GLenum bind_target;
171     enum tex_types tex_type;
172     GLfloat coords[4][3];
173 };
174
/* Rectangle with floating point edges: left, top, right and bottom. */
struct float_rect
{
    float l, t, r, b;
};
182
183 static inline void cube_coords_float(const RECT *r, UINT w, UINT h, struct float_rect *f)
184 {
185     f->l = ((r->left * 2.0f) / w) - 1.0f;
186     f->t = ((r->top * 2.0f) / h) - 1.0f;
187     f->r = ((r->right * 2.0f) / w) - 1.0f;
188     f->b = ((r->bottom * 2.0f) / h) - 1.0f;
189 }
190
191 static void surface_get_blt_info(GLenum target, const RECT *rect, GLsizei w, GLsizei h, struct blt_info *info)
192 {
193     GLfloat (*coords)[3] = info->coords;
194     struct float_rect f;
195
196     switch (target)
197     {
198         default:
199             FIXME("Unsupported texture target %#x\n", target);
200             /* Fall back to GL_TEXTURE_2D */
201         case GL_TEXTURE_2D:
202             info->binding = GL_TEXTURE_BINDING_2D;
203             info->bind_target = GL_TEXTURE_2D;
204             info->tex_type = tex_2d;
205             coords[0][0] = (float)rect->left / w;
206             coords[0][1] = (float)rect->top / h;
207             coords[0][2] = 0.0f;
208
209             coords[1][0] = (float)rect->right / w;
210             coords[1][1] = (float)rect->top / h;
211             coords[1][2] = 0.0f;
212
213             coords[2][0] = (float)rect->left / w;
214             coords[2][1] = (float)rect->bottom / h;
215             coords[2][2] = 0.0f;
216
217             coords[3][0] = (float)rect->right / w;
218             coords[3][1] = (float)rect->bottom / h;
219             coords[3][2] = 0.0f;
220             break;
221
222         case GL_TEXTURE_RECTANGLE_ARB:
223             info->binding = GL_TEXTURE_BINDING_RECTANGLE_ARB;
224             info->bind_target = GL_TEXTURE_RECTANGLE_ARB;
225             info->tex_type = tex_rect;
226             coords[0][0] = rect->left;  coords[0][1] = rect->top;       coords[0][2] = 0.0f;
227             coords[1][0] = rect->right; coords[1][1] = rect->top;       coords[1][2] = 0.0f;
228             coords[2][0] = rect->left;  coords[2][1] = rect->bottom;    coords[2][2] = 0.0f;
229             coords[3][0] = rect->right; coords[3][1] = rect->bottom;    coords[3][2] = 0.0f;
230             break;
231
232         case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
233             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
234             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
235             info->tex_type = tex_cube;
236             cube_coords_float(rect, w, h, &f);
237
238             coords[0][0] =  1.0f;   coords[0][1] = -f.t;   coords[0][2] = -f.l;
239             coords[1][0] =  1.0f;   coords[1][1] = -f.t;   coords[1][2] = -f.r;
240             coords[2][0] =  1.0f;   coords[2][1] = -f.b;   coords[2][2] = -f.l;
241             coords[3][0] =  1.0f;   coords[3][1] = -f.b;   coords[3][2] = -f.r;
242             break;
243
244         case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
245             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
246             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
247             info->tex_type = tex_cube;
248             cube_coords_float(rect, w, h, &f);
249
250             coords[0][0] = -1.0f;   coords[0][1] = -f.t;   coords[0][2] = f.l;
251             coords[1][0] = -1.0f;   coords[1][1] = -f.t;   coords[1][2] = f.r;
252             coords[2][0] = -1.0f;   coords[2][1] = -f.b;   coords[2][2] = f.l;
253             coords[3][0] = -1.0f;   coords[3][1] = -f.b;   coords[3][2] = f.r;
254             break;
255
256         case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
257             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
258             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
259             info->tex_type = tex_cube;
260             cube_coords_float(rect, w, h, &f);
261
262             coords[0][0] = f.l;   coords[0][1] =  1.0f;   coords[0][2] = f.t;
263             coords[1][0] = f.r;   coords[1][1] =  1.0f;   coords[1][2] = f.t;
264             coords[2][0] = f.l;   coords[2][1] =  1.0f;   coords[2][2] = f.b;
265             coords[3][0] = f.r;   coords[3][1] =  1.0f;   coords[3][2] = f.b;
266             break;
267
268         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
269             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
270             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
271             info->tex_type = tex_cube;
272             cube_coords_float(rect, w, h, &f);
273
274             coords[0][0] = f.l;   coords[0][1] = -1.0f;   coords[0][2] = -f.t;
275             coords[1][0] = f.r;   coords[1][1] = -1.0f;   coords[1][2] = -f.t;
276             coords[2][0] = f.l;   coords[2][1] = -1.0f;   coords[2][2] = -f.b;
277             coords[3][0] = f.r;   coords[3][1] = -1.0f;   coords[3][2] = -f.b;
278             break;
279
280         case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
281             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
282             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
283             info->tex_type = tex_cube;
284             cube_coords_float(rect, w, h, &f);
285
286             coords[0][0] = f.l;   coords[0][1] = -f.t;   coords[0][2] =  1.0f;
287             coords[1][0] = f.r;   coords[1][1] = -f.t;   coords[1][2] =  1.0f;
288             coords[2][0] = f.l;   coords[2][1] = -f.b;   coords[2][2] =  1.0f;
289             coords[3][0] = f.r;   coords[3][1] = -f.b;   coords[3][2] =  1.0f;
290             break;
291
292         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
293             info->binding = GL_TEXTURE_BINDING_CUBE_MAP_ARB;
294             info->bind_target = GL_TEXTURE_CUBE_MAP_ARB;
295             info->tex_type = tex_cube;
296             cube_coords_float(rect, w, h, &f);
297
298             coords[0][0] = -f.l;   coords[0][1] = -f.t;   coords[0][2] = -1.0f;
299             coords[1][0] = -f.r;   coords[1][1] = -f.t;   coords[1][2] = -1.0f;
300             coords[2][0] = -f.l;   coords[2][1] = -f.b;   coords[2][2] = -1.0f;
301             coords[3][0] = -f.r;   coords[3][1] = -f.b;   coords[3][2] = -1.0f;
302             break;
303     }
304 }
305
306 static void surface_get_rect(const struct wined3d_surface *surface, const RECT *rect_in, RECT *rect_out)
307 {
308     if (rect_in)
309         *rect_out = *rect_in;
310     else
311     {
312         rect_out->left = 0;
313         rect_out->top = 0;
314         rect_out->right = surface->resource.width;
315         rect_out->bottom = surface->resource.height;
316     }
317 }
318
319 /* GL locking and context activation is done by the caller */
320 void draw_textured_quad(const struct wined3d_surface *src_surface, struct wined3d_context *context,
321         const RECT *src_rect, const RECT *dst_rect, WINED3DTEXTUREFILTERTYPE Filter)
322 {
323     struct blt_info info;
324
325     surface_get_blt_info(src_surface->texture_target, src_rect, src_surface->pow2Width, src_surface->pow2Height, &info);
326
327     glEnable(info.bind_target);
328     checkGLcall("glEnable(bind_target)");
329
330     context_bind_texture(context, info.bind_target, src_surface->texture_name);
331
332     /* Filtering for StretchRect */
333     glTexParameteri(info.bind_target, GL_TEXTURE_MAG_FILTER,
334             wined3d_gl_mag_filter(magLookup, Filter));
335     checkGLcall("glTexParameteri");
336     glTexParameteri(info.bind_target, GL_TEXTURE_MIN_FILTER,
337             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
338     checkGLcall("glTexParameteri");
339     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
340     glTexParameteri(info.bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
341     if (context->gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
342         glTexParameteri(info.bind_target, GL_TEXTURE_SRGB_DECODE_EXT, GL_SKIP_DECODE_EXT);
343     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
344     checkGLcall("glTexEnvi");
345
346     /* Draw a quad */
347     glBegin(GL_TRIANGLE_STRIP);
348     glTexCoord3fv(info.coords[0]);
349     glVertex2i(dst_rect->left, dst_rect->top);
350
351     glTexCoord3fv(info.coords[1]);
352     glVertex2i(dst_rect->right, dst_rect->top);
353
354     glTexCoord3fv(info.coords[2]);
355     glVertex2i(dst_rect->left, dst_rect->bottom);
356
357     glTexCoord3fv(info.coords[3]);
358     glVertex2i(dst_rect->right, dst_rect->bottom);
359     glEnd();
360
361     /* Unbind the texture */
362     context_bind_texture(context, info.bind_target, 0);
363
364     /* We changed the filtering settings on the texture. Inform the
365      * container about this to get the filters reset properly next draw. */
366     if (src_surface->container.type == WINED3D_CONTAINER_TEXTURE)
367     {
368         struct wined3d_texture *texture = src_surface->container.u.texture;
369         texture->texture_rgb.states[WINED3DTEXSTA_MAGFILTER] = WINED3DTEXF_POINT;
370         texture->texture_rgb.states[WINED3DTEXSTA_MINFILTER] = WINED3DTEXF_POINT;
371         texture->texture_rgb.states[WINED3DTEXSTA_MIPFILTER] = WINED3DTEXF_NONE;
372         texture->texture_rgb.states[WINED3DTEXSTA_SRGBTEXTURE] = FALSE;
373     }
374 }
375
376 static HRESULT surface_create_dib_section(struct wined3d_surface *surface)
377 {
378     const struct wined3d_format *format = surface->resource.format;
379     SYSTEM_INFO sysInfo;
380     BITMAPINFO *b_info;
381     int extraline = 0;
382     DWORD *masks;
383     UINT usage;
384     HDC dc;
385
386     TRACE("surface %p.\n", surface);
387
388     if (!(format->flags & WINED3DFMT_FLAG_GETDC))
389     {
390         WARN("Cannot use GetDC on a %s surface.\n", debug_d3dformat(format->id));
391         return WINED3DERR_INVALIDCALL;
392     }
393
394     switch (format->byte_count)
395     {
396         case 2:
397         case 4:
398             /* Allocate extra space to store the RGB bit masks. */
399             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER) + 3 * sizeof(DWORD));
400             break;
401
402         case 3:
403             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(BITMAPINFOHEADER));
404             break;
405
406         default:
407             /* Allocate extra space for a palette. */
408             b_info = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
409                     sizeof(BITMAPINFOHEADER) + sizeof(RGBQUAD) * (1 << (format->byte_count * 8)));
410             break;
411     }
412
413     if (!b_info)
414         return E_OUTOFMEMORY;
415
416     /* Some applications access the surface in via DWORDs, and do not take
417      * the necessary care at the end of the surface. So we need at least
418      * 4 extra bytes at the end of the surface. Check against the page size,
419      * if the last page used for the surface has at least 4 spare bytes we're
420      * safe, otherwise add an extra line to the DIB section. */
421     GetSystemInfo(&sysInfo);
422     if( ((surface->resource.size + 3) % sysInfo.dwPageSize) < 4)
423     {
424         extraline = 1;
425         TRACE("Adding an extra line to the DIB section.\n");
426     }
427
428     b_info->bmiHeader.biSize = sizeof(BITMAPINFOHEADER);
429     /* TODO: Is there a nicer way to force a specific alignment? (8 byte for ddraw) */
430     b_info->bmiHeader.biWidth = wined3d_surface_get_pitch(surface) / format->byte_count;
431     b_info->bmiHeader.biHeight = 0 - surface->resource.height - extraline;
432     b_info->bmiHeader.biSizeImage = (surface->resource.height + extraline)
433             * wined3d_surface_get_pitch(surface);
434     b_info->bmiHeader.biPlanes = 1;
435     b_info->bmiHeader.biBitCount = format->byte_count * 8;
436
437     b_info->bmiHeader.biXPelsPerMeter = 0;
438     b_info->bmiHeader.biYPelsPerMeter = 0;
439     b_info->bmiHeader.biClrUsed = 0;
440     b_info->bmiHeader.biClrImportant = 0;
441
442     /* Get the bit masks */
443     masks = (DWORD *)b_info->bmiColors;
444     switch (surface->resource.format->id)
445     {
446         case WINED3DFMT_B8G8R8_UNORM:
447             usage = DIB_RGB_COLORS;
448             b_info->bmiHeader.biCompression = BI_RGB;
449             break;
450
451         case WINED3DFMT_B5G5R5X1_UNORM:
452         case WINED3DFMT_B5G5R5A1_UNORM:
453         case WINED3DFMT_B4G4R4A4_UNORM:
454         case WINED3DFMT_B4G4R4X4_UNORM:
455         case WINED3DFMT_B2G3R3_UNORM:
456         case WINED3DFMT_B2G3R3A8_UNORM:
457         case WINED3DFMT_R10G10B10A2_UNORM:
458         case WINED3DFMT_R8G8B8A8_UNORM:
459         case WINED3DFMT_R8G8B8X8_UNORM:
460         case WINED3DFMT_B10G10R10A2_UNORM:
461         case WINED3DFMT_B5G6R5_UNORM:
462         case WINED3DFMT_R16G16B16A16_UNORM:
463             usage = 0;
464             b_info->bmiHeader.biCompression = BI_BITFIELDS;
465             masks[0] = format->red_mask;
466             masks[1] = format->green_mask;
467             masks[2] = format->blue_mask;
468             break;
469
470         default:
471             /* Don't know palette */
472             b_info->bmiHeader.biCompression = BI_RGB;
473             usage = 0;
474             break;
475     }
476
477     if (!(dc = GetDC(0)))
478     {
479         HeapFree(GetProcessHeap(), 0, b_info);
480         return HRESULT_FROM_WIN32(GetLastError());
481     }
482
483     TRACE("Creating a DIB section with size %dx%dx%d, size=%d.\n",
484             b_info->bmiHeader.biWidth, b_info->bmiHeader.biHeight,
485             b_info->bmiHeader.biBitCount, b_info->bmiHeader.biSizeImage);
486     surface->dib.DIBsection = CreateDIBSection(dc, b_info, usage, &surface->dib.bitmap_data, 0, 0);
487     ReleaseDC(0, dc);
488
489     if (!surface->dib.DIBsection)
490     {
491         ERR("Failed to create DIB section.\n");
492         HeapFree(GetProcessHeap(), 0, b_info);
493         return HRESULT_FROM_WIN32(GetLastError());
494     }
495
496     TRACE("DIBSection at %p.\n", surface->dib.bitmap_data);
497     /* Copy the existing surface to the dib section. */
498     if (surface->resource.allocatedMemory)
499     {
500         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory,
501                 surface->resource.height * wined3d_surface_get_pitch(surface));
502     }
503     else
504     {
505         /* This is to make maps read the GL texture although memory is allocated. */
506         surface->flags &= ~SFLAG_INSYSMEM;
507     }
508     surface->dib.bitmap_size = b_info->bmiHeader.biSizeImage;
509
510     HeapFree(GetProcessHeap(), 0, b_info);
511
512     /* Now allocate a DC. */
513     surface->hDC = CreateCompatibleDC(0);
514     surface->dib.holdbitmap = SelectObject(surface->hDC, surface->dib.DIBsection);
515     TRACE("Using wined3d palette %p.\n", surface->palette);
516     SelectPalette(surface->hDC, surface->palette ? surface->palette->hpal : 0, FALSE);
517
518     surface->flags |= SFLAG_DIBSECTION;
519
520     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
521     surface->resource.heapMemory = NULL;
522
523     return WINED3D_OK;
524 }
525
526 static void surface_prepare_system_memory(struct wined3d_surface *surface)
527 {
528     struct wined3d_device *device = surface->resource.device;
529     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
530
531     TRACE("surface %p.\n", surface);
532
533     /* Performance optimization: Count how often a surface is locked, if it is
534      * locked regularly do not throw away the system memory copy. This avoids
535      * the need to download the surface from OpenGL all the time. The surface
536      * is still downloaded if the OpenGL texture is changed. */
537     if (!(surface->flags & SFLAG_DYNLOCK))
538     {
539         if (++surface->lockCount > MAXLOCKCOUNT)
540         {
541             TRACE("Surface is locked regularly, not freeing the system memory copy any more.\n");
542             surface->flags |= SFLAG_DYNLOCK;
543         }
544     }
545
546     /* Create a PBO for dynamically locked surfaces but don't do it for
547      * converted or NPOT surfaces. Also don't create a PBO for systemmem
548      * surfaces. */
549     if (gl_info->supported[ARB_PIXEL_BUFFER_OBJECT] && (surface->flags & SFLAG_DYNLOCK)
550             && !(surface->flags & (SFLAG_PBO | SFLAG_CONVERTED | SFLAG_NONPOW2))
551             && (surface->resource.pool != WINED3DPOOL_SYSTEMMEM))
552     {
553         struct wined3d_context *context;
554         GLenum error;
555
556         context = context_acquire(device, NULL);
557         ENTER_GL();
558
559         GL_EXTCALL(glGenBuffersARB(1, &surface->pbo));
560         error = glGetError();
561         if (!surface->pbo || error != GL_NO_ERROR)
562             ERR("Failed to create a PBO with error %s (%#x).\n", debug_glerror(error), error);
563
564         TRACE("Binding PBO %u.\n", surface->pbo);
565
566         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
567         checkGLcall("glBindBufferARB");
568
569         GL_EXTCALL(glBufferDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->resource.size + 4,
570                 surface->resource.allocatedMemory, GL_STREAM_DRAW_ARB));
571         checkGLcall("glBufferDataARB");
572
573         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
574         checkGLcall("glBindBufferARB");
575
576         /* We don't need the system memory anymore and we can't even use it for PBOs. */
577         if (!(surface->flags & SFLAG_CLIENT))
578         {
579             HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
580             surface->resource.heapMemory = NULL;
581         }
582         surface->resource.allocatedMemory = NULL;
583         surface->flags |= SFLAG_PBO;
584         LEAVE_GL();
585         context_release(context);
586     }
587     else if (!(surface->resource.allocatedMemory || surface->flags & SFLAG_PBO))
588     {
589         /* Whatever surface we have, make sure that there is memory allocated
590          * for the downloaded copy, or a PBO to map. */
591         if (!surface->resource.heapMemory)
592             surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
593
594         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
595                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
596
597         if (surface->flags & SFLAG_INSYSMEM)
598             ERR("Surface without memory or PBO has SFLAG_INSYSMEM set.\n");
599     }
600 }
601
602 static void surface_evict_sysmem(struct wined3d_surface *surface)
603 {
604     if (surface->flags & SFLAG_DONOTFREE)
605         return;
606
607     HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
608     surface->resource.allocatedMemory = NULL;
609     surface->resource.heapMemory = NULL;
610     surface_modify_location(surface, SFLAG_INSYSMEM, FALSE);
611 }
612
613 /* Context activation is done by the caller. */
614 static void surface_bind_and_dirtify(struct wined3d_surface *surface,
615         struct wined3d_context *context, BOOL srgb)
616 {
617     struct wined3d_device *device = surface->resource.device;
618     DWORD active_sampler;
619
620     /* We don't need a specific texture unit, but after binding the texture
621      * the current unit is dirty. Read the unit back instead of switching to
622      * 0, this avoids messing around with the state manager's GL states. The
623      * current texture unit should always be a valid one.
624      *
625      * To be more specific, this is tricky because we can implicitly be
626      * called from sampler() in state.c. This means we can't touch anything
627      * other than whatever happens to be the currently active texture, or we
628      * would risk marking already applied sampler states dirty again. */
629     active_sampler = device->rev_tex_unit_map[context->active_texture];
630
631     if (active_sampler != WINED3D_UNMAPPED_STAGE)
632         device_invalidate_state(device, STATE_SAMPLER(active_sampler));
633     surface_bind(surface, context, srgb);
634 }
635
636 static void surface_force_reload(struct wined3d_surface *surface)
637 {
638     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
639 }
640
641 static void surface_release_client_storage(struct wined3d_surface *surface)
642 {
643     struct wined3d_context *context = context_acquire(surface->resource.device, NULL);
644
645     ENTER_GL();
646     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
647     if (surface->texture_name)
648     {
649         surface_bind_and_dirtify(surface, context, FALSE);
650         glTexImage2D(surface->texture_target, surface->texture_level,
651                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
652     }
653     if (surface->texture_name_srgb)
654     {
655         surface_bind_and_dirtify(surface, context, TRUE);
656         glTexImage2D(surface->texture_target, surface->texture_level,
657                 GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, NULL);
658     }
659     glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
660     LEAVE_GL();
661
662     context_release(context);
663
664     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
665     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
666     surface_force_reload(surface);
667 }
668
669 static HRESULT surface_private_setup(struct wined3d_surface *surface)
670 {
671     /* TODO: Check against the maximum texture sizes supported by the video card. */
672     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
673     unsigned int pow2Width, pow2Height;
674
675     TRACE("surface %p.\n", surface);
676
677     surface->texture_name = 0;
678     surface->texture_target = GL_TEXTURE_2D;
679
680     /* Non-power2 support */
681     if (gl_info->supported[ARB_TEXTURE_NON_POWER_OF_TWO] || gl_info->supported[WINED3D_GL_NORMALIZED_TEXRECT])
682     {
683         pow2Width = surface->resource.width;
684         pow2Height = surface->resource.height;
685     }
686     else
687     {
688         /* Find the nearest pow2 match */
689         pow2Width = pow2Height = 1;
690         while (pow2Width < surface->resource.width)
691             pow2Width <<= 1;
692         while (pow2Height < surface->resource.height)
693             pow2Height <<= 1;
694     }
695     surface->pow2Width = pow2Width;
696     surface->pow2Height = pow2Height;
697
698     if (pow2Width > surface->resource.width || pow2Height > surface->resource.height)
699     {
700         /* TODO: Add support for non power two compressed textures. */
701         if (surface->resource.format->flags & WINED3DFMT_FLAG_COMPRESSED)
702         {
703             FIXME("(%p) Compressed non-power-two textures are not supported w(%d) h(%d)\n",
704                   surface, surface->resource.width, surface->resource.height);
705             return WINED3DERR_NOTAVAILABLE;
706         }
707     }
708
709     if (pow2Width != surface->resource.width
710             || pow2Height != surface->resource.height)
711     {
712         surface->flags |= SFLAG_NONPOW2;
713     }
714
715     if ((surface->pow2Width > gl_info->limits.texture_size || surface->pow2Height > gl_info->limits.texture_size)
716             && !(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
717     {
718         /* One of three options:
719          * 1: Do the same as we do with NPOT and scale the texture, (any
720          *    texture ops would require the texture to be scaled which is
721          *    potentially slow)
722          * 2: Set the texture to the maximum size (bad idea).
723          * 3: WARN and return WINED3DERR_NOTAVAILABLE;
724          * 4: Create the surface, but allow it to be used only for DirectDraw
725          *    Blts. Some apps (e.g. Swat 3) create textures with a Height of
726          *    16 and a Width > 3000 and blt 16x16 letter areas from them to
727          *    the render target. */
728         if (surface->resource.pool == WINED3DPOOL_DEFAULT || surface->resource.pool == WINED3DPOOL_MANAGED)
729         {
730             WARN("Unable to allocate a surface which exceeds the maximum OpenGL texture size.\n");
731             return WINED3DERR_NOTAVAILABLE;
732         }
733
734         /* We should never use this surface in combination with OpenGL! */
735         TRACE("Creating an oversized surface: %ux%u.\n",
736                 surface->pow2Width, surface->pow2Height);
737     }
738     else
739     {
740         /* Don't use ARB_TEXTURE_RECTANGLE in case the surface format is P8
741          * and EXT_PALETTED_TEXTURE is used in combination with texture
742          * uploads (RTL_READTEX/RTL_TEXTEX). The reason is that
743          * EXT_PALETTED_TEXTURE doesn't work in combination with
744          * ARB_TEXTURE_RECTANGLE. */
745         if (surface->flags & SFLAG_NONPOW2 && gl_info->supported[ARB_TEXTURE_RECTANGLE]
746                 && !(surface->resource.format->id == WINED3DFMT_P8_UINT
747                 && gl_info->supported[EXT_PALETTED_TEXTURE]
748                 && wined3d_settings.rendertargetlock_mode == RTL_READTEX))
749         {
750             surface->texture_target = GL_TEXTURE_RECTANGLE_ARB;
751             surface->pow2Width = surface->resource.width;
752             surface->pow2Height = surface->resource.height;
753             surface->flags &= ~(SFLAG_NONPOW2 | SFLAG_NORMCOORD);
754         }
755     }
756
757     switch (wined3d_settings.offscreen_rendering_mode)
758     {
759         case ORM_FBO:
760             surface->get_drawable_size = get_drawable_size_fbo;
761             break;
762
763         case ORM_BACKBUFFER:
764             surface->get_drawable_size = get_drawable_size_backbuffer;
765             break;
766
767         default:
768             ERR("Unhandled offscreen rendering mode %#x.\n", wined3d_settings.offscreen_rendering_mode);
769             return WINED3DERR_INVALIDCALL;
770     }
771
772     surface->flags |= SFLAG_INSYSMEM;
773
774     return WINED3D_OK;
775 }
776
777 static void surface_realize_palette(struct wined3d_surface *surface)
778 {
779     struct wined3d_palette *palette = surface->palette;
780
781     TRACE("surface %p.\n", surface);
782
783     if (!palette) return;
784
785     if (surface->resource.format->id == WINED3DFMT_P8_UINT
786             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
787     {
788         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
789         {
790             /* Make sure the texture is up to date. This call doesn't do
791              * anything if the texture is already up to date. */
792             surface_load_location(surface, SFLAG_INTEXTURE, NULL);
793
794             /* We want to force a palette refresh, so mark the drawable as not being up to date */
795             if (!surface_is_offscreen(surface))
796                 surface_modify_location(surface, SFLAG_INDRAWABLE, FALSE);
797         }
798         else
799         {
800             if (!(surface->flags & SFLAG_INSYSMEM))
801             {
802                 TRACE("Palette changed with surface that does not have an up to date system memory copy.\n");
803                 surface_load_location(surface, SFLAG_INSYSMEM, NULL);
804             }
805             surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
806         }
807     }
808
809     if (surface->flags & SFLAG_DIBSECTION)
810     {
811         RGBQUAD col[256];
812         unsigned int i;
813
814         TRACE("Updating the DC's palette.\n");
815
816         for (i = 0; i < 256; ++i)
817         {
818             col[i].rgbRed   = palette->palents[i].peRed;
819             col[i].rgbGreen = palette->palents[i].peGreen;
820             col[i].rgbBlue  = palette->palents[i].peBlue;
821             col[i].rgbReserved = 0;
822         }
823         SetDIBColorTable(surface->hDC, 0, 256, col);
824     }
825
826     /* Propagate the changes to the drawable when we have a palette. */
827     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
828         surface_load_location(surface, surface->draw_binding, NULL);
829 }
830
831 static HRESULT surface_draw_overlay(struct wined3d_surface *surface)
832 {
833     HRESULT hr;
834
835     /* If there's no destination surface there is nothing to do. */
836     if (!surface->overlay_dest)
837         return WINED3D_OK;
838
839     /* Blt calls ModifyLocation on the dest surface, which in turn calls
840      * DrawOverlay to update the overlay. Prevent an endless recursion. */
841     if (surface->overlay_dest->flags & SFLAG_INOVERLAYDRAW)
842         return WINED3D_OK;
843
844     surface->overlay_dest->flags |= SFLAG_INOVERLAYDRAW;
845     hr = wined3d_surface_blt(surface->overlay_dest, &surface->overlay_destrect, surface,
846             &surface->overlay_srcrect, WINEDDBLT_WAIT, NULL, WINED3DTEXF_LINEAR);
847     surface->overlay_dest->flags &= ~SFLAG_INOVERLAYDRAW;
848
849     return hr;
850 }
851
852 static void surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
853 {
854     struct wined3d_device *device = surface->resource.device;
855     const RECT *pass_rect = rect;
856
857     TRACE("surface %p, rect %s, flags %#x.\n",
858             surface, wine_dbgstr_rect(rect), flags);
859
860     if (flags & WINED3DLOCK_DISCARD)
861     {
862         TRACE("WINED3DLOCK_DISCARD flag passed, marking SYSMEM as up to date.\n");
863         surface_prepare_system_memory(surface);
864         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
865     }
866     else
867     {
868         /* surface_load_location() does not check if the rectangle specifies
869          * the full surface. Most callers don't need that, so do it here. */
870         if (rect && !rect->top && !rect->left
871                 && rect->right == surface->resource.width
872                 && rect->bottom == surface->resource.height)
873             pass_rect = NULL;
874
875         if (!(wined3d_settings.rendertargetlock_mode == RTL_DISABLE
876                 && ((surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
877                 || surface == device->fb.render_targets[0])))
878             surface_load_location(surface, SFLAG_INSYSMEM, pass_rect);
879     }
880
881     if (surface->flags & SFLAG_PBO)
882     {
883         const struct wined3d_gl_info *gl_info;
884         struct wined3d_context *context;
885
886         context = context_acquire(device, NULL);
887         gl_info = context->gl_info;
888
889         ENTER_GL();
890         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
891         checkGLcall("glBindBufferARB");
892
893         /* This shouldn't happen but could occur if some other function
894          * didn't handle the PBO properly. */
895         if (surface->resource.allocatedMemory)
896             ERR("The surface already has PBO memory allocated.\n");
897
898         surface->resource.allocatedMemory = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
899         checkGLcall("glMapBufferARB");
900
901         /* Make sure the PBO isn't set anymore in order not to break non-PBO
902          * calls. */
903         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
904         checkGLcall("glBindBufferARB");
905
906         LEAVE_GL();
907         context_release(context);
908     }
909
910     if (!(flags & (WINED3DLOCK_NO_DIRTY_UPDATE | WINED3DLOCK_READONLY)))
911     {
912         if (!rect)
913             surface_add_dirty_rect(surface, NULL);
914         else
915         {
916             WINED3DBOX b;
917
918             b.Left = rect->left;
919             b.Top = rect->top;
920             b.Right = rect->right;
921             b.Bottom = rect->bottom;
922             b.Front = 0;
923             b.Back = 1;
924             surface_add_dirty_rect(surface, &b);
925         }
926     }
927 }
928
929 static void surface_unmap(struct wined3d_surface *surface)
930 {
931     struct wined3d_device *device = surface->resource.device;
932     BOOL fullsurface;
933
934     TRACE("surface %p.\n", surface);
935
936     memset(&surface->lockedRect, 0, sizeof(surface->lockedRect));
937
938     if (surface->flags & SFLAG_PBO)
939     {
940         const struct wined3d_gl_info *gl_info;
941         struct wined3d_context *context;
942
943         TRACE("Freeing PBO memory.\n");
944
945         context = context_acquire(device, NULL);
946         gl_info = context->gl_info;
947
948         ENTER_GL();
949         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
950         GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
951         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
952         checkGLcall("glUnmapBufferARB");
953         LEAVE_GL();
954         context_release(context);
955
956         surface->resource.allocatedMemory = NULL;
957     }
958
959     TRACE("dirtyfied %u.\n", surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE) ? 0 : 1);
960
961     if (surface->flags & (SFLAG_INDRAWABLE | SFLAG_INTEXTURE))
962     {
963         TRACE("Not dirtified, nothing to do.\n");
964         goto done;
965     }
966
967     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
968             || (device->fb.render_targets && surface == device->fb.render_targets[0]))
969     {
970         if (wined3d_settings.rendertargetlock_mode == RTL_DISABLE)
971         {
972             static BOOL warned = FALSE;
973             if (!warned)
974             {
975                 ERR("The application tries to write to the render target, but render target locking is disabled.\n");
976                 warned = TRUE;
977             }
978             goto done;
979         }
980
981         if (!surface->dirtyRect.left && !surface->dirtyRect.top
982                 && surface->dirtyRect.right == surface->resource.width
983                 && surface->dirtyRect.bottom == surface->resource.height)
984         {
985             fullsurface = TRUE;
986         }
987         else
988         {
989             /* TODO: Proper partial rectangle tracking. */
990             fullsurface = FALSE;
991             surface->flags |= SFLAG_INSYSMEM;
992         }
993
994         surface_load_location(surface, surface->draw_binding, fullsurface ? NULL : &surface->dirtyRect);
995
996         /* Partial rectangle tracking is not commonly implemented, it is only
997          * done for render targets. INSYSMEM was set before to tell
998          * surface_load_location() where to read the rectangle from.
999          * Indrawable is set because all modifications from the partial
1000          * sysmem copy are written back to the drawable, thus the surface is
1001          * merged again in the drawable. The sysmem copy is not fully up to
1002          * date because only a subrectangle was read in Map(). */
1003         if (!fullsurface)
1004         {
1005             surface_modify_location(surface, surface->draw_binding, TRUE);
1006             surface_evict_sysmem(surface);
1007         }
1008
1009         surface->dirtyRect.left = surface->resource.width;
1010         surface->dirtyRect.top = surface->resource.height;
1011         surface->dirtyRect.right = 0;
1012         surface->dirtyRect.bottom = 0;
1013     }
1014     else if (surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL))
1015     {
1016         FIXME("Depth / stencil buffer locking is not implemented.\n");
1017     }
1018
1019 done:
1020     /* Overlays have to be redrawn manually after changes with the GL implementation */
1021     if (surface->overlay_dest)
1022         surface->surface_ops->surface_draw_overlay(surface);
1023 }
1024
1025 static BOOL surface_is_full_rect(const struct wined3d_surface *surface, const RECT *r)
1026 {
1027     if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
1028         return FALSE;
1029     if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
1030         return FALSE;
1031     return TRUE;
1032 }
1033
1034 static void wined3d_surface_depth_blt_fbo(const struct wined3d_device *device, struct wined3d_surface *src_surface,
1035         const RECT *src_rect, struct wined3d_surface *dst_surface, const RECT *dst_rect)
1036 {
1037     const struct wined3d_gl_info *gl_info;
1038     struct wined3d_context *context;
1039     DWORD src_mask, dst_mask;
1040     GLbitfield gl_mask;
1041
1042     TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
1043             device, src_surface, wine_dbgstr_rect(src_rect),
1044             dst_surface, wine_dbgstr_rect(dst_rect));
1045
1046     src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1047     dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1048
1049     if (src_mask != dst_mask)
1050     {
1051         ERR("Incompatible formats %s and %s.\n",
1052                 debug_d3dformat(src_surface->resource.format->id),
1053                 debug_d3dformat(dst_surface->resource.format->id));
1054         return;
1055     }
1056
1057     if (!src_mask)
1058     {
1059         ERR("Not a depth / stencil format: %s.\n",
1060                 debug_d3dformat(src_surface->resource.format->id));
1061         return;
1062     }
1063
1064     gl_mask = 0;
1065     if (src_mask & WINED3DFMT_FLAG_DEPTH)
1066         gl_mask |= GL_DEPTH_BUFFER_BIT;
1067     if (src_mask & WINED3DFMT_FLAG_STENCIL)
1068         gl_mask |= GL_STENCIL_BUFFER_BIT;
1069
1070     /* Make sure the locations are up-to-date. Loading the destination
1071      * surface isn't required if the entire surface is overwritten. */
1072     surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
1073     if (!surface_is_full_rect(dst_surface, dst_rect))
1074         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
1075
1076     context = context_acquire(device, NULL);
1077     if (!context->valid)
1078     {
1079         context_release(context);
1080         WARN("Invalid context, skipping blit.\n");
1081         return;
1082     }
1083
1084     gl_info = context->gl_info;
1085
1086     ENTER_GL();
1087
1088     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
1089     glReadBuffer(GL_NONE);
1090     checkGLcall("glReadBuffer()");
1091     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1092
1093     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
1094     context_set_draw_buffer(context, GL_NONE);
1095     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1096
1097     if (gl_mask & GL_DEPTH_BUFFER_BIT)
1098     {
1099         glDepthMask(GL_TRUE);
1100         context_invalidate_state(context, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
1101     }
1102     if (gl_mask & GL_STENCIL_BUFFER_BIT)
1103     {
1104         if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
1105         {
1106             glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
1107             context_invalidate_state(context, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
1108         }
1109         glStencilMask(~0U);
1110         context_invalidate_state(context, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
1111     }
1112
1113     glDisable(GL_SCISSOR_TEST);
1114     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1115
1116     gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
1117             dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
1118     checkGLcall("glBlitFramebuffer()");
1119
1120     LEAVE_GL();
1121
1122     if (wined3d_settings.strict_draw_ordering)
1123         wglFlush(); /* Flush to ensure ordering across contexts. */
1124
1125     context_release(context);
1126 }
1127
1128 /* Blit between surface locations. Onscreen on different swapchains is not supported.
1129  * Depth / stencil is not supported. */
1130 static void surface_blt_fbo(const struct wined3d_device *device, const WINED3DTEXTUREFILTERTYPE filter,
1131         struct wined3d_surface *src_surface, DWORD src_location, const RECT *src_rect_in,
1132         struct wined3d_surface *dst_surface, DWORD dst_location, const RECT *dst_rect_in)
1133 {
1134     const struct wined3d_gl_info *gl_info;
1135     struct wined3d_context *context;
1136     RECT src_rect, dst_rect;
1137     GLenum gl_filter;
1138     GLenum buffer;
1139
1140     TRACE("device %p, filter %s,\n", device, debug_d3dtexturefiltertype(filter));
1141     TRACE("src_surface %p, src_location %s, src_rect %s,\n",
1142             src_surface, debug_surflocation(src_location), wine_dbgstr_rect(src_rect_in));
1143     TRACE("dst_surface %p, dst_location %s, dst_rect %s.\n",
1144             dst_surface, debug_surflocation(dst_location), wine_dbgstr_rect(dst_rect_in));
1145
1146     src_rect = *src_rect_in;
1147     dst_rect = *dst_rect_in;
1148
1149     switch (filter)
1150     {
1151         case WINED3DTEXF_LINEAR:
1152             gl_filter = GL_LINEAR;
1153             break;
1154
1155         default:
1156             FIXME("Unsupported filter mode %s (%#x).\n", debug_d3dtexturefiltertype(filter), filter);
1157         case WINED3DTEXF_NONE:
1158         case WINED3DTEXF_POINT:
1159             gl_filter = GL_NEAREST;
1160             break;
1161     }
1162
1163     /* Resolve the source surface first if needed. */
1164     if (src_location == SFLAG_INRB_MULTISAMPLE
1165             && (src_surface->resource.format->id != dst_surface->resource.format->id
1166                 || abs(src_rect.bottom - src_rect.top) != abs(dst_rect.bottom - dst_rect.top)
1167                 || abs(src_rect.right - src_rect.left) != abs(dst_rect.right - dst_rect.left)))
1168         src_location = SFLAG_INRB_RESOLVED;
1169
1170     /* Make sure the locations are up-to-date. Loading the destination
1171      * surface isn't required if the entire surface is overwritten. (And is
1172      * in fact harmful if we're being called by surface_load_location() with
1173      * the purpose of loading the destination surface.) */
1174     surface_load_location(src_surface, src_location, NULL);
1175     if (!surface_is_full_rect(dst_surface, &dst_rect))
1176         surface_load_location(dst_surface, dst_location, NULL);
1177
1178     if (src_location == SFLAG_INDRAWABLE) context = context_acquire(device, src_surface);
1179     else if (dst_location == SFLAG_INDRAWABLE) context = context_acquire(device, dst_surface);
1180     else context = context_acquire(device, NULL);
1181
1182     if (!context->valid)
1183     {
1184         context_release(context);
1185         WARN("Invalid context, skipping blit.\n");
1186         return;
1187     }
1188
1189     gl_info = context->gl_info;
1190
1191     if (src_location == SFLAG_INDRAWABLE)
1192     {
1193         TRACE("Source surface %p is onscreen.\n", src_surface);
1194         buffer = surface_get_gl_buffer(src_surface);
1195         surface_translate_drawable_coords(src_surface, context->win_handle, &src_rect);
1196     }
1197     else
1198     {
1199         TRACE("Source surface %p is offscreen.\n", src_surface);
1200         buffer = GL_COLOR_ATTACHMENT0;
1201     }
1202
1203     ENTER_GL();
1204     context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, src_surface, NULL, src_location);
1205     glReadBuffer(buffer);
1206     checkGLcall("glReadBuffer()");
1207     context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
1208     LEAVE_GL();
1209
1210     if (dst_location == SFLAG_INDRAWABLE)
1211     {
1212         TRACE("Destination surface %p is onscreen.\n", dst_surface);
1213         buffer = surface_get_gl_buffer(dst_surface);
1214         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
1215     }
1216     else
1217     {
1218         TRACE("Destination surface %p is offscreen.\n", dst_surface);
1219         buffer = GL_COLOR_ATTACHMENT0;
1220     }
1221
1222     ENTER_GL();
1223     context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
1224     context_set_draw_buffer(context, buffer);
1225     context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
1226     context_invalidate_state(context, STATE_FRAMEBUFFER);
1227
1228     glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1229     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
1230     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
1231     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
1232     context_invalidate_state(context, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
1233
1234     glDisable(GL_SCISSOR_TEST);
1235     context_invalidate_state(context, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
1236
1237     gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
1238             dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
1239     checkGLcall("glBlitFramebuffer()");
1240
1241     LEAVE_GL();
1242
1243     if (wined3d_settings.strict_draw_ordering
1244             || (dst_location == SFLAG_INDRAWABLE
1245             && dst_surface->container.u.swapchain->front_buffer == dst_surface))
1246         wglFlush();
1247
1248     context_release(context);
1249 }
1250
1251 static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
1252         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
1253         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
1254 {
1255     if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
1256         return FALSE;
1257
1258     /* Source and/or destination need to be on the GL side */
1259     if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
1260         return FALSE;
1261
1262     switch (blit_op)
1263     {
1264         case WINED3D_BLIT_OP_COLOR_BLIT:
1265             if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
1266                 return FALSE;
1267             if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
1268                 return FALSE;
1269             break;
1270
1271         case WINED3D_BLIT_OP_DEPTH_BLIT:
1272             if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1273                 return FALSE;
1274             if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
1275                 return FALSE;
1276             break;
1277
1278         default:
1279             return FALSE;
1280     }
1281
1282     if (!(src_format->id == dst_format->id
1283             || (is_identity_fixup(src_format->color_fixup)
1284             && is_identity_fixup(dst_format->color_fixup))))
1285         return FALSE;
1286
1287     return TRUE;
1288 }
1289
1290 /* This function checks if the primary render target uses the 8bit paletted format. */
1291 static BOOL primary_render_target_is_p8(const struct wined3d_device *device)
1292 {
1293     if (device->fb.render_targets && device->fb.render_targets[0])
1294     {
1295         const struct wined3d_surface *render_target = device->fb.render_targets[0];
1296         if ((render_target->resource.usage & WINED3DUSAGE_RENDERTARGET)
1297                 && (render_target->resource.format->id == WINED3DFMT_P8_UINT))
1298             return TRUE;
1299     }
1300     return FALSE;
1301 }
1302
1303 static BOOL surface_convert_color_to_float(const struct wined3d_surface *surface,
1304         DWORD color, WINED3DCOLORVALUE *float_color)
1305 {
1306     const struct wined3d_format *format = surface->resource.format;
1307     const struct wined3d_device *device = surface->resource.device;
1308
1309     switch (format->id)
1310     {
1311         case WINED3DFMT_P8_UINT:
1312             if (surface->palette)
1313             {
1314                 float_color->r = surface->palette->palents[color].peRed / 255.0f;
1315                 float_color->g = surface->palette->palents[color].peGreen / 255.0f;
1316                 float_color->b = surface->palette->palents[color].peBlue / 255.0f;
1317             }
1318             else
1319             {
1320                 float_color->r = 0.0f;
1321                 float_color->g = 0.0f;
1322                 float_color->b = 0.0f;
1323             }
1324             float_color->a = primary_render_target_is_p8(device) ? color / 255.0f : 1.0f;
1325             break;
1326
1327         case WINED3DFMT_B5G6R5_UNORM:
1328             float_color->r = ((color >> 11) & 0x1f) / 31.0f;
1329             float_color->g = ((color >> 5) & 0x3f) / 63.0f;
1330             float_color->b = (color & 0x1f) / 31.0f;
1331             float_color->a = 1.0f;
1332             break;
1333
1334         case WINED3DFMT_B8G8R8_UNORM:
1335         case WINED3DFMT_B8G8R8X8_UNORM:
1336             float_color->r = D3DCOLOR_R(color);
1337             float_color->g = D3DCOLOR_G(color);
1338             float_color->b = D3DCOLOR_B(color);
1339             float_color->a = 1.0f;
1340             break;
1341
1342         case WINED3DFMT_B8G8R8A8_UNORM:
1343             float_color->r = D3DCOLOR_R(color);
1344             float_color->g = D3DCOLOR_G(color);
1345             float_color->b = D3DCOLOR_B(color);
1346             float_color->a = D3DCOLOR_A(color);
1347             break;
1348
1349         default:
1350             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1351             return FALSE;
1352     }
1353
1354     return TRUE;
1355 }
1356
1357 static BOOL surface_convert_depth_to_float(const struct wined3d_surface *surface, DWORD depth, float *float_depth)
1358 {
1359     const struct wined3d_format *format = surface->resource.format;
1360
1361     switch (format->id)
1362     {
1363         case WINED3DFMT_S1_UINT_D15_UNORM:
1364             *float_depth = depth / (float)0x00007fff;
1365             break;
1366
1367         case WINED3DFMT_D16_UNORM:
1368             *float_depth = depth / (float)0x0000ffff;
1369             break;
1370
1371         case WINED3DFMT_D24_UNORM_S8_UINT:
1372         case WINED3DFMT_X8D24_UNORM:
1373             *float_depth = depth / (float)0x00ffffff;
1374             break;
1375
1376         case WINED3DFMT_D32_UNORM:
1377             *float_depth = depth / (float)0xffffffff;
1378             break;
1379
1380         default:
1381             ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
1382             return FALSE;
1383     }
1384
1385     return TRUE;
1386 }
1387
1388 /* Do not call while under the GL lock. */
1389 static HRESULT wined3d_surface_depth_fill(struct wined3d_surface *surface, const RECT *rect, float depth)
1390 {
1391     const struct wined3d_resource *resource = &surface->resource;
1392     struct wined3d_device *device = resource->device;
1393     const struct blit_shader *blitter;
1394
1395     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
1396             NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
1397     if (!blitter)
1398     {
1399         FIXME("No blitter is capable of performing the requested depth fill operation.\n");
1400         return WINED3DERR_INVALIDCALL;
1401     }
1402
1403     return blitter->depth_fill(device, surface, rect, depth);
1404 }
1405
1406 static HRESULT wined3d_surface_depth_blt(struct wined3d_surface *src_surface, const RECT *src_rect,
1407         struct wined3d_surface *dst_surface, const RECT *dst_rect)
1408 {
1409     struct wined3d_device *device = src_surface->resource.device;
1410
1411     if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
1412             src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1413             dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1414         return WINED3DERR_INVALIDCALL;
1415
1416     wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
1417
1418     surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
1419             dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
1420     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
1421
1422     return WINED3D_OK;
1423 }
1424
1425 /* Do not call while under the GL lock. */
1426 HRESULT CDECL wined3d_surface_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect_in,
1427         struct wined3d_surface *src_surface, const RECT *src_rect_in, DWORD flags,
1428         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
1429 {
1430     struct wined3d_swapchain *src_swapchain, *dst_swapchain;
1431     struct wined3d_device *device = dst_surface->resource.device;
1432     DWORD src_ds_flags, dst_ds_flags;
1433     RECT src_rect, dst_rect;
1434     BOOL scale, convert;
1435
1436     static const DWORD simple_blit = WINEDDBLT_ASYNC
1437             | WINEDDBLT_COLORFILL
1438             | WINEDDBLT_WAIT
1439             | WINEDDBLT_DEPTHFILL
1440             | WINEDDBLT_DONOTWAIT;
1441
1442     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
1443             dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
1444             flags, fx, debug_d3dtexturefiltertype(filter));
1445     TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
1446
1447     if (fx)
1448     {
1449         TRACE("dwSize %#x.\n", fx->dwSize);
1450         TRACE("dwDDFX %#x.\n", fx->dwDDFX);
1451         TRACE("dwROP %#x.\n", fx->dwROP);
1452         TRACE("dwDDROP %#x.\n", fx->dwDDROP);
1453         TRACE("dwRotationAngle %#x.\n", fx->dwRotationAngle);
1454         TRACE("dwZBufferOpCode %#x.\n", fx->dwZBufferOpCode);
1455         TRACE("dwZBufferLow %#x.\n", fx->dwZBufferLow);
1456         TRACE("dwZBufferHigh %#x.\n", fx->dwZBufferHigh);
1457         TRACE("dwZBufferBaseDest %#x.\n", fx->dwZBufferBaseDest);
1458         TRACE("dwZDestConstBitDepth %#x.\n", fx->dwZDestConstBitDepth);
1459         TRACE("lpDDSZBufferDest %p.\n", fx->u1.lpDDSZBufferDest);
1460         TRACE("dwZSrcConstBitDepth %#x.\n", fx->dwZSrcConstBitDepth);
1461         TRACE("lpDDSZBufferSrc %p.\n", fx->u2.lpDDSZBufferSrc);
1462         TRACE("dwAlphaEdgeBlendBitDepth %#x.\n", fx->dwAlphaEdgeBlendBitDepth);
1463         TRACE("dwAlphaEdgeBlend %#x.\n", fx->dwAlphaEdgeBlend);
1464         TRACE("dwReserved %#x.\n", fx->dwReserved);
1465         TRACE("dwAlphaDestConstBitDepth %#x.\n", fx->dwAlphaDestConstBitDepth);
1466         TRACE("lpDDSAlphaDest %p.\n", fx->u3.lpDDSAlphaDest);
1467         TRACE("dwAlphaSrcConstBitDepth %#x.\n", fx->dwAlphaSrcConstBitDepth);
1468         TRACE("lpDDSAlphaSrc %p.\n", fx->u4.lpDDSAlphaSrc);
1469         TRACE("lpDDSPattern %p.\n", fx->u5.lpDDSPattern);
1470         TRACE("ddckDestColorkey {%#x, %#x}.\n",
1471                 fx->ddckDestColorkey.dwColorSpaceLowValue,
1472                 fx->ddckDestColorkey.dwColorSpaceHighValue);
1473         TRACE("ddckSrcColorkey {%#x, %#x}.\n",
1474                 fx->ddckSrcColorkey.dwColorSpaceLowValue,
1475                 fx->ddckSrcColorkey.dwColorSpaceHighValue);
1476     }
1477
1478     if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
1479     {
1480         WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
1481         return WINEDDERR_SURFACEBUSY;
1482     }
1483
1484     surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
1485
1486     if (dst_rect.left >= dst_rect.right || dst_rect.top >= dst_rect.bottom
1487             || dst_rect.left > dst_surface->resource.width || dst_rect.left < 0
1488             || dst_rect.top > dst_surface->resource.height || dst_rect.top < 0
1489             || dst_rect.right > dst_surface->resource.width || dst_rect.right < 0
1490             || dst_rect.bottom > dst_surface->resource.height || dst_rect.bottom < 0)
1491     {
1492         /* The destination rect can be out of bounds on the condition
1493          * that a clipper is set for the surface. */
1494         if (dst_surface->clipper)
1495             FIXME("Blit clipping not implemented.\n");
1496         else
1497             WARN("The application gave us a bad destination rectangle without a clipper set.\n");
1498         return WINEDDERR_INVALIDRECT;
1499     }
1500
1501     if (src_surface)
1502     {
1503         surface_get_rect(src_surface, src_rect_in, &src_rect);
1504
1505         if (src_rect.left >= src_rect.right || src_rect.top >= src_rect.bottom
1506                 || src_rect.left > src_surface->resource.width || src_rect.left < 0
1507                 || src_rect.top > src_surface->resource.height || src_rect.top < 0
1508                 || src_rect.right > src_surface->resource.width || src_rect.right < 0
1509                 || src_rect.bottom > src_surface->resource.height || src_rect.bottom < 0)
1510         {
1511             WARN("Application gave us bad source rectangle for Blt.\n");
1512             return WINEDDERR_INVALIDRECT;
1513         }
1514     }
1515     else
1516     {
1517         memset(&src_rect, 0, sizeof(src_rect));
1518     }
1519
1520     if (!fx || !(fx->dwDDFX))
1521         flags &= ~WINEDDBLT_DDFX;
1522
1523     if (flags & WINEDDBLT_WAIT)
1524         flags &= ~WINEDDBLT_WAIT;
1525
1526     if (flags & WINEDDBLT_ASYNC)
1527     {
1528         static unsigned int once;
1529
1530         if (!once++)
1531             FIXME("Can't handle WINEDDBLT_ASYNC flag.\n");
1532         flags &= ~WINEDDBLT_ASYNC;
1533     }
1534
1535     /* WINEDDBLT_DONOTWAIT appeared in DX7. */
1536     if (flags & WINEDDBLT_DONOTWAIT)
1537     {
1538         static unsigned int once;
1539
1540         if (!once++)
1541             FIXME("Can't handle WINEDDBLT_DONOTWAIT flag.\n");
1542         flags &= ~WINEDDBLT_DONOTWAIT;
1543     }
1544
1545     if (!device->d3d_initialized)
1546     {
1547         WARN("D3D not initialized, using fallback.\n");
1548         goto cpu;
1549     }
1550
1551     /* We want to avoid invalidating the sysmem location for converted
1552      * surfaces, since otherwise we'd have to convert the data back when
1553      * locking them. */
1554     if (dst_surface->flags & SFLAG_CONVERTED)
1555     {
1556         WARN("Converted surface, using CPU blit.\n");
1557         return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1558     }
1559
1560     if (flags & ~simple_blit)
1561     {
1562         WARN("Using fallback for complex blit (%#x).\n", flags);
1563         goto fallback;
1564     }
1565
1566     if (src_surface && src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1567         src_swapchain = src_surface->container.u.swapchain;
1568     else
1569         src_swapchain = NULL;
1570
1571     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1572         dst_swapchain = dst_surface->container.u.swapchain;
1573     else
1574         dst_swapchain = NULL;
1575
1576     /* This isn't strictly needed. FBO blits for example could deal with
1577      * cross-swapchain blits by first downloading the source to a texture
1578      * before switching to the destination context. We just have this here to
1579      * not have to deal with the issue, since cross-swapchain blits should be
1580      * rare. */
1581     if (src_swapchain && dst_swapchain && src_swapchain != dst_swapchain)
1582     {
1583         FIXME("Using fallback for cross-swapchain blit.\n");
1584         goto fallback;
1585     }
1586
1587     scale = src_surface
1588             && (src_rect.right - src_rect.left != dst_rect.right - dst_rect.left
1589             || src_rect.bottom - src_rect.top != dst_rect.bottom - dst_rect.top);
1590     convert = src_surface && src_surface->resource.format->id != dst_surface->resource.format->id;
1591
1592     dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1593     if (src_surface)
1594         src_ds_flags = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
1595     else
1596         src_ds_flags = 0;
1597
1598     if (src_ds_flags || dst_ds_flags)
1599     {
1600         if (flags & WINEDDBLT_DEPTHFILL)
1601         {
1602             float depth;
1603
1604             TRACE("Depth fill.\n");
1605
1606             if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
1607                 return WINED3DERR_INVALIDCALL;
1608
1609             if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &dst_rect, depth)))
1610                 return WINED3D_OK;
1611         }
1612         else
1613         {
1614             /* Accessing depth / stencil surfaces is supposed to fail while in
1615              * a scene, except for fills, which seem to work. */
1616             if (device->inScene)
1617             {
1618                 WARN("Rejecting depth / stencil access while in scene.\n");
1619                 return WINED3DERR_INVALIDCALL;
1620             }
1621
1622             if (src_ds_flags != dst_ds_flags)
1623             {
1624                 WARN("Rejecting depth / stencil blit between incompatible formats.\n");
1625                 return WINED3DERR_INVALIDCALL;
1626             }
1627
1628             if (src_rect.top || src_rect.left
1629                     || src_rect.bottom != src_surface->resource.height
1630                     || src_rect.right != src_surface->resource.width)
1631             {
1632                 WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
1633                         wine_dbgstr_rect(&src_rect));
1634                 return WINED3DERR_INVALIDCALL;
1635             }
1636
1637             if (dst_rect.top || dst_rect.left
1638                     || dst_rect.bottom != dst_surface->resource.height
1639                     || dst_rect.right != dst_surface->resource.width)
1640             {
1641                 WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
1642                         wine_dbgstr_rect(&src_rect));
1643                 return WINED3DERR_INVALIDCALL;
1644             }
1645
1646             if (scale)
1647             {
1648                 WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
1649                 return WINED3DERR_INVALIDCALL;
1650             }
1651
1652             if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
1653                 return WINED3D_OK;
1654         }
1655     }
1656     else
1657     {
1658         /* In principle this would apply to depth blits as well, but we don't
1659          * implement those in the CPU blitter at the moment. */
1660         if ((dst_surface->flags & SFLAG_INSYSMEM)
1661                 && (!src_surface || (src_surface->flags & SFLAG_INSYSMEM)))
1662         {
1663             if (scale)
1664                 TRACE("Not doing sysmem blit because of scaling.\n");
1665             else if (convert)
1666                 TRACE("Not doing sysmem blit because of format conversion.\n");
1667             else
1668                 return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1669         }
1670
1671         if (flags & WINEDDBLT_COLORFILL)
1672         {
1673             WINED3DCOLORVALUE color;
1674
1675             TRACE("Color fill.\n");
1676
1677             if (!surface_convert_color_to_float(dst_surface, fx->u5.dwFillColor, &color))
1678                 goto fallback;
1679
1680             if (SUCCEEDED(surface_color_fill(dst_surface, &dst_rect, &color)))
1681                 return WINED3D_OK;
1682         }
1683         else
1684         {
1685             TRACE("Color blit.\n");
1686
1687             /* Upload */
1688             if ((src_surface->flags & SFLAG_INSYSMEM) && !(dst_surface->flags & SFLAG_INSYSMEM))
1689             {
1690                 if (scale)
1691                     TRACE("Not doing upload because of scaling.\n");
1692                 else if (convert)
1693                     TRACE("Not doing upload because of format conversion.\n");
1694                 else
1695                 {
1696                     POINT dst_point = {dst_rect.left, dst_rect.top};
1697
1698                     if (SUCCEEDED(surface_upload_from_surface(dst_surface, &dst_point, src_surface, &src_rect)))
1699                     {
1700                         if (!surface_is_offscreen(dst_surface))
1701                             surface_load_location(dst_surface, dst_surface->draw_binding, NULL);
1702                         return WINED3D_OK;
1703                     }
1704                 }
1705             }
1706
1707             /* Use present for back -> front blits. The idea behind this is
1708              * that present is potentially faster than a blit, in particular
1709              * when FBO blits aren't available. Some ddraw applications like
1710              * Half-Life and Prince of Persia 3D use Blt() from the backbuffer
1711              * to the frontbuffer instead of doing a Flip(). D3D8 and D3D9
1712              * applications can't blit directly to the frontbuffer. */
1713             if (dst_swapchain && dst_swapchain->back_buffers
1714                     && dst_surface == dst_swapchain->front_buffer
1715                     && src_surface == dst_swapchain->back_buffers[0])
1716             {
1717                 WINED3DSWAPEFFECT swap_effect = dst_swapchain->presentParms.SwapEffect;
1718
1719                 TRACE("Using present for backbuffer -> frontbuffer blit.\n");
1720
1721                 /* Set the swap effect to COPY, we don't want the backbuffer
1722                  * to become undefined. */
1723                 dst_swapchain->presentParms.SwapEffect = WINED3DSWAPEFFECT_COPY;
1724                 wined3d_swapchain_present(dst_swapchain, NULL, NULL, dst_swapchain->win_handle, NULL, 0);
1725                 dst_swapchain->presentParms.SwapEffect = swap_effect;
1726
1727                 return WINED3D_OK;
1728             }
1729
1730             if (fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1731                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1732                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1733             {
1734                 TRACE("Using FBO blit.\n");
1735
1736                 surface_blt_fbo(device, filter,
1737                         src_surface, src_surface->draw_binding, &src_rect,
1738                         dst_surface, dst_surface->draw_binding, &dst_rect);
1739                 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
1740                 return WINED3D_OK;
1741             }
1742
1743             if (arbfp_blit.blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
1744                     &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
1745                     &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
1746             {
1747                 TRACE("Using arbfp blit.\n");
1748
1749                 if (SUCCEEDED(arbfp_blit_surface(device, filter, src_surface, &src_rect, dst_surface, &dst_rect)))
1750                     return WINED3D_OK;
1751             }
1752         }
1753     }
1754
1755 fallback:
1756
1757     /* Special cases for render targets. */
1758     if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
1759             || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
1760     {
1761         if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, &dst_rect,
1762                 src_surface, &src_rect, flags, fx, filter)))
1763             return WINED3D_OK;
1764     }
1765
1766 cpu:
1767
1768     /* For the rest call the X11 surface implementation. For render targets
1769      * this should be implemented OpenGL accelerated in BltOverride, other
1770      * blits are rather rare. */
1771     return surface_cpu_blt(dst_surface, &dst_rect, src_surface, &src_rect, flags, fx, filter);
1772 }
1773
1774 HRESULT CDECL wined3d_surface_get_render_target_data(struct wined3d_surface *surface,
1775         struct wined3d_surface *render_target)
1776 {
1777     TRACE("surface %p, render_target %p.\n", surface, render_target);
1778
1779     /* TODO: Check surface sizes, pools, etc. */
1780
1781     if (render_target->resource.multisample_type)
1782         return WINED3DERR_INVALIDCALL;
1783
1784     return wined3d_surface_blt(surface, NULL, render_target, NULL, 0, NULL, WINED3DTEXF_POINT);
1785 }
1786
1787 /* Context activation is done by the caller. */
1788 static void surface_remove_pbo(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
1789 {
1790     if (!surface->resource.heapMemory)
1791     {
1792         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
1793         surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
1794                 + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
1795     }
1796
1797     ENTER_GL();
1798     GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
1799     checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
1800     GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
1801             surface->resource.size, surface->resource.allocatedMemory));
1802     checkGLcall("glGetBufferSubDataARB");
1803     GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
1804     checkGLcall("glDeleteBuffersARB");
1805     LEAVE_GL();
1806
1807     surface->pbo = 0;
1808     surface->flags &= ~SFLAG_PBO;
1809 }
1810
1811 /* Do not call while under the GL lock. */
1812 static void surface_unload(struct wined3d_resource *resource)
1813 {
1814     struct wined3d_surface *surface = surface_from_resource(resource);
1815     struct wined3d_renderbuffer_entry *entry, *entry2;
1816     struct wined3d_device *device = resource->device;
1817     const struct wined3d_gl_info *gl_info;
1818     struct wined3d_context *context;
1819
1820     TRACE("surface %p.\n", surface);
1821
1822     if (resource->pool == WINED3DPOOL_DEFAULT)
1823     {
1824         /* Default pool resources are supposed to be destroyed before Reset is called.
1825          * Implicit resources stay however. So this means we have an implicit render target
1826          * or depth stencil. The content may be destroyed, but we still have to tear down
1827          * opengl resources, so we cannot leave early.
1828          *
1829          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
1830          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
1831          * or the depth stencil into an FBO the texture or render buffer will be removed
1832          * and all flags get lost
1833          */
1834         surface_init_sysmem(surface);
1835         /* We also get here when the ddraw swapchain is destroyed, for example
1836          * for a mode switch. In this case this surface won't necessarily be
1837          * an implicit surface. We have to mark it lost so that the
1838          * application can restore it after the mode switch. */
1839         surface->flags |= SFLAG_LOST;
1840     }
1841     else
1842     {
1843         /* Load the surface into system memory */
1844         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
1845         surface_modify_location(surface, surface->draw_binding, FALSE);
1846     }
1847     surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
1848     surface_modify_location(surface, SFLAG_INSRGBTEX, FALSE);
1849     surface->flags &= ~(SFLAG_ALLOCATED | SFLAG_SRGBALLOCATED);
1850
1851     context = context_acquire(device, NULL);
1852     gl_info = context->gl_info;
1853
1854     /* Destroy PBOs, but load them into real sysmem before */
1855     if (surface->flags & SFLAG_PBO)
1856         surface_remove_pbo(surface, gl_info);
1857
1858     /* Destroy fbo render buffers. This is needed for implicit render targets, for
1859      * all application-created targets the application has to release the surface
1860      * before calling _Reset
1861      */
1862     LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
1863     {
1864         ENTER_GL();
1865         gl_info->fbo_ops.glDeleteRenderbuffers(1, &entry->id);
1866         LEAVE_GL();
1867         list_remove(&entry->entry);
1868         HeapFree(GetProcessHeap(), 0, entry);
1869     }
1870     list_init(&surface->renderbuffers);
1871     surface->current_renderbuffer = NULL;
1872
1873     ENTER_GL();
1874
1875     /* If we're in a texture, the texture name belongs to the texture.
1876      * Otherwise, destroy it. */
1877     if (surface->container.type != WINED3D_CONTAINER_TEXTURE)
1878     {
1879         glDeleteTextures(1, &surface->texture_name);
1880         surface->texture_name = 0;
1881         glDeleteTextures(1, &surface->texture_name_srgb);
1882         surface->texture_name_srgb = 0;
1883     }
1884     if (surface->rb_multisample)
1885     {
1886         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_multisample);
1887         surface->rb_multisample = 0;
1888     }
1889     if (surface->rb_resolved)
1890     {
1891         gl_info->fbo_ops.glDeleteRenderbuffers(1, &surface->rb_resolved);
1892         surface->rb_resolved = 0;
1893     }
1894
1895     LEAVE_GL();
1896
1897     context_release(context);
1898
1899     resource_unload(resource);
1900 }
1901
1902 static const struct wined3d_resource_ops surface_resource_ops =
1903 {
1904     surface_unload,
1905 };
1906
1907 static const struct wined3d_surface_ops surface_ops =
1908 {
1909     surface_private_setup,
1910     surface_realize_palette,
1911     surface_draw_overlay,
1912     surface_map,
1913     surface_unmap,
1914 };
1915
1916 /*****************************************************************************
1917  * Initializes the GDI surface, aka creates the DIB section we render to
1918  * The DIB section creation is done by calling GetDC, which will create the
1919  * section and releasing the dc to allow the app to use it. The dib section
1920  * will stay until the surface is released
1921  *
1922  * GDI surfaces do not need to be a power of 2 in size, so the pow2 sizes
1923  * are set to the real sizes to save memory. The NONPOW2 flag is unset to
1924  * avoid confusion in the shared surface code.
1925  *
1926  * Returns:
1927  *  WINED3D_OK on success
1928  *  The return values of called methods on failure
1929  *
1930  *****************************************************************************/
1931 static HRESULT gdi_surface_private_setup(struct wined3d_surface *surface)
1932 {
1933     HRESULT hr;
1934
1935     TRACE("surface %p.\n", surface);
1936
1937     if (surface->resource.usage & WINED3DUSAGE_OVERLAY)
1938     {
1939         ERR("Overlays not yet supported by GDI surfaces.\n");
1940         return WINED3DERR_INVALIDCALL;
1941     }
1942
1943     /* Sysmem textures have memory already allocated - release it,
1944      * this avoids an unnecessary memcpy. */
1945     hr = surface_create_dib_section(surface);
1946     if (SUCCEEDED(hr))
1947     {
1948         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
1949         surface->resource.heapMemory = NULL;
1950         surface->resource.allocatedMemory = surface->dib.bitmap_data;
1951     }
1952
1953     /* We don't mind the nonpow2 stuff in GDI. */
1954     surface->pow2Width = surface->resource.width;
1955     surface->pow2Height = surface->resource.height;
1956
1957     return WINED3D_OK;
1958 }
1959
1960 static void gdi_surface_realize_palette(struct wined3d_surface *surface)
1961 {
1962     struct wined3d_palette *palette = surface->palette;
1963
1964     TRACE("surface %p.\n", surface);
1965
1966     if (!palette) return;
1967
1968     if (surface->flags & SFLAG_DIBSECTION)
1969     {
1970         RGBQUAD col[256];
1971         unsigned int i;
1972
1973         TRACE("Updating the DC's palette.\n");
1974
1975         for (i = 0; i < 256; ++i)
1976         {
1977             col[i].rgbRed = palette->palents[i].peRed;
1978             col[i].rgbGreen = palette->palents[i].peGreen;
1979             col[i].rgbBlue = palette->palents[i].peBlue;
1980             col[i].rgbReserved = 0;
1981         }
1982         SetDIBColorTable(surface->hDC, 0, 256, col);
1983     }
1984
1985     /* Update the image because of the palette change. Some games like e.g.
1986      * Red Alert call SetEntries a lot to implement fading. */
1987     /* Tell the swapchain to update the screen. */
1988     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
1989     {
1990         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
1991         if (surface == swapchain->front_buffer)
1992         {
1993             x11_copy_to_screen(swapchain, NULL);
1994         }
1995     }
1996 }
1997
1998 static HRESULT gdi_surface_draw_overlay(struct wined3d_surface *surface)
1999 {
2000     FIXME("GDI surfaces can't draw overlays yet.\n");
2001     return E_FAIL;
2002 }
2003
2004 static void gdi_surface_map(struct wined3d_surface *surface, const RECT *rect, DWORD flags)
2005 {
2006     TRACE("surface %p, rect %s, flags %#x.\n",
2007             surface, wine_dbgstr_rect(rect), flags);
2008
2009     if (!surface->resource.allocatedMemory)
2010     {
2011         /* This happens on gdi surfaces if the application set a user pointer
2012          * and resets it. Recreate the DIB section. */
2013         surface_create_dib_section(surface);
2014         surface->resource.allocatedMemory = surface->dib.bitmap_data;
2015     }
2016 }
2017
2018 static void gdi_surface_unmap(struct wined3d_surface *surface)
2019 {
2020     TRACE("surface %p.\n", surface);
2021
2022     /* Tell the swapchain to update the screen. */
2023     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
2024     {
2025         struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2026         if (surface == swapchain->front_buffer)
2027         {
2028             x11_copy_to_screen(swapchain, &surface->lockedRect);
2029         }
2030     }
2031
2032     memset(&surface->lockedRect, 0, sizeof(RECT));
2033 }
2034
2035 static const struct wined3d_surface_ops gdi_surface_ops =
2036 {
2037     gdi_surface_private_setup,
2038     gdi_surface_realize_palette,
2039     gdi_surface_draw_overlay,
2040     gdi_surface_map,
2041     gdi_surface_unmap,
2042 };
2043
2044 void surface_set_texture_name(struct wined3d_surface *surface, GLuint new_name, BOOL srgb)
2045 {
2046     GLuint *name;
2047     DWORD flag;
2048
2049     TRACE("surface %p, new_name %u, srgb %#x.\n", surface, new_name, srgb);
2050
2051     if(srgb)
2052     {
2053         name = &surface->texture_name_srgb;
2054         flag = SFLAG_INSRGBTEX;
2055     }
2056     else
2057     {
2058         name = &surface->texture_name;
2059         flag = SFLAG_INTEXTURE;
2060     }
2061
2062     if (!*name && new_name)
2063     {
2064         /* FIXME: We shouldn't need to remove SFLAG_INTEXTURE if the
2065          * surface has no texture name yet. See if we can get rid of this. */
2066         if (surface->flags & flag)
2067             ERR("Surface has %s set, but no texture name.\n", debug_surflocation(flag));
2068         surface_modify_location(surface, flag, FALSE);
2069     }
2070
2071     *name = new_name;
2072     surface_force_reload(surface);
2073 }
2074
2075 void surface_set_texture_target(struct wined3d_surface *surface, GLenum target)
2076 {
2077     TRACE("surface %p, target %#x.\n", surface, target);
2078
2079     if (surface->texture_target != target)
2080     {
2081         if (target == GL_TEXTURE_RECTANGLE_ARB)
2082         {
2083             surface->flags &= ~SFLAG_NORMCOORD;
2084         }
2085         else if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
2086         {
2087             surface->flags |= SFLAG_NORMCOORD;
2088         }
2089     }
2090     surface->texture_target = target;
2091     surface_force_reload(surface);
2092 }
2093
2094 /* Context activation is done by the caller. */
2095 void surface_bind(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
2096 {
2097     TRACE("surface %p, context %p, srgb %#x.\n", surface, context, srgb);
2098
2099     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2100     {
2101         struct wined3d_texture *texture = surface->container.u.texture;
2102
2103         TRACE("Passing to container (%p).\n", texture);
2104         texture->texture_ops->texture_bind(texture, context, srgb);
2105     }
2106     else
2107     {
2108         if (surface->texture_level)
2109         {
2110             ERR("Standalone surface %p is non-zero texture level %u.\n",
2111                     surface, surface->texture_level);
2112         }
2113
2114         if (srgb)
2115             ERR("Trying to bind standalone surface %p as sRGB.\n", surface);
2116
2117         ENTER_GL();
2118
2119         if (!surface->texture_name)
2120         {
2121             glGenTextures(1, &surface->texture_name);
2122             checkGLcall("glGenTextures");
2123
2124             TRACE("Surface %p given name %u.\n", surface, surface->texture_name);
2125
2126             context_bind_texture(context, surface->texture_target, surface->texture_name);
2127             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
2128             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
2129             glTexParameteri(surface->texture_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
2130             glTexParameteri(surface->texture_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2131             glTexParameteri(surface->texture_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2132             checkGLcall("glTexParameteri");
2133         }
2134         else
2135         {
2136             context_bind_texture(context, surface->texture_target, surface->texture_name);
2137         }
2138
2139         LEAVE_GL();
2140     }
2141 }
2142
2143 /* This call just downloads data, the caller is responsible for binding the
2144  * correct texture. */
2145 /* Context activation is done by the caller. */
2146 static void surface_download_data(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
2147 {
2148     const struct wined3d_format *format = surface->resource.format;
2149
2150     /* Only support read back of converted P8 surfaces. */
2151     if (surface->flags & SFLAG_CONVERTED && format->id != WINED3DFMT_P8_UINT)
2152     {
2153         ERR("Trying to read back converted surface %p with format %s.\n", surface, debug_d3dformat(format->id));
2154         return;
2155     }
2156
2157     ENTER_GL();
2158
2159     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2160     {
2161         TRACE("(%p) : Calling glGetCompressedTexImageARB level %d, format %#x, type %#x, data %p.\n",
2162                 surface, surface->texture_level, format->glFormat, format->glType,
2163                 surface->resource.allocatedMemory);
2164
2165         if (surface->flags & SFLAG_PBO)
2166         {
2167             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2168             checkGLcall("glBindBufferARB");
2169             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target, surface->texture_level, NULL));
2170             checkGLcall("glGetCompressedTexImageARB");
2171             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2172             checkGLcall("glBindBufferARB");
2173         }
2174         else
2175         {
2176             GL_EXTCALL(glGetCompressedTexImageARB(surface->texture_target,
2177                     surface->texture_level, surface->resource.allocatedMemory));
2178             checkGLcall("glGetCompressedTexImageARB");
2179         }
2180
2181         LEAVE_GL();
2182     }
2183     else
2184     {
2185         void *mem;
2186         GLenum gl_format = format->glFormat;
2187         GLenum gl_type = format->glType;
2188         int src_pitch = 0;
2189         int dst_pitch = 0;
2190
2191         /* In case of P8 the index is stored in the alpha component if the primary render target uses P8. */
2192         if (format->id == WINED3DFMT_P8_UINT && primary_render_target_is_p8(surface->resource.device))
2193         {
2194             gl_format = GL_ALPHA;
2195             gl_type = GL_UNSIGNED_BYTE;
2196         }
2197
2198         if (surface->flags & SFLAG_NONPOW2)
2199         {
2200             unsigned char alignment = surface->resource.device->surface_alignment;
2201             src_pitch = format->byte_count * surface->pow2Width;
2202             dst_pitch = wined3d_surface_get_pitch(surface);
2203             src_pitch = (src_pitch + alignment - 1) & ~(alignment - 1);
2204             mem = HeapAlloc(GetProcessHeap(), 0, src_pitch * surface->pow2Height);
2205         }
2206         else
2207         {
2208             mem = surface->resource.allocatedMemory;
2209         }
2210
2211         TRACE("(%p) : Calling glGetTexImage level %d, format %#x, type %#x, data %p\n",
2212                 surface, surface->texture_level, gl_format, gl_type, mem);
2213
2214         if (surface->flags & SFLAG_PBO)
2215         {
2216             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
2217             checkGLcall("glBindBufferARB");
2218
2219             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, NULL);
2220             checkGLcall("glGetTexImage");
2221
2222             GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
2223             checkGLcall("glBindBufferARB");
2224         }
2225         else
2226         {
2227             glGetTexImage(surface->texture_target, surface->texture_level, gl_format, gl_type, mem);
2228             checkGLcall("glGetTexImage");
2229         }
2230         LEAVE_GL();
2231
2232         if (surface->flags & SFLAG_NONPOW2)
2233         {
2234             const BYTE *src_data;
2235             BYTE *dst_data;
2236             UINT y;
2237             /*
2238              * Some games (e.g. warhammer 40k) don't work properly with the odd pitches, preventing
2239              * the surface pitch from being used to box non-power2 textures. Instead we have to use a hack to
2240              * repack the texture so that the bpp * width pitch can be used instead of bpp * pow2width.
2241              *
2242              * We're doing this...
2243              *
2244              * instead of boxing the texture :
2245              * |<-texture width ->|  -->pow2width|   /\
2246              * |111111111111111111|              |   |
2247              * |222 Texture 222222| boxed empty  | texture height
2248              * |3333 Data 33333333|              |   |
2249              * |444444444444444444|              |   \/
2250              * -----------------------------------   |
2251              * |     boxed  empty | boxed empty  | pow2height
2252              * |                  |              |   \/
2253              * -----------------------------------
2254              *
2255              *
2256              * we're repacking the data to the expected texture width
2257              *
2258              * |<-texture width ->|  -->pow2width|   /\
2259              * |111111111111111111222222222222222|   |
2260              * |222333333333333333333444444444444| texture height
2261              * |444444                           |   |
2262              * |                                 |   \/
2263              * |                                 |   |
2264              * |            empty                | pow2height
2265              * |                                 |   \/
2266              * -----------------------------------
2267              *
2268              * == is the same as
2269              *
2270              * |<-texture width ->|    /\
2271              * |111111111111111111|
2272              * |222222222222222222|texture height
2273              * |333333333333333333|
2274              * |444444444444444444|    \/
2275              * --------------------
2276              *
2277              * this also means that any references to allocatedMemory should work with the data as if were a
2278              * standard texture with a non-power2 width instead of texture boxed up to be a power2 texture.
2279              *
2280              * internally the texture is still stored in a boxed format so any references to textureName will
2281              * get a boxed texture with width pow2width and not a texture of width resource.width.
2282              *
2283              * Performance should not be an issue, because applications normally do not lock the surfaces when
2284              * rendering. If an app does, the SFLAG_DYNLOCK flag will kick in and the memory copy won't be released,
2285              * and doesn't have to be re-read. */
2286             src_data = mem;
2287             dst_data = surface->resource.allocatedMemory;
2288             TRACE("(%p) : Repacking the surface data from pitch %d to pitch %d\n", surface, src_pitch, dst_pitch);
2289             for (y = 1; y < surface->resource.height; ++y)
2290             {
2291                 /* skip the first row */
2292                 src_data += src_pitch;
2293                 dst_data += dst_pitch;
2294                 memcpy(dst_data, src_data, dst_pitch);
2295             }
2296
2297             HeapFree(GetProcessHeap(), 0, mem);
2298         }
2299     }
2300
2301     /* Surface has now been downloaded */
2302     surface->flags |= SFLAG_INSYSMEM;
2303 }
2304
2305 /* This call just uploads data, the caller is responsible for binding the
2306  * correct texture. */
2307 /* Context activation is done by the caller. */
2308 static void surface_upload_data(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2309         const struct wined3d_format *format, const RECT *src_rect, UINT src_pitch, const POINT *dst_point,
2310         BOOL srgb, const struct wined3d_bo_address *data)
2311 {
2312     UINT update_w = src_rect->right - src_rect->left;
2313     UINT update_h = src_rect->bottom - src_rect->top;
2314
2315     TRACE("surface %p, gl_info %p, format %s, src_rect %s, src_pitch %u, dst_point %s, srgb %#x, data {%#x:%p}.\n",
2316             surface, gl_info, debug_d3dformat(format->id), wine_dbgstr_rect(src_rect), src_pitch,
2317             wine_dbgstr_point(dst_point), srgb, data->buffer_object, data->addr);
2318
2319     if (format->heightscale != 1.0f && format->heightscale != 0.0f)
2320         update_h *= format->heightscale;
2321
2322     ENTER_GL();
2323
2324     if (data->buffer_object)
2325     {
2326         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, data->buffer_object));
2327         checkGLcall("glBindBufferARB");
2328     }
2329
2330     if (format->flags & WINED3DFMT_FLAG_COMPRESSED)
2331     {
2332         UINT row_length = wined3d_format_calculate_size(format, 1, update_w, 1);
2333         UINT row_count = (update_h + format->block_height - 1) / format->block_height;
2334         const BYTE *addr = data->addr;
2335         GLenum internal;
2336
2337         addr += (src_rect->top / format->block_height) * src_pitch;
2338         addr += (src_rect->left / format->block_width) * format->block_byte_count;
2339
2340         if (srgb)
2341             internal = format->glGammaInternal;
2342         else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2343             internal = format->rtInternal;
2344         else
2345             internal = format->glInternal;
2346
2347         TRACE("glCompressedTexSubImage2DARB, target %#x, level %d, x %d, y %d, w %d, h %d, "
2348                 "format %#x, image_size %#x, addr %p.\n", surface->texture_target, surface->texture_level,
2349                 dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr);
2350
2351         if (row_length == src_pitch)
2352         {
2353             GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2354                     dst_point->x, dst_point->y, update_w, update_h, internal, row_count * row_length, addr));
2355         }
2356         else
2357         {
2358             UINT row, y;
2359
2360             /* glCompressedTexSubImage2DARB() ignores pixel store state, so we
2361              * can't use the unpack row length like below. */
2362             for (row = 0, y = dst_point->y; row < row_count; ++row)
2363             {
2364                 GL_EXTCALL(glCompressedTexSubImage2DARB(surface->texture_target, surface->texture_level,
2365                         dst_point->x, y, update_w, format->block_height, internal, row_length, addr));
2366                 y += format->block_height;
2367                 addr += src_pitch;
2368             }
2369         }
2370         checkGLcall("glCompressedTexSubImage2DARB");
2371     }
2372     else
2373     {
2374         const BYTE *addr = data->addr;
2375
2376         addr += src_rect->top * src_pitch;
2377         addr += src_rect->left * format->byte_count;
2378
2379         TRACE("glTexSubImage2D, target %#x, level %d, x %d, y %d, w %d, h %d, format %#x, type %#x, addr %p.\n",
2380                 surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2381                 update_w, update_h, format->glFormat, format->glType, addr);
2382
2383         glPixelStorei(GL_UNPACK_ROW_LENGTH, src_pitch / format->byte_count);
2384         glTexSubImage2D(surface->texture_target, surface->texture_level, dst_point->x, dst_point->y,
2385                 update_w, update_h, format->glFormat, format->glType, addr);
2386         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
2387         checkGLcall("glTexSubImage2D");
2388     }
2389
2390     if (data->buffer_object)
2391     {
2392         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
2393         checkGLcall("glBindBufferARB");
2394     }
2395
2396     LEAVE_GL();
2397
2398     if (wined3d_settings.strict_draw_ordering)
2399         wglFlush();
2400
2401     if (gl_info->quirks & WINED3D_QUIRK_FBO_TEX_UPDATE)
2402     {
2403         struct wined3d_device *device = surface->resource.device;
2404         unsigned int i;
2405
2406         for (i = 0; i < device->context_count; ++i)
2407         {
2408             context_surface_update(device->contexts[i], surface);
2409         }
2410     }
2411 }
2412
2413 HRESULT surface_upload_from_surface(struct wined3d_surface *dst_surface, const POINT *dst_point,
2414         struct wined3d_surface *src_surface, const RECT *src_rect)
2415 {
2416     const struct wined3d_format *src_format;
2417     const struct wined3d_format *dst_format;
2418     const struct wined3d_gl_info *gl_info;
2419     struct wined3d_context *context;
2420     struct wined3d_bo_address data;
2421     struct wined3d_format format;
2422     UINT update_w, update_h;
2423     CONVERT_TYPES convert;
2424     UINT dst_w, dst_h;
2425     UINT src_w, src_h;
2426     UINT src_pitch;
2427     POINT p;
2428     RECT r;
2429
2430     TRACE("dst_surface %p, dst_point %s, src_surface %p, src_rect %s.\n",
2431             dst_surface, wine_dbgstr_point(dst_point),
2432             src_surface, wine_dbgstr_rect(src_rect));
2433
2434     src_format = src_surface->resource.format;
2435     dst_format = dst_surface->resource.format;
2436
2437     if (src_format->id != dst_format->id)
2438     {
2439         WARN("Source and destination surfaces should have the same format.\n");
2440         return WINED3DERR_INVALIDCALL;
2441     }
2442
2443     if (!dst_point)
2444     {
2445         p.x = 0;
2446         p.y = 0;
2447         dst_point = &p;
2448     }
2449     else if (dst_point->x < 0 || dst_point->y < 0)
2450     {
2451         WARN("Invalid destination point.\n");
2452         return WINED3DERR_INVALIDCALL;
2453     }
2454
2455     if (!src_rect)
2456     {
2457         r.left = 0;
2458         r.top = 0;
2459         r.right = src_surface->resource.width;
2460         r.bottom = src_surface->resource.height;
2461         src_rect = &r;
2462     }
2463     else if (src_rect->left < 0 || src_rect->left >= src_rect->right
2464             || src_rect->top < 0 || src_rect->top >= src_rect->bottom)
2465     {
2466         WARN("Invalid source rectangle.\n");
2467         return WINED3DERR_INVALIDCALL;
2468     }
2469
2470     src_w = src_surface->resource.width;
2471     src_h = src_surface->resource.height;
2472
2473     dst_w = dst_surface->resource.width;
2474     dst_h = dst_surface->resource.height;
2475
2476     update_w = src_rect->right - src_rect->left;
2477     update_h = src_rect->bottom - src_rect->top;
2478
2479     if (update_w > dst_w || dst_point->x > dst_w - update_w
2480             || update_h > dst_h || dst_point->y > dst_h - update_h)
2481     {
2482         WARN("Destination out of bounds.\n");
2483         return WINED3DERR_INVALIDCALL;
2484     }
2485
2486     /* NPOT block sizes would be silly. */
2487     if ((src_format->flags & WINED3DFMT_FLAG_COMPRESSED)
2488             && ((update_w & (src_format->block_width - 1) || update_h & (src_format->block_height - 1))
2489             && (src_w != update_w || dst_w != update_w || src_h != update_h || dst_h != update_h)))
2490     {
2491         WARN("Update rect not block-aligned.\n");
2492         return WINED3DERR_INVALIDCALL;
2493     }
2494
2495     /* Use wined3d_surface_blt() instead of uploading directly if we need conversion. */
2496     d3dfmt_get_conv(dst_surface, FALSE, TRUE, &format, &convert);
2497     if (convert != NO_CONVERSION || format.convert)
2498     {
2499         RECT dst_rect = {dst_point->x,  dst_point->y, dst_point->x + update_w, dst_point->y + update_h};
2500         return wined3d_surface_blt(dst_surface, &dst_rect, src_surface, src_rect, 0, NULL, WINED3DTEXF_POINT);
2501     }
2502
2503     context = context_acquire(dst_surface->resource.device, NULL);
2504     gl_info = context->gl_info;
2505
2506     /* Only load the surface for partial updates. For newly allocated texture
2507      * the texture wouldn't be the current location, and we'd upload zeroes
2508      * just to overwrite them again. */
2509     if (update_w == dst_w && update_h == dst_h)
2510         surface_prepare_texture(dst_surface, context, FALSE);
2511     else
2512         surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
2513     surface_bind(dst_surface, context, FALSE);
2514
2515     data.buffer_object = src_surface->pbo;
2516     data.addr = src_surface->resource.allocatedMemory;
2517     src_pitch = wined3d_surface_get_pitch(src_surface);
2518
2519     surface_upload_data(dst_surface, gl_info, src_format, src_rect, src_pitch, dst_point, FALSE, &data);
2520
2521     invalidate_active_texture(dst_surface->resource.device, context);
2522
2523     context_release(context);
2524
2525     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
2526     return WINED3D_OK;
2527 }
2528
2529 /* This call just allocates the texture, the caller is responsible for binding
2530  * the correct texture. */
2531 /* Context activation is done by the caller. */
2532 static void surface_allocate_surface(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info,
2533         const struct wined3d_format *format, BOOL srgb)
2534 {
2535     BOOL enable_client_storage = FALSE;
2536     GLsizei width = surface->pow2Width;
2537     GLsizei height = surface->pow2Height;
2538     const BYTE *mem = NULL;
2539     GLenum internal;
2540
2541     if (srgb)
2542     {
2543         internal = format->glGammaInternal;
2544     }
2545     else if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET && surface_is_offscreen(surface))
2546     {
2547         internal = format->rtInternal;
2548     }
2549     else
2550     {
2551         internal = format->glInternal;
2552     }
2553
2554     if (format->heightscale != 1.0f && format->heightscale != 0.0f) height *= format->heightscale;
2555
2556     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format %s, internal format %#x, width %d, height %d, gl format %#x, gl type=%#x\n",
2557             surface, surface->texture_target, surface->texture_level, debug_d3dformat(format->id),
2558             internal, width, height, format->glFormat, format->glType);
2559
2560     ENTER_GL();
2561
2562     if (gl_info->supported[APPLE_CLIENT_STORAGE])
2563     {
2564         if (surface->flags & (SFLAG_NONPOW2 | SFLAG_DIBSECTION | SFLAG_CONVERTED)
2565                 || !surface->resource.allocatedMemory)
2566         {
2567             /* In some cases we want to disable client storage.
2568              * SFLAG_NONPOW2 has a bigger opengl texture than the client memory, and different pitches
2569              * SFLAG_DIBSECTION: Dibsections may have read / write protections on the memory. Avoid issues...
2570              * SFLAG_CONVERTED: The conversion destination memory is freed after loading the surface
2571              * allocatedMemory == NULL: Not defined in the extension. Seems to disable client storage effectively
2572              */
2573             glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
2574             checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
2575             surface->flags &= ~SFLAG_CLIENT;
2576             enable_client_storage = TRUE;
2577         }
2578         else
2579         {
2580             surface->flags |= SFLAG_CLIENT;
2581
2582             /* Point OpenGL to our allocated texture memory. Do not use
2583              * resource.allocatedMemory here because it might point into a
2584              * PBO. Instead use heapMemory, but get the alignment right. */
2585             mem = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
2586                     + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
2587         }
2588     }
2589
2590     if (format->flags & WINED3DFMT_FLAG_COMPRESSED && mem)
2591     {
2592         GL_EXTCALL(glCompressedTexImage2DARB(surface->texture_target, surface->texture_level,
2593                 internal, width, height, 0, surface->resource.size, mem));
2594         checkGLcall("glCompressedTexImage2DARB");
2595     }
2596     else
2597     {
2598         glTexImage2D(surface->texture_target, surface->texture_level,
2599                 internal, width, height, 0, format->glFormat, format->glType, mem);
2600         checkGLcall("glTexImage2D");
2601     }
2602
2603     if(enable_client_storage) {
2604         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
2605         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
2606     }
2607     LEAVE_GL();
2608 }
2609
2610 /* In D3D the depth stencil dimensions have to be greater than or equal to the
2611  * render target dimensions. With FBOs, the dimensions have to be an exact match. */
2612 /* TODO: We should synchronize the renderbuffer's content with the texture's content. */
2613 /* GL locking is done by the caller */
2614 void surface_set_compatible_renderbuffer(struct wined3d_surface *surface, const struct wined3d_surface *rt)
2615 {
2616     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
2617     struct wined3d_renderbuffer_entry *entry;
2618     GLuint renderbuffer = 0;
2619     unsigned int src_width, src_height;
2620     unsigned int width, height;
2621
2622     if (rt && rt->resource.format->id != WINED3DFMT_NULL)
2623     {
2624         width = rt->pow2Width;
2625         height = rt->pow2Height;
2626     }
2627     else
2628     {
2629         width = surface->pow2Width;
2630         height = surface->pow2Height;
2631     }
2632
2633     src_width = surface->pow2Width;
2634     src_height = surface->pow2Height;
2635
2636     /* A depth stencil smaller than the render target is not valid */
2637     if (width > src_width || height > src_height) return;
2638
2639     /* Remove any renderbuffer set if the sizes match */
2640     if (gl_info->supported[ARB_FRAMEBUFFER_OBJECT]
2641             || (width == src_width && height == src_height))
2642     {
2643         surface->current_renderbuffer = NULL;
2644         return;
2645     }
2646
2647     /* Look if we've already got a renderbuffer of the correct dimensions */
2648     LIST_FOR_EACH_ENTRY(entry, &surface->renderbuffers, struct wined3d_renderbuffer_entry, entry)
2649     {
2650         if (entry->width == width && entry->height == height)
2651         {
2652             renderbuffer = entry->id;
2653             surface->current_renderbuffer = entry;
2654             break;
2655         }
2656     }
2657
2658     if (!renderbuffer)
2659     {
2660         gl_info->fbo_ops.glGenRenderbuffers(1, &renderbuffer);
2661         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, renderbuffer);
2662         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER,
2663                 surface->resource.format->glInternal, width, height);
2664
2665         entry = HeapAlloc(GetProcessHeap(), 0, sizeof(*entry));
2666         entry->width = width;
2667         entry->height = height;
2668         entry->id = renderbuffer;
2669         list_add_head(&surface->renderbuffers, &entry->entry);
2670
2671         surface->current_renderbuffer = entry;
2672     }
2673
2674     checkGLcall("set_compatible_renderbuffer");
2675 }
2676
2677 GLenum surface_get_gl_buffer(const struct wined3d_surface *surface)
2678 {
2679     const struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
2680
2681     TRACE("surface %p.\n", surface);
2682
2683     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
2684     {
2685         ERR("Surface %p is not on a swapchain.\n", surface);
2686         return GL_NONE;
2687     }
2688
2689     if (swapchain->back_buffers && swapchain->back_buffers[0] == surface)
2690     {
2691         if (swapchain->render_to_fbo)
2692         {
2693             TRACE("Returning GL_COLOR_ATTACHMENT0\n");
2694             return GL_COLOR_ATTACHMENT0;
2695         }
2696         TRACE("Returning GL_BACK\n");
2697         return GL_BACK;
2698     }
2699     else if (surface == swapchain->front_buffer)
2700     {
2701         TRACE("Returning GL_FRONT\n");
2702         return GL_FRONT;
2703     }
2704
2705     FIXME("Higher back buffer, returning GL_BACK\n");
2706     return GL_BACK;
2707 }
2708
2709 /* Slightly inefficient way to handle multiple dirty rects but it works :) */
2710 void surface_add_dirty_rect(struct wined3d_surface *surface, const WINED3DBOX *dirty_rect)
2711 {
2712     TRACE("surface %p, dirty_rect %p.\n", surface, dirty_rect);
2713
2714     if (!(surface->flags & SFLAG_INSYSMEM) && (surface->flags & SFLAG_INTEXTURE))
2715         /* No partial locking for textures yet. */
2716         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2717
2718     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2719     if (dirty_rect)
2720     {
2721         surface->dirtyRect.left = min(surface->dirtyRect.left, dirty_rect->Left);
2722         surface->dirtyRect.top = min(surface->dirtyRect.top, dirty_rect->Top);
2723         surface->dirtyRect.right = max(surface->dirtyRect.right, dirty_rect->Right);
2724         surface->dirtyRect.bottom = max(surface->dirtyRect.bottom, dirty_rect->Bottom);
2725     }
2726     else
2727     {
2728         surface->dirtyRect.left = 0;
2729         surface->dirtyRect.top = 0;
2730         surface->dirtyRect.right = surface->resource.width;
2731         surface->dirtyRect.bottom = surface->resource.height;
2732     }
2733
2734     /* if the container is a texture then mark it dirty. */
2735     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
2736     {
2737         TRACE("Passing to container.\n");
2738         wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
2739     }
2740 }
2741
2742 HRESULT surface_load(struct wined3d_surface *surface, BOOL srgb)
2743 {
2744     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
2745     BOOL ck_changed;
2746
2747     TRACE("surface %p, srgb %#x.\n", surface, srgb);
2748
2749     if (surface->resource.pool == WINED3DPOOL_SCRATCH)
2750     {
2751         ERR("Not supported on scratch surfaces.\n");
2752         return WINED3DERR_INVALIDCALL;
2753     }
2754
2755     ck_changed = !(surface->flags & SFLAG_GLCKEY) != !(surface->CKeyFlags & WINEDDSD_CKSRCBLT);
2756
2757     /* Reload if either the texture and sysmem have different ideas about the
2758      * color key, or the actual key values changed. */
2759     if (ck_changed || ((surface->CKeyFlags & WINEDDSD_CKSRCBLT)
2760             && (surface->glCKey.dwColorSpaceLowValue != surface->SrcBltCKey.dwColorSpaceLowValue
2761             || surface->glCKey.dwColorSpaceHighValue != surface->SrcBltCKey.dwColorSpaceHighValue)))
2762     {
2763         TRACE("Reloading because of color keying\n");
2764         /* To perform the color key conversion we need a sysmem copy of
2765          * the surface. Make sure we have it. */
2766
2767         surface_load_location(surface, SFLAG_INSYSMEM, NULL);
2768         /* Make sure the texture is reloaded because of the color key change,
2769          * this kills performance though :( */
2770         /* TODO: This is not necessarily needed with hw palettized texture support. */
2771         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
2772         /* Switching color keying on / off may change the internal format. */
2773         if (ck_changed)
2774             surface_force_reload(surface);
2775     }
2776     else if (!(surface->flags & flag))
2777     {
2778         TRACE("Reloading because surface is dirty.\n");
2779     }
2780     else
2781     {
2782         TRACE("surface is already in texture\n");
2783         return WINED3D_OK;
2784     }
2785
2786     /* No partial locking for textures yet. */
2787     surface_load_location(surface, flag, NULL);
2788     surface_evict_sysmem(surface);
2789
2790     return WINED3D_OK;
2791 }
2792
/* Converts a 32-bit float to a 16-bit half float (1 sign bit, 5 exponent
 * bits with excess 15, 10 mantissa bits).
 *
 * in: pointer to the value to convert
 *
 * Returns the half float bit pattern. Zero yields 0x0000, NaN yields 0x7c01,
 * infinities and values too large for a half float yield +/-INF, and values
 * too small for a normalized half float are encoded as denormals (or zero).
 *
 * See also float_16_to_32() in wined3d_private.h */
static inline unsigned short float_32_to_16(const float *in)
{
    const float mantissa_low = 1024.0f;   /* 2^10 */
    const float mantissa_high = 2048.0f;  /* 2^11 */
    int exp = 0;
    float tmp = fabsf(*in);
    unsigned int mantissa;
    unsigned short ret;

    /* Deal with special numbers */
    if (*in == 0.0f)
        return 0x0000;
    if (isnan(*in))
        return 0x7c01;
    if (isinf(*in))
        return (*in < 0.0f ? 0xfc00 : 0x7c00);

    /* Scale the value into [2^10, 2^11), tracking the exponent, so that the
     * integer part holds the implicit leading bit plus 10 mantissa bits. */
    if (tmp < mantissa_low)
    {
        do
        {
            tmp = tmp * 2.0f;
            exp--;
        } while (tmp < mantissa_low);
    }
    else if (tmp >= mantissa_high)
    {
        do
        {
            tmp /= 2.0f;
            exp++;
        } while (tmp >= mantissa_high);
    }

    mantissa = (unsigned int)tmp;
    if (tmp - mantissa >= 0.5f)
        ++mantissa; /* Round to nearest, away from zero. */

    /* Rounding may carry into bit 11 (e.g. 2047.5 -> 2048). Renormalize,
     * otherwise masking with 0x3ff below would silently drop the carry and
     * halve the result. */
    if (mantissa == 0x800)
    {
        mantissa = 0x400;
        ++exp;
    }

    exp += 10;  /* Normalize the mantissa. */
    exp += 15;  /* Exponent is encoded with excess 15. */

    if (exp > 30) /* too big */
    {
        ret = 0x7c00; /* INF */
    }
    else if (exp <= 0)
    {
        /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */
        while (exp <= 0)
        {
            mantissa = mantissa >> 1;
            ++exp;
        }
        ret = mantissa & 0x3ff;
    }
    else
    {
        ret = (exp << 10) | (mantissa & 0x3ff);
    }

    ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */
    return ret;
}
2855
2856 ULONG CDECL wined3d_surface_incref(struct wined3d_surface *surface)
2857 {
2858     ULONG refcount;
2859
2860     TRACE("Surface %p, container %p of type %#x.\n",
2861             surface, surface->container.u.base, surface->container.type);
2862
2863     switch (surface->container.type)
2864     {
2865         case WINED3D_CONTAINER_TEXTURE:
2866             return wined3d_texture_incref(surface->container.u.texture);
2867
2868         case WINED3D_CONTAINER_SWAPCHAIN:
2869             return wined3d_swapchain_incref(surface->container.u.swapchain);
2870
2871         default:
2872             ERR("Unhandled container type %#x.\n", surface->container.type);
2873         case WINED3D_CONTAINER_NONE:
2874             break;
2875     }
2876
2877     refcount = InterlockedIncrement(&surface->resource.ref);
2878     TRACE("%p increasing refcount to %u.\n", surface, refcount);
2879
2880     return refcount;
2881 }
2882
2883 /* Do not call while under the GL lock. */
2884 ULONG CDECL wined3d_surface_decref(struct wined3d_surface *surface)
2885 {
2886     ULONG refcount;
2887
2888     TRACE("Surface %p, container %p of type %#x.\n",
2889             surface, surface->container.u.base, surface->container.type);
2890
2891     switch (surface->container.type)
2892     {
2893         case WINED3D_CONTAINER_TEXTURE:
2894             return wined3d_texture_decref(surface->container.u.texture);
2895
2896         case WINED3D_CONTAINER_SWAPCHAIN:
2897             return wined3d_swapchain_decref(surface->container.u.swapchain);
2898
2899         default:
2900             ERR("Unhandled container type %#x.\n", surface->container.type);
2901         case WINED3D_CONTAINER_NONE:
2902             break;
2903     }
2904
2905     refcount = InterlockedDecrement(&surface->resource.ref);
2906     TRACE("%p decreasing refcount to %u.\n", surface, refcount);
2907
2908     if (!refcount)
2909     {
2910         surface_cleanup(surface);
2911         surface->resource.parent_ops->wined3d_object_destroyed(surface->resource.parent);
2912
2913         TRACE("Destroyed surface %p.\n", surface);
2914         HeapFree(GetProcessHeap(), 0, surface);
2915     }
2916
2917     return refcount;
2918 }
2919
2920 DWORD CDECL wined3d_surface_set_priority(struct wined3d_surface *surface, DWORD priority)
2921 {
2922     return resource_set_priority(&surface->resource, priority);
2923 }
2924
2925 DWORD CDECL wined3d_surface_get_priority(const struct wined3d_surface *surface)
2926 {
2927     return resource_get_priority(&surface->resource);
2928 }
2929
2930 void CDECL wined3d_surface_preload(struct wined3d_surface *surface)
2931 {
2932     TRACE("surface %p.\n", surface);
2933
2934     if (!surface->resource.device->d3d_initialized)
2935     {
2936         ERR("D3D not initialized.\n");
2937         return;
2938     }
2939
2940     surface_internal_preload(surface, SRGB_ANY);
2941 }
2942
2943 void * CDECL wined3d_surface_get_parent(const struct wined3d_surface *surface)
2944 {
2945     TRACE("surface %p.\n", surface);
2946
2947     return surface->resource.parent;
2948 }
2949
2950 struct wined3d_resource * CDECL wined3d_surface_get_resource(struct wined3d_surface *surface)
2951 {
2952     TRACE("surface %p.\n", surface);
2953
2954     return &surface->resource;
2955 }
2956
2957 HRESULT CDECL wined3d_surface_get_blt_status(const struct wined3d_surface *surface, DWORD flags)
2958 {
2959     TRACE("surface %p, flags %#x.\n", surface, flags);
2960
2961     switch (flags)
2962     {
2963         case WINEDDGBS_CANBLT:
2964         case WINEDDGBS_ISBLTDONE:
2965             return WINED3D_OK;
2966
2967         default:
2968             return WINED3DERR_INVALIDCALL;
2969     }
2970 }
2971
2972 HRESULT CDECL wined3d_surface_get_flip_status(const struct wined3d_surface *surface, DWORD flags)
2973 {
2974     TRACE("surface %p, flags %#x.\n", surface, flags);
2975
2976     /* XXX: DDERR_INVALIDSURFACETYPE */
2977
2978     switch (flags)
2979     {
2980         case WINEDDGFS_CANFLIP:
2981         case WINEDDGFS_ISFLIPDONE:
2982             return WINED3D_OK;
2983
2984         default:
2985             return WINED3DERR_INVALIDCALL;
2986     }
2987 }
2988
2989 HRESULT CDECL wined3d_surface_is_lost(const struct wined3d_surface *surface)
2990 {
2991     TRACE("surface %p.\n", surface);
2992
2993     /* D3D8 and 9 loose full devices, ddraw only surfaces. */
2994     return surface->flags & SFLAG_LOST ? WINED3DERR_DEVICELOST : WINED3D_OK;
2995 }
2996
2997 HRESULT CDECL wined3d_surface_restore(struct wined3d_surface *surface)
2998 {
2999     TRACE("surface %p.\n", surface);
3000
3001     surface->flags &= ~SFLAG_LOST;
3002     return WINED3D_OK;
3003 }
3004
3005 HRESULT CDECL wined3d_surface_set_palette(struct wined3d_surface *surface, struct wined3d_palette *palette)
3006 {
3007     TRACE("surface %p, palette %p.\n", surface, palette);
3008
3009     if (surface->palette == palette)
3010     {
3011         TRACE("Nop palette change.\n");
3012         return WINED3D_OK;
3013     }
3014
3015     if (surface->palette && (surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
3016         surface->palette->flags &= ~WINEDDPCAPS_PRIMARYSURFACE;
3017
3018     surface->palette = palette;
3019
3020     if (palette)
3021     {
3022         if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3023             palette->flags |= WINEDDPCAPS_PRIMARYSURFACE;
3024
3025         surface->surface_ops->surface_realize_palette(surface);
3026     }
3027
3028     return WINED3D_OK;
3029 }
3030
3031 HRESULT CDECL wined3d_surface_set_color_key(struct wined3d_surface *surface,
3032         DWORD flags, const WINEDDCOLORKEY *color_key)
3033 {
3034     TRACE("surface %p, flags %#x, color_key %p.\n", surface, flags, color_key);
3035
3036     if (flags & WINEDDCKEY_COLORSPACE)
3037     {
3038         FIXME(" colorkey value not supported (%08x) !\n", flags);
3039         return WINED3DERR_INVALIDCALL;
3040     }
3041
3042     /* Dirtify the surface, but only if a key was changed. */
3043     if (color_key)
3044     {
3045         switch (flags & ~WINEDDCKEY_COLORSPACE)
3046         {
3047             case WINEDDCKEY_DESTBLT:
3048                 surface->DestBltCKey = *color_key;
3049                 surface->CKeyFlags |= WINEDDSD_CKDESTBLT;
3050                 break;
3051
3052             case WINEDDCKEY_DESTOVERLAY:
3053                 surface->DestOverlayCKey = *color_key;
3054                 surface->CKeyFlags |= WINEDDSD_CKDESTOVERLAY;
3055                 break;
3056
3057             case WINEDDCKEY_SRCOVERLAY:
3058                 surface->SrcOverlayCKey = *color_key;
3059                 surface->CKeyFlags |= WINEDDSD_CKSRCOVERLAY;
3060                 break;
3061
3062             case WINEDDCKEY_SRCBLT:
3063                 surface->SrcBltCKey = *color_key;
3064                 surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
3065                 break;
3066         }
3067     }
3068     else
3069     {
3070         switch (flags & ~WINEDDCKEY_COLORSPACE)
3071         {
3072             case WINEDDCKEY_DESTBLT:
3073                 surface->CKeyFlags &= ~WINEDDSD_CKDESTBLT;
3074                 break;
3075
3076             case WINEDDCKEY_DESTOVERLAY:
3077                 surface->CKeyFlags &= ~WINEDDSD_CKDESTOVERLAY;
3078                 break;
3079
3080             case WINEDDCKEY_SRCOVERLAY:
3081                 surface->CKeyFlags &= ~WINEDDSD_CKSRCOVERLAY;
3082                 break;
3083
3084             case WINEDDCKEY_SRCBLT:
3085                 surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
3086                 break;
3087         }
3088     }
3089
3090     return WINED3D_OK;
3091 }
3092
3093 struct wined3d_palette * CDECL wined3d_surface_get_palette(const struct wined3d_surface *surface)
3094 {
3095     TRACE("surface %p.\n", surface);
3096
3097     return surface->palette;
3098 }
3099
3100 DWORD CDECL wined3d_surface_get_pitch(const struct wined3d_surface *surface)
3101 {
3102     const struct wined3d_format *format = surface->resource.format;
3103     DWORD pitch;
3104
3105     TRACE("surface %p.\n", surface);
3106
3107     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3108     {
3109         /* Since compressed formats are block based, pitch means the amount of
3110          * bytes to the next row of block rather than the next row of pixels. */
3111         UINT row_block_count = (surface->resource.width + format->block_width - 1) / format->block_width;
3112         pitch = row_block_count * format->block_byte_count;
3113     }
3114     else
3115     {
3116         unsigned char alignment = surface->resource.device->surface_alignment;
3117         pitch = surface->resource.format->byte_count * surface->resource.width;  /* Bytes / row */
3118         pitch = (pitch + alignment - 1) & ~(alignment - 1);
3119     }
3120
3121     TRACE("Returning %u.\n", pitch);
3122
3123     return pitch;
3124 }
3125
3126 HRESULT CDECL wined3d_surface_set_mem(struct wined3d_surface *surface, void *mem)
3127 {
3128     TRACE("surface %p, mem %p.\n", surface, mem);
3129
3130     if (surface->flags & (SFLAG_LOCKED | SFLAG_DCINUSE))
3131     {
3132         WARN("Surface is locked or the DC is in use.\n");
3133         return WINED3DERR_INVALIDCALL;
3134     }
3135
3136     /* Render targets depend on their hdc, and we can't create an hdc on a user pointer. */
3137     if (surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
3138     {
3139         ERR("Not supported on render targets.\n");
3140         return WINED3DERR_INVALIDCALL;
3141     }
3142
3143     if (mem && mem != surface->resource.allocatedMemory)
3144     {
3145         void *release = NULL;
3146
3147         /* Do I have to copy the old surface content? */
3148         if (surface->flags & SFLAG_DIBSECTION)
3149         {
3150             SelectObject(surface->hDC, surface->dib.holdbitmap);
3151             DeleteDC(surface->hDC);
3152             /* Release the DIB section. */
3153             DeleteObject(surface->dib.DIBsection);
3154             surface->dib.bitmap_data = NULL;
3155             surface->resource.allocatedMemory = NULL;
3156             surface->hDC = NULL;
3157             surface->flags &= ~SFLAG_DIBSECTION;
3158         }
3159         else if (!(surface->flags & SFLAG_USERPTR))
3160         {
3161             release = surface->resource.heapMemory;
3162             surface->resource.heapMemory = NULL;
3163         }
3164         surface->resource.allocatedMemory = mem;
3165         surface->flags |= SFLAG_USERPTR;
3166
3167         /* Now the surface memory is most up do date. Invalidate drawable and texture. */
3168         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3169
3170         /* For client textures OpenGL has to be notified. */
3171         if (surface->flags & SFLAG_CLIENT)
3172             surface_release_client_storage(surface);
3173
3174         /* Now free the old memory if any. */
3175         HeapFree(GetProcessHeap(), 0, release);
3176     }
3177     else if (surface->flags & SFLAG_USERPTR)
3178     {
3179         /* HeapMemory should be NULL already. */
3180         if (surface->resource.heapMemory)
3181             ERR("User pointer surface has heap memory allocated.\n");
3182
3183         if (!mem)
3184         {
3185             surface->resource.allocatedMemory = NULL;
3186             surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
3187
3188             if (surface->flags & SFLAG_CLIENT)
3189                 surface_release_client_storage(surface);
3190
3191             surface_prepare_system_memory(surface);
3192         }
3193
3194         surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
3195     }
3196
3197     return WINED3D_OK;
3198 }
3199
3200 HRESULT CDECL wined3d_surface_set_overlay_position(struct wined3d_surface *surface, LONG x, LONG y)
3201 {
3202     LONG w, h;
3203
3204     TRACE("surface %p, x %d, y %d.\n", surface, x, y);
3205
3206     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3207     {
3208         WARN("Not an overlay surface.\n");
3209         return WINEDDERR_NOTAOVERLAYSURFACE;
3210     }
3211
3212     w = surface->overlay_destrect.right - surface->overlay_destrect.left;
3213     h = surface->overlay_destrect.bottom - surface->overlay_destrect.top;
3214     surface->overlay_destrect.left = x;
3215     surface->overlay_destrect.top = y;
3216     surface->overlay_destrect.right = x + w;
3217     surface->overlay_destrect.bottom = y + h;
3218
3219     surface->surface_ops->surface_draw_overlay(surface);
3220
3221     return WINED3D_OK;
3222 }
3223
3224 HRESULT CDECL wined3d_surface_get_overlay_position(const struct wined3d_surface *surface, LONG *x, LONG *y)
3225 {
3226     TRACE("surface %p, x %p, y %p.\n", surface, x, y);
3227
3228     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3229     {
3230         TRACE("Not an overlay surface.\n");
3231         return WINEDDERR_NOTAOVERLAYSURFACE;
3232     }
3233
3234     if (!surface->overlay_dest)
3235     {
3236         TRACE("Overlay not visible.\n");
3237         *x = 0;
3238         *y = 0;
3239         return WINEDDERR_OVERLAYNOTVISIBLE;
3240     }
3241
3242     *x = surface->overlay_destrect.left;
3243     *y = surface->overlay_destrect.top;
3244
3245     TRACE("Returning position %d, %d.\n", *x, *y);
3246
3247     return WINED3D_OK;
3248 }
3249
3250 HRESULT CDECL wined3d_surface_update_overlay_z_order(struct wined3d_surface *surface,
3251         DWORD flags, struct wined3d_surface *ref)
3252 {
3253     FIXME("surface %p, flags %#x, ref %p stub!\n", surface, flags, ref);
3254
3255     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3256     {
3257         TRACE("Not an overlay surface.\n");
3258         return WINEDDERR_NOTAOVERLAYSURFACE;
3259     }
3260
3261     return WINED3D_OK;
3262 }
3263
3264 HRESULT CDECL wined3d_surface_update_overlay(struct wined3d_surface *surface, const RECT *src_rect,
3265         struct wined3d_surface *dst_surface, const RECT *dst_rect, DWORD flags, const WINEDDOVERLAYFX *fx)
3266 {
3267     TRACE("surface %p, src_rect %s, dst_surface %p, dst_rect %s, flags %#x, fx %p.\n",
3268             surface, wine_dbgstr_rect(src_rect), dst_surface, wine_dbgstr_rect(dst_rect), flags, fx);
3269
3270     if (!(surface->resource.usage & WINED3DUSAGE_OVERLAY))
3271     {
3272         WARN("Not an overlay surface.\n");
3273         return WINEDDERR_NOTAOVERLAYSURFACE;
3274     }
3275     else if (!dst_surface)
3276     {
3277         WARN("Dest surface is NULL.\n");
3278         return WINED3DERR_INVALIDCALL;
3279     }
3280
3281     if (src_rect)
3282     {
3283         surface->overlay_srcrect = *src_rect;
3284     }
3285     else
3286     {
3287         surface->overlay_srcrect.left = 0;
3288         surface->overlay_srcrect.top = 0;
3289         surface->overlay_srcrect.right = surface->resource.width;
3290         surface->overlay_srcrect.bottom = surface->resource.height;
3291     }
3292
3293     if (dst_rect)
3294     {
3295         surface->overlay_destrect = *dst_rect;
3296     }
3297     else
3298     {
3299         surface->overlay_destrect.left = 0;
3300         surface->overlay_destrect.top = 0;
3301         surface->overlay_destrect.right = dst_surface ? dst_surface->resource.width : 0;
3302         surface->overlay_destrect.bottom = dst_surface ? dst_surface->resource.height : 0;
3303     }
3304
3305     if (surface->overlay_dest && (surface->overlay_dest != dst_surface || flags & WINEDDOVER_HIDE))
3306     {
3307         surface->overlay_dest = NULL;
3308         list_remove(&surface->overlay_entry);
3309     }
3310
3311     if (flags & WINEDDOVER_SHOW)
3312     {
3313         if (surface->overlay_dest != dst_surface)
3314         {
3315             surface->overlay_dest = dst_surface;
3316             list_add_tail(&dst_surface->overlays, &surface->overlay_entry);
3317         }
3318     }
3319     else if (flags & WINEDDOVER_HIDE)
3320     {
3321         /* tests show that the rectangles are erased on hide */
3322         surface->overlay_srcrect.left = 0; surface->overlay_srcrect.top = 0;
3323         surface->overlay_srcrect.right = 0; surface->overlay_srcrect.bottom = 0;
3324         surface->overlay_destrect.left = 0; surface->overlay_destrect.top = 0;
3325         surface->overlay_destrect.right = 0; surface->overlay_destrect.bottom = 0;
3326         surface->overlay_dest = NULL;
3327     }
3328
3329     surface->surface_ops->surface_draw_overlay(surface);
3330
3331     return WINED3D_OK;
3332 }
3333
3334 HRESULT CDECL wined3d_surface_set_clipper(struct wined3d_surface *surface, struct wined3d_clipper *clipper)
3335 {
3336     TRACE("surface %p, clipper %p.\n", surface, clipper);
3337
3338     surface->clipper = clipper;
3339
3340     return WINED3D_OK;
3341 }
3342
3343 struct wined3d_clipper * CDECL wined3d_surface_get_clipper(const struct wined3d_surface *surface)
3344 {
3345     TRACE("surface %p.\n", surface);
3346
3347     return surface->clipper;
3348 }
3349
3350 HRESULT CDECL wined3d_surface_set_format(struct wined3d_surface *surface, enum wined3d_format_id format_id)
3351 {
3352     const struct wined3d_format *format = wined3d_get_format(&surface->resource.device->adapter->gl_info, format_id);
3353
3354     TRACE("surface %p, format %s.\n", surface, debug_d3dformat(format_id));
3355
3356     if (surface->resource.format->id != WINED3DFMT_UNKNOWN)
3357     {
3358         FIXME("The format of the surface must be WINED3DFORMAT_UNKNOWN.\n");
3359         return WINED3DERR_INVALIDCALL;
3360     }
3361
3362     surface->resource.size = wined3d_format_calculate_size(format, surface->resource.device->surface_alignment,
3363             surface->pow2Width, surface->pow2Height);
3364     surface->flags |= (WINED3DFMT_D16_LOCKABLE == format_id) ? SFLAG_LOCKABLE : 0;
3365     surface->resource.format = format;
3366
3367     TRACE("size %u, byte_count %u\n", surface->resource.size, format->byte_count);
3368     TRACE("glFormat %#x, glInternal %#x, glType %#x.\n",
3369             format->glFormat, format->glInternal, format->glType);
3370
3371     return WINED3D_OK;
3372 }
3373
3374 static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst,
3375         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3376 {
3377     unsigned short *dst_s;
3378     const float *src_f;
3379     unsigned int x, y;
3380
3381     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3382
3383     for (y = 0; y < h; ++y)
3384     {
3385         src_f = (const float *)(src + y * pitch_in);
3386         dst_s = (unsigned short *) (dst + y * pitch_out);
3387         for (x = 0; x < w; ++x)
3388         {
3389             dst_s[x] = float_32_to_16(src_f + x);
3390         }
3391     }
3392 }
3393
3394 static void convert_r5g6b5_x8r8g8b8(const BYTE *src, BYTE *dst,
3395         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3396 {
3397     static const unsigned char convert_5to8[] =
3398     {
3399         0x00, 0x08, 0x10, 0x19, 0x21, 0x29, 0x31, 0x3a,
3400         0x42, 0x4a, 0x52, 0x5a, 0x63, 0x6b, 0x73, 0x7b,
3401         0x84, 0x8c, 0x94, 0x9c, 0xa5, 0xad, 0xb5, 0xbd,
3402         0xc5, 0xce, 0xd6, 0xde, 0xe6, 0xef, 0xf7, 0xff,
3403     };
3404     static const unsigned char convert_6to8[] =
3405     {
3406         0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
3407         0x20, 0x24, 0x28, 0x2d, 0x31, 0x35, 0x39, 0x3d,
3408         0x41, 0x45, 0x49, 0x4d, 0x51, 0x55, 0x59, 0x5d,
3409         0x61, 0x65, 0x69, 0x6d, 0x71, 0x75, 0x79, 0x7d,
3410         0x82, 0x86, 0x8a, 0x8e, 0x92, 0x96, 0x9a, 0x9e,
3411         0xa2, 0xa6, 0xaa, 0xae, 0xb2, 0xb6, 0xba, 0xbe,
3412         0xc2, 0xc6, 0xca, 0xce, 0xd2, 0xd7, 0xdb, 0xdf,
3413         0xe3, 0xe7, 0xeb, 0xef, 0xf3, 0xf7, 0xfb, 0xff,
3414     };
3415     unsigned int x, y;
3416
3417     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3418
3419     for (y = 0; y < h; ++y)
3420     {
3421         const WORD *src_line = (const WORD *)(src + y * pitch_in);
3422         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3423         for (x = 0; x < w; ++x)
3424         {
3425             WORD pixel = src_line[x];
3426             dst_line[x] = 0xff000000
3427                     | convert_5to8[(pixel & 0xf800) >> 11] << 16
3428                     | convert_6to8[(pixel & 0x07e0) >> 5] << 8
3429                     | convert_5to8[(pixel & 0x001f)];
3430         }
3431     }
3432 }
3433
3434 /* We use this for both B8G8R8A8 -> B8G8R8X8 and B8G8R8X8 -> B8G8R8A8, since
3435  * in both cases we're just setting the X / Alpha channel to 0xff. */
3436 static void convert_a8r8g8b8_x8r8g8b8(const BYTE *src, BYTE *dst,
3437         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3438 {
3439     unsigned int x, y;
3440
3441     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3442
3443     for (y = 0; y < h; ++y)
3444     {
3445         const DWORD *src_line = (const DWORD *)(src + y * pitch_in);
3446         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3447
3448         for (x = 0; x < w; ++x)
3449         {
3450             dst_line[x] = 0xff000000 | (src_line[x] & 0xffffff);
3451         }
3452     }
3453 }
3454
3455 static inline BYTE cliptobyte(int x)
3456 {
3457     return (BYTE)((x < 0) ? 0 : ((x > 255) ? 255 : x));
3458 }
3459
3460 static void convert_yuy2_x8r8g8b8(const BYTE *src, BYTE *dst,
3461         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3462 {
3463     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3464     unsigned int x, y;
3465
3466     TRACE("Converting %ux%u pixels, pitches %u %u.\n", w, h, pitch_in, pitch_out);
3467
3468     for (y = 0; y < h; ++y)
3469     {
3470         const BYTE *src_line = src + y * pitch_in;
3471         DWORD *dst_line = (DWORD *)(dst + y * pitch_out);
3472         for (x = 0; x < w; ++x)
3473         {
3474             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3475              *     C = Y - 16; D = U - 128; E = V - 128;
3476              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3477              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3478              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3479              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3480              * U and V are shared between the pixels. */
3481             if (!(x & 1)) /* For every even pixel, read new U and V. */
3482             {
3483                 d = (int) src_line[1] - 128;
3484                 e = (int) src_line[3] - 128;
3485                 r2 = 409 * e + 128;
3486                 g2 = - 100 * d - 208 * e + 128;
3487                 b2 = 516 * d + 128;
3488             }
3489             c2 = 298 * ((int) src_line[0] - 16);
3490             dst_line[x] = 0xff000000
3491                 | cliptobyte((c2 + r2) >> 8) << 16    /* red   */
3492                 | cliptobyte((c2 + g2) >> 8) << 8     /* green */
3493                 | cliptobyte((c2 + b2) >> 8);         /* blue  */
3494                 /* Scale RGB values to 0..255 range,
3495                  * then clip them if still not in range (may be negative),
3496                  * then shift them within DWORD if necessary. */
3497             src_line += 2;
3498         }
3499     }
3500 }
3501
3502 static void convert_yuy2_r5g6b5(const BYTE *src, BYTE *dst,
3503         DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h)
3504 {
3505     unsigned int x, y;
3506     int c2, d, e, r2 = 0, g2 = 0, b2 = 0;
3507
3508     TRACE("Converting %ux%u pixels, pitches %u %u\n", w, h, pitch_in, pitch_out);
3509
3510     for (y = 0; y < h; ++y)
3511     {
3512         const BYTE *src_line = src + y * pitch_in;
3513         WORD *dst_line = (WORD *)(dst + y * pitch_out);
3514         for (x = 0; x < w; ++x)
3515         {
3516             /* YUV to RGB conversion formulas from http://en.wikipedia.org/wiki/YUV:
3517              *     C = Y - 16; D = U - 128; E = V - 128;
3518              *     R = cliptobyte((298 * C + 409 * E + 128) >> 8);
3519              *     G = cliptobyte((298 * C - 100 * D - 208 * E + 128) >> 8);
3520              *     B = cliptobyte((298 * C + 516 * D + 128) >> 8);
3521              * Two adjacent YUY2 pixels are stored as four bytes: Y0 U Y1 V .
3522              * U and V are shared between the pixels. */
3523             if (!(x & 1)) /* For every even pixel, read new U and V. */
3524             {
3525                 d = (int) src_line[1] - 128;
3526                 e = (int) src_line[3] - 128;
3527                 r2 = 409 * e + 128;
3528                 g2 = - 100 * d - 208 * e + 128;
3529                 b2 = 516 * d + 128;
3530             }
3531             c2 = 298 * ((int) src_line[0] - 16);
3532             dst_line[x] = (cliptobyte((c2 + r2) >> 8) >> 3) << 11   /* red   */
3533                 | (cliptobyte((c2 + g2) >> 8) >> 2) << 5            /* green */
3534                 | (cliptobyte((c2 + b2) >> 8) >> 3);                /* blue  */
3535                 /* Scale RGB values to 0..255 range,
3536                  * then clip them if still not in range (may be negative),
3537                  * then shift them within DWORD if necessary. */
3538             src_line += 2;
3539         }
3540     }
3541 }
3542
3543 struct d3dfmt_convertor_desc
3544 {
3545     enum wined3d_format_id from, to;
3546     void (*convert)(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h);
3547 };
3548
/* Table of the available format conversions, searched by find_convertor().
 * Each row is {source format, destination format, conversion routine}. */
static const struct d3dfmt_convertor_desc convertors[] =
{
    {WINED3DFMT_R32_FLOAT,      WINED3DFMT_R16_FLOAT,       convert_r32_float_r16_float},
    {WINED3DFMT_B5G6R5_UNORM,   WINED3DFMT_B8G8R8X8_UNORM,  convert_r5g6b5_x8r8g8b8},
    {WINED3DFMT_B8G8R8A8_UNORM, WINED3DFMT_B8G8R8X8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_B8G8R8X8_UNORM, WINED3DFMT_B8G8R8A8_UNORM,  convert_a8r8g8b8_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B8G8R8X8_UNORM,  convert_yuy2_x8r8g8b8},
    {WINED3DFMT_YUY2,           WINED3DFMT_B5G6R5_UNORM,    convert_yuy2_r5g6b5},
};
3558
3559 static inline const struct d3dfmt_convertor_desc *find_convertor(enum wined3d_format_id from,
3560         enum wined3d_format_id to)
3561 {
3562     unsigned int i;
3563
3564     for (i = 0; i < (sizeof(convertors) / sizeof(*convertors)); ++i)
3565     {
3566         if (convertors[i].from == from && convertors[i].to == to)
3567             return &convertors[i];
3568     }
3569
3570     return NULL;
3571 }
3572
3573 /*****************************************************************************
3574  * surface_convert_format
3575  *
3576  * Creates a duplicate of a surface in a different format. Is used by Blt to
3577  * blit between surfaces with different formats.
3578  *
3579  * Parameters
3580  *  source: Source surface
3581  *  fmt: Requested destination format
3582  *
3583  *****************************************************************************/
3584 static struct wined3d_surface *surface_convert_format(struct wined3d_surface *source, enum wined3d_format_id to_fmt)
3585 {
3586     const struct d3dfmt_convertor_desc *conv;
3587     WINED3DLOCKED_RECT lock_src, lock_dst;
3588     struct wined3d_surface *ret = NULL;
3589     HRESULT hr;
3590
3591     conv = find_convertor(source->resource.format->id, to_fmt);
3592     if (!conv)
3593     {
3594         FIXME("Cannot find a conversion function from format %s to %s.\n",
3595                 debug_d3dformat(source->resource.format->id), debug_d3dformat(to_fmt));
3596         return NULL;
3597     }
3598
3599     wined3d_surface_create(source->resource.device, source->resource.width,
3600             source->resource.height, to_fmt, TRUE /* lockable */, TRUE /* discard  */, 0 /* level */,
3601             0 /* usage */, WINED3DPOOL_SCRATCH, WINED3DMULTISAMPLE_NONE /* TODO: Multisampled conversion */,
3602             0 /* MultiSampleQuality */, source->surface_type, NULL /* parent */, &wined3d_null_parent_ops, &ret);
3603     if (!ret)
3604     {
3605         ERR("Failed to create a destination surface for conversion.\n");
3606         return NULL;
3607     }
3608
3609     memset(&lock_src, 0, sizeof(lock_src));
3610     memset(&lock_dst, 0, sizeof(lock_dst));
3611
3612     hr = wined3d_surface_map(source, &lock_src, NULL, WINED3DLOCK_READONLY);
3613     if (FAILED(hr))
3614     {
3615         ERR("Failed to lock the source surface.\n");
3616         wined3d_surface_decref(ret);
3617         return NULL;
3618     }
3619     hr = wined3d_surface_map(ret, &lock_dst, NULL, WINED3DLOCK_READONLY);
3620     if (FAILED(hr))
3621     {
3622         ERR("Failed to lock the destination surface.\n");
3623         wined3d_surface_unmap(source);
3624         wined3d_surface_decref(ret);
3625         return NULL;
3626     }
3627
3628     conv->convert(lock_src.pBits, lock_dst.pBits, lock_src.Pitch, lock_dst.Pitch,
3629             source->resource.width, source->resource.height);
3630
3631     wined3d_surface_unmap(ret);
3632     wined3d_surface_unmap(source);
3633
3634     return ret;
3635 }
3636
3637 static HRESULT _Blt_ColorFill(BYTE *buf, unsigned int width, unsigned int height,
3638         unsigned int bpp, UINT pitch, DWORD color)
3639 {
3640     BYTE *first;
3641     int x, y;
3642
3643     /* Do first row */
3644
3645 #define COLORFILL_ROW(type) \
3646 do { \
3647     type *d = (type *)buf; \
3648     for (x = 0; x < width; ++x) \
3649         d[x] = (type)color; \
3650 } while(0)
3651
3652     switch (bpp)
3653     {
3654         case 1:
3655             COLORFILL_ROW(BYTE);
3656             break;
3657
3658         case 2:
3659             COLORFILL_ROW(WORD);
3660             break;
3661
3662         case 3:
3663         {
3664             BYTE *d = buf;
3665             for (x = 0; x < width; ++x, d += 3)
3666             {
3667                 d[0] = (color      ) & 0xFF;
3668                 d[1] = (color >>  8) & 0xFF;
3669                 d[2] = (color >> 16) & 0xFF;
3670             }
3671             break;
3672         }
3673         case 4:
3674             COLORFILL_ROW(DWORD);
3675             break;
3676
3677         default:
3678             FIXME("Color fill not implemented for bpp %u!\n", bpp * 8);
3679             return WINED3DERR_NOTAVAILABLE;
3680     }
3681
3682 #undef COLORFILL_ROW
3683
3684     /* Now copy first row. */
3685     first = buf;
3686     for (y = 1; y < height; ++y)
3687     {
3688         buf += pitch;
3689         memcpy(buf, first, width * bpp);
3690     }
3691
3692     return WINED3D_OK;
3693 }
3694
3695 HRESULT CDECL wined3d_surface_unmap(struct wined3d_surface *surface)
3696 {
3697     TRACE("surface %p.\n", surface);
3698
3699     if (!(surface->flags & SFLAG_LOCKED))
3700     {
3701         WARN("Trying to unmap unmapped surface.\n");
3702         return WINEDDERR_NOTLOCKED;
3703     }
3704     surface->flags &= ~SFLAG_LOCKED;
3705
3706     surface->surface_ops->surface_unmap(surface);
3707
3708     return WINED3D_OK;
3709 }
3710
3711 HRESULT CDECL wined3d_surface_map(struct wined3d_surface *surface,
3712         WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
3713 {
3714     const struct wined3d_format *format = surface->resource.format;
3715
3716     TRACE("surface %p, locked_rect %p, rect %s, flags %#x.\n",
3717             surface, locked_rect, wine_dbgstr_rect(rect), flags);
3718
3719     if (surface->flags & SFLAG_LOCKED)
3720     {
3721         WARN("Surface is already mapped.\n");
3722         return WINED3DERR_INVALIDCALL;
3723     }
3724     if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED
3725             && rect && (rect->left || rect->top
3726             || rect->right != surface->resource.width
3727             || rect->bottom != surface->resource.height))
3728     {
3729         UINT width_mask = format->block_width - 1;
3730         UINT height_mask = format->block_height - 1;
3731
3732         if ((rect->left & width_mask) || (rect->right & width_mask)
3733                 || (rect->top & height_mask) || (rect->bottom & height_mask))
3734         {
3735             switch (surface->resource.pool)
3736             {
3737                 case WINED3DPOOL_DEFAULT:
3738                     WARN("Partial block lock with WINED3DPOOL_DEFAULT\n");
3739                     return WINED3DERR_INVALIDCALL;
3740
3741                 default:
3742                     FIXME("Partial block lock with %s\n", debug_d3dpool(surface->resource.pool));
3743             }
3744         }
3745     }
3746
3747     surface->flags |= SFLAG_LOCKED;
3748
3749     if (!(surface->flags & SFLAG_LOCKABLE))
3750         WARN("Trying to lock unlockable surface.\n");
3751
3752     surface->surface_ops->surface_map(surface, rect, flags);
3753
3754     locked_rect->Pitch = wined3d_surface_get_pitch(surface);
3755
3756     if (!rect)
3757     {
3758         locked_rect->pBits = surface->resource.allocatedMemory;
3759         surface->lockedRect.left = 0;
3760         surface->lockedRect.top = 0;
3761         surface->lockedRect.right = surface->resource.width;
3762         surface->lockedRect.bottom = surface->resource.height;
3763     }
3764     else
3765     {
3766         if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
3767         {
3768             /* Compressed textures are block based, so calculate the offset of
3769              * the block that contains the top-left pixel of the locked rectangle. */
3770             locked_rect->pBits = surface->resource.allocatedMemory
3771                     + ((rect->top / format->block_height) * locked_rect->Pitch)
3772                     + ((rect->left / format->block_width) * format->block_byte_count);
3773         }
3774         else
3775         {
3776             locked_rect->pBits = surface->resource.allocatedMemory
3777                     + (locked_rect->Pitch * rect->top)
3778                     + (rect->left * format->byte_count);
3779         }
3780         surface->lockedRect.left = rect->left;
3781         surface->lockedRect.top = rect->top;
3782         surface->lockedRect.right = rect->right;
3783         surface->lockedRect.bottom = rect->bottom;
3784     }
3785
3786     TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
3787     TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
3788
3789     return WINED3D_OK;
3790 }
3791
3792 HRESULT CDECL wined3d_surface_getdc(struct wined3d_surface *surface, HDC *dc)
3793 {
3794     WINED3DLOCKED_RECT lock;
3795     HRESULT hr;
3796
3797     TRACE("surface %p, dc %p.\n", surface, dc);
3798
3799     if (surface->flags & SFLAG_USERPTR)
3800     {
3801         ERR("Not supported on surfaces with application-provided memory.\n");
3802         return WINEDDERR_NODC;
3803     }
3804
3805     /* Give more detailed info for ddraw. */
3806     if (surface->flags & SFLAG_DCINUSE)
3807         return WINEDDERR_DCALREADYCREATED;
3808
3809     /* Can't GetDC if the surface is locked. */
3810     if (surface->flags & SFLAG_LOCKED)
3811         return WINED3DERR_INVALIDCALL;
3812
3813     /* Create a DIB section if there isn't a dc yet. */
3814     if (!surface->hDC)
3815     {
3816         if (surface->flags & SFLAG_CLIENT)
3817         {
3818             surface_load_location(surface, SFLAG_INSYSMEM, NULL);
3819             surface_release_client_storage(surface);
3820         }
3821         hr = surface_create_dib_section(surface);
3822         if (FAILED(hr))
3823             return WINED3DERR_INVALIDCALL;
3824
3825         /* Use the DIB section from now on if we are not using a PBO. */
3826         if (!(surface->flags & SFLAG_PBO))
3827             surface->resource.allocatedMemory = surface->dib.bitmap_data;
3828     }
3829
3830     /* Map the surface. */
3831     hr = wined3d_surface_map(surface, &lock, NULL, 0);
3832     if (FAILED(hr))
3833     {
3834         ERR("Map failed, hr %#x.\n", hr);
3835         return hr;
3836     }
3837
3838     /* Sync the DIB with the PBO. This can't be done earlier because Map()
3839      * activates the allocatedMemory. */
3840     if (surface->flags & SFLAG_PBO)
3841         memcpy(surface->dib.bitmap_data, surface->resource.allocatedMemory, surface->resource.size);
3842
3843     if (surface->resource.format->id == WINED3DFMT_P8_UINT
3844             || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
3845     {
3846         /* GetDC on palettized formats is unsupported in D3D9, and the method
3847          * is missing in D3D8, so this should only be used for DX <=7
3848          * surfaces (with non-device palettes). */
3849         const PALETTEENTRY *pal = NULL;
3850
3851         if (surface->palette)
3852         {
3853             pal = surface->palette->palents;
3854         }
3855         else
3856         {
3857             struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
3858             struct wined3d_surface *dds_primary = swapchain->front_buffer;
3859
3860             if (dds_primary && dds_primary->palette)
3861                 pal = dds_primary->palette->palents;
3862         }
3863
3864         if (pal)
3865         {
3866             RGBQUAD col[256];
3867             unsigned int i;
3868
3869             for (i = 0; i < 256; ++i)
3870             {
3871                 col[i].rgbRed = pal[i].peRed;
3872                 col[i].rgbGreen = pal[i].peGreen;
3873                 col[i].rgbBlue = pal[i].peBlue;
3874                 col[i].rgbReserved = 0;
3875             }
3876             SetDIBColorTable(surface->hDC, 0, 256, col);
3877         }
3878     }
3879
3880     surface->flags |= SFLAG_DCINUSE;
3881
3882     *dc = surface->hDC;
3883     TRACE("Returning dc %p.\n", *dc);
3884
3885     return WINED3D_OK;
3886 }
3887
3888 HRESULT CDECL wined3d_surface_releasedc(struct wined3d_surface *surface, HDC dc)
3889 {
3890     TRACE("surface %p, dc %p.\n", surface, dc);
3891
3892     if (!(surface->flags & SFLAG_DCINUSE))
3893         return WINEDDERR_NODC;
3894
3895     if (surface->hDC != dc)
3896     {
3897         WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
3898                 dc, surface->hDC);
3899         return WINEDDERR_NODC;
3900     }
3901
3902     /* Copy the contents of the DIB over to the PBO. */
3903     if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
3904         memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->resource.size);
3905
3906     /* We locked first, so unlock now. */
3907     wined3d_surface_unmap(surface);
3908
3909     surface->flags &= ~SFLAG_DCINUSE;
3910
3911     return WINED3D_OK;
3912 }
3913
3914 HRESULT CDECL wined3d_surface_flip(struct wined3d_surface *surface, struct wined3d_surface *override, DWORD flags)
3915 {
3916     TRACE("surface %p, override %p, flags %#x.\n", surface, override, flags);
3917
3918     if (flags)
3919     {
3920         static UINT once;
3921         if (!once++)
3922             FIXME("Ignoring flags %#x.\n", flags);
3923         else
3924             WARN("Ignoring flags %#x.\n", flags);
3925     }
3926
3927     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
3928     {
3929         ERR("Not supported on swapchain surfaces.\n");
3930         return WINEDDERR_NOTFLIPPABLE;
3931     }
3932
3933     /* Flipping is only supported on render targets and overlays. */
3934     if (!(surface->resource.usage & (WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_OVERLAY)))
3935     {
3936         WARN("Tried to flip a non-render target, non-overlay surface.\n");
3937         return WINEDDERR_NOTFLIPPABLE;
3938     }
3939
3940     flip_surface(surface, override);
3941
3942     /* Update overlays if they're visible. */
3943     if ((surface->resource.usage & WINED3DUSAGE_OVERLAY) && surface->overlay_dest)
3944         return surface->surface_ops->surface_draw_overlay(surface);
3945
3946     return WINED3D_OK;
3947 }
3948
3949 /* Do not call while under the GL lock. */
3950 void surface_internal_preload(struct wined3d_surface *surface, enum WINED3DSRGB srgb)
3951 {
3952     struct wined3d_device *device = surface->resource.device;
3953
3954     TRACE("iface %p, srgb %#x.\n", surface, srgb);
3955
3956     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
3957     {
3958         struct wined3d_texture *texture = surface->container.u.texture;
3959
3960         TRACE("Passing to container (%p).\n", texture);
3961         texture->texture_ops->texture_preload(texture, srgb);
3962     }
3963     else
3964     {
3965         struct wined3d_context *context;
3966
3967         TRACE("(%p) : About to load surface\n", surface);
3968
3969         /* TODO: Use already acquired context when possible. */
3970         context = context_acquire(device, NULL);
3971
3972         surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
3973
3974         if (surface->resource.pool == WINED3DPOOL_DEFAULT)
3975         {
3976             /* Tell opengl to try and keep this texture in video ram (well mostly) */
3977             GLclampf tmp;
3978             tmp = 0.9f;
3979             ENTER_GL();
3980             glPrioritizeTextures(1, &surface->texture_name, &tmp);
3981             LEAVE_GL();
3982         }
3983
3984         context_release(context);
3985     }
3986 }
3987
3988 BOOL surface_init_sysmem(struct wined3d_surface *surface)
3989 {
3990     if (!surface->resource.allocatedMemory)
3991     {
3992         surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
3993                 surface->resource.size + RESOURCE_ALIGNMENT);
3994         if (!surface->resource.heapMemory)
3995         {
3996             ERR("Out of memory\n");
3997             return FALSE;
3998         }
3999         surface->resource.allocatedMemory =
4000             (BYTE *)(((ULONG_PTR)surface->resource.heapMemory + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
4001     }
4002     else
4003     {
4004         memset(surface->resource.allocatedMemory, 0, surface->resource.size);
4005     }
4006
4007     surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
4008
4009     return TRUE;
4010 }
4011
4012 /* Read the framebuffer back into the surface */
4013 static void read_from_framebuffer(struct wined3d_surface *surface, const RECT *rect, void *dest, UINT pitch)
4014 {
4015     struct wined3d_device *device = surface->resource.device;
4016     const struct wined3d_gl_info *gl_info;
4017     struct wined3d_context *context;
4018     BYTE *mem;
4019     GLint fmt;
4020     GLint type;
4021     BYTE *row, *top, *bottom;
4022     int i;
4023     BOOL bpp;
4024     RECT local_rect;
4025     BOOL srcIsUpsideDown;
4026     GLint rowLen = 0;
4027     GLint skipPix = 0;
4028     GLint skipRow = 0;
4029
4030     if(wined3d_settings.rendertargetlock_mode == RTL_DISABLE) {
4031         static BOOL warned = FALSE;
4032         if(!warned) {
4033             ERR("The application tries to lock the render target, but render target locking is disabled\n");
4034             warned = TRUE;
4035         }
4036         return;
4037     }
4038
4039     context = context_acquire(device, surface);
4040     context_apply_blit_state(context, device);
4041     gl_info = context->gl_info;
4042
4043     ENTER_GL();
4044
4045     /* Select the correct read buffer, and give some debug output.
4046      * There is no need to keep track of the current read buffer or reset it, every part of the code
4047      * that reads sets the read buffer as desired.
4048      */
4049     if (surface_is_offscreen(surface))
4050     {
4051         /* Mapping the primary render target which is not on a swapchain.
4052          * Read from the back buffer. */
4053         TRACE("Mapping offscreen render target.\n");
4054         glReadBuffer(device->offscreenBuffer);
4055         srcIsUpsideDown = TRUE;
4056     }
4057     else
4058     {
4059         /* Onscreen surfaces are always part of a swapchain */
4060         GLenum buffer = surface_get_gl_buffer(surface);
4061         TRACE("Mapping %#x buffer.\n", buffer);
4062         glReadBuffer(buffer);
4063         checkGLcall("glReadBuffer");
4064         srcIsUpsideDown = FALSE;
4065     }
4066
4067     /* TODO: Get rid of the extra rectangle comparison and construction of a full surface rectangle */
4068     if (!rect)
4069     {
4070         local_rect.left = 0;
4071         local_rect.top = 0;
4072         local_rect.right = surface->resource.width;
4073         local_rect.bottom = surface->resource.height;
4074     }
4075     else
4076     {
4077         local_rect = *rect;
4078     }
4079     /* TODO: Get rid of the extra GetPitch call, LockRect does that too. Cache the pitch */
4080
4081     switch (surface->resource.format->id)
4082     {
4083         case WINED3DFMT_P8_UINT:
4084         {
4085             if (primary_render_target_is_p8(device))
4086             {
4087                 /* In case of P8 render targets the index is stored in the alpha component */
4088                 fmt = GL_ALPHA;
4089                 type = GL_UNSIGNED_BYTE;
4090                 mem = dest;
4091                 bpp = surface->resource.format->byte_count;
4092             }
4093             else
4094             {
4095                 /* GL can't return palettized data, so read ARGB pixels into a
4096                  * separate block of memory and convert them into palettized format
4097                  * in software. Slow, but if the app means to use palettized render
4098                  * targets and locks it...
4099                  *
4100                  * Use GL_RGB, GL_UNSIGNED_BYTE to read the surface for performance reasons
4101                  * Don't use GL_BGR as in the WINED3DFMT_R8G8B8 case, instead watch out
4102                  * for the color channels when palettizing the colors.
4103                  */
4104                 fmt = GL_RGB;
4105                 type = GL_UNSIGNED_BYTE;
4106                 pitch *= 3;
4107                 mem = HeapAlloc(GetProcessHeap(), 0, surface->resource.size * 3);
4108                 if (!mem)
4109                 {
4110                     ERR("Out of memory\n");
4111                     LEAVE_GL();
4112                     return;
4113                 }
4114                 bpp = surface->resource.format->byte_count * 3;
4115             }
4116         }
4117         break;
4118
4119         default:
4120             mem = dest;
4121             fmt = surface->resource.format->glFormat;
4122             type = surface->resource.format->glType;
4123             bpp = surface->resource.format->byte_count;
4124     }
4125
4126     if (surface->flags & SFLAG_PBO)
4127     {
4128         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, surface->pbo));
4129         checkGLcall("glBindBufferARB");
4130         if (mem)
4131         {
4132             ERR("mem not null for pbo -- unexpected\n");
4133             mem = NULL;
4134         }
4135     }
4136
4137     /* Save old pixel store pack state */
4138     glGetIntegerv(GL_PACK_ROW_LENGTH, &rowLen);
4139     checkGLcall("glGetIntegerv");
4140     glGetIntegerv(GL_PACK_SKIP_PIXELS, &skipPix);
4141     checkGLcall("glGetIntegerv");
4142     glGetIntegerv(GL_PACK_SKIP_ROWS, &skipRow);
4143     checkGLcall("glGetIntegerv");
4144
4145     /* Setup pixel store pack state -- to glReadPixels into the correct place */
4146     glPixelStorei(GL_PACK_ROW_LENGTH, surface->resource.width);
4147     checkGLcall("glPixelStorei");
4148     glPixelStorei(GL_PACK_SKIP_PIXELS, local_rect.left);
4149     checkGLcall("glPixelStorei");
4150     glPixelStorei(GL_PACK_SKIP_ROWS, local_rect.top);
4151     checkGLcall("glPixelStorei");
4152
4153     glReadPixels(local_rect.left, !srcIsUpsideDown ? (surface->resource.height - local_rect.bottom) : local_rect.top,
4154             local_rect.right - local_rect.left,
4155             local_rect.bottom - local_rect.top,
4156             fmt, type, mem);
4157     checkGLcall("glReadPixels");
4158
4159     /* Reset previous pixel store pack state */
4160     glPixelStorei(GL_PACK_ROW_LENGTH, rowLen);
4161     checkGLcall("glPixelStorei");
4162     glPixelStorei(GL_PACK_SKIP_PIXELS, skipPix);
4163     checkGLcall("glPixelStorei");
4164     glPixelStorei(GL_PACK_SKIP_ROWS, skipRow);
4165     checkGLcall("glPixelStorei");
4166
4167     if (surface->flags & SFLAG_PBO)
4168     {
4169         GL_EXTCALL(glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, 0));
4170         checkGLcall("glBindBufferARB");
4171
4172         /* Check if we need to flip the image. If we need to flip use glMapBufferARB
4173          * to get a pointer to it and perform the flipping in software. This is a lot
4174          * faster than calling glReadPixels for each line. In case we want more speed
4175          * we should rerender it flipped in a FBO and read the data back from the FBO. */
4176         if (!srcIsUpsideDown)
4177         {
4178             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4179             checkGLcall("glBindBufferARB");
4180
4181             mem = GL_EXTCALL(glMapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, GL_READ_WRITE_ARB));
4182             checkGLcall("glMapBufferARB");
4183         }
4184     }
4185
4186     /* TODO: Merge this with the palettization loop below for P8 targets */
4187     if(!srcIsUpsideDown) {
4188         UINT len, off;
4189         /* glReadPixels returns the image upside down, and there is no way to prevent this.
4190             Flip the lines in software */
4191         len = (local_rect.right - local_rect.left) * bpp;
4192         off = local_rect.left * bpp;
4193
4194         row = HeapAlloc(GetProcessHeap(), 0, len);
4195         if(!row) {
4196             ERR("Out of memory\n");
4197             if (surface->resource.format->id == WINED3DFMT_P8_UINT)
4198                 HeapFree(GetProcessHeap(), 0, mem);
4199             LEAVE_GL();
4200             return;
4201         }
4202
4203         top = mem + pitch * local_rect.top;
4204         bottom = mem + pitch * (local_rect.bottom - 1);
4205         for(i = 0; i < (local_rect.bottom - local_rect.top) / 2; i++) {
4206             memcpy(row, top + off, len);
4207             memcpy(top + off, bottom + off, len);
4208             memcpy(bottom + off, row, len);
4209             top += pitch;
4210             bottom -= pitch;
4211         }
4212         HeapFree(GetProcessHeap(), 0, row);
4213
4214         /* Unmap the temp PBO buffer */
4215         if (surface->flags & SFLAG_PBO)
4216         {
4217             GL_EXTCALL(glUnmapBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB));
4218             GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4219         }
4220     }
4221
4222     LEAVE_GL();
4223     context_release(context);
4224
4225     /* For P8 textures we need to perform an inverse palette lookup. This is
4226      * done by searching for a palette index which matches the RGB value.
4227      * Note this isn't guaranteed to work when there are multiple entries for
4228      * the same color but we have no choice. In case of P8 render targets,
4229      * the index is stored in the alpha component so no conversion is needed. */
4230     if (surface->resource.format->id == WINED3DFMT_P8_UINT && !primary_render_target_is_p8(device))
4231     {
4232         const PALETTEENTRY *pal = NULL;
4233         DWORD width = pitch / 3;
4234         int x, y, c;
4235
4236         if (surface->palette)
4237         {
4238             pal = surface->palette->palents;
4239         }
4240         else
4241         {
4242             ERR("Palette is missing, cannot perform inverse palette lookup\n");
4243             HeapFree(GetProcessHeap(), 0, mem);
4244             return;
4245         }
4246
4247         for(y = local_rect.top; y < local_rect.bottom; y++) {
4248             for(x = local_rect.left; x < local_rect.right; x++) {
4249                 /*                      start              lines            pixels      */
4250                 const BYTE *blue = mem + y * pitch + x * (sizeof(BYTE) * 3);
4251                 const BYTE *green = blue  + 1;
4252                 const BYTE *red = green + 1;
4253
4254                 for(c = 0; c < 256; c++) {
4255                     if(*red   == pal[c].peRed   &&
4256                        *green == pal[c].peGreen &&
4257                        *blue  == pal[c].peBlue)
4258                     {
4259                         *((BYTE *) dest + y * width + x) = c;
4260                         break;
4261                     }
4262                 }
4263             }
4264         }
4265         HeapFree(GetProcessHeap(), 0, mem);
4266     }
4267 }
4268
4269 /* Read the framebuffer contents into a texture. Note that this function
4270  * doesn't do any kind of flipping. Using this on an onscreen surface will
4271  * result in a flipped D3D texture. */
4272 void surface_load_fb_texture(struct wined3d_surface *surface, BOOL srgb)
4273 {
4274     struct wined3d_device *device = surface->resource.device;
4275     struct wined3d_context *context;
4276
4277     context = context_acquire(device, surface);
4278     device_invalidate_state(device, STATE_FRAMEBUFFER);
4279
4280     surface_prepare_texture(surface, context, srgb);
4281     surface_bind_and_dirtify(surface, context, srgb);
4282
4283     TRACE("Reading back offscreen render target %p.\n", surface);
4284
4285     ENTER_GL();
4286
4287     if (surface_is_offscreen(surface))
4288         glReadBuffer(device->offscreenBuffer);
4289     else
4290         glReadBuffer(surface_get_gl_buffer(surface));
4291     checkGLcall("glReadBuffer");
4292
4293     glCopyTexSubImage2D(surface->texture_target, surface->texture_level,
4294             0, 0, 0, 0, surface->resource.width, surface->resource.height);
4295     checkGLcall("glCopyTexSubImage2D");
4296
4297     LEAVE_GL();
4298
4299     context_release(context);
4300 }
4301
4302 /* Context activation is done by the caller. */
4303 static void surface_prepare_texture_internal(struct wined3d_surface *surface,
4304         struct wined3d_context *context, BOOL srgb)
4305 {
4306     DWORD alloc_flag = srgb ? SFLAG_SRGBALLOCATED : SFLAG_ALLOCATED;
4307     CONVERT_TYPES convert;
4308     struct wined3d_format format;
4309
4310     if (surface->flags & alloc_flag) return;
4311
4312     d3dfmt_get_conv(surface, TRUE, TRUE, &format, &convert);
4313     if (convert != NO_CONVERSION || format.convert) surface->flags |= SFLAG_CONVERTED;
4314     else surface->flags &= ~SFLAG_CONVERTED;
4315
4316     surface_bind_and_dirtify(surface, context, srgb);
4317     surface_allocate_surface(surface, context->gl_info, &format, srgb);
4318     surface->flags |= alloc_flag;
4319 }
4320
4321 /* Context activation is done by the caller. */
4322 void surface_prepare_texture(struct wined3d_surface *surface, struct wined3d_context *context, BOOL srgb)
4323 {
4324     if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
4325     {
4326         struct wined3d_texture *texture = surface->container.u.texture;
4327         UINT sub_count = texture->level_count * texture->layer_count;
4328         UINT i;
4329
4330         TRACE("surface %p is a subresource of texture %p.\n", surface, texture);
4331
4332         for (i = 0; i < sub_count; ++i)
4333         {
4334             struct wined3d_surface *s = surface_from_resource(texture->sub_resources[i]);
4335             surface_prepare_texture_internal(s, context, srgb);
4336         }
4337
4338         return;
4339     }
4340
4341     surface_prepare_texture_internal(surface, context, srgb);
4342 }
4343
4344 void surface_prepare_rb(struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info, BOOL multisample)
4345 {
4346     if (multisample)
4347     {
4348         if (surface->rb_multisample)
4349             return;
4350
4351         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_multisample);
4352         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_multisample);
4353         gl_info->fbo_ops.glRenderbufferStorageMultisample(GL_RENDERBUFFER, surface->resource.multisample_type,
4354                 surface->resource.format->glInternal, surface->pow2Width, surface->pow2Height);
4355         TRACE("Created multisample rb %u.\n", surface->rb_multisample);
4356     }
4357     else
4358     {
4359         if (surface->rb_resolved)
4360             return;
4361
4362         gl_info->fbo_ops.glGenRenderbuffers(1, &surface->rb_resolved);
4363         gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, surface->rb_resolved);
4364         gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, surface->resource.format->glInternal,
4365                 surface->pow2Width, surface->pow2Height);
4366         TRACE("Created resolved rb %u.\n", surface->rb_resolved);
4367     }
4368 }
4369
4370 static void flush_to_framebuffer_drawpixels(struct wined3d_surface *surface,
4371         const RECT *rect, GLenum fmt, GLenum type, UINT bpp, const BYTE *mem)
4372 {
4373     struct wined3d_device *device = surface->resource.device;
4374     UINT pitch = wined3d_surface_get_pitch(surface);
4375     const struct wined3d_gl_info *gl_info;
4376     struct wined3d_context *context;
4377     RECT local_rect;
4378     UINT w, h;
4379
4380     surface_get_rect(surface, rect, &local_rect);
4381
4382     mem += local_rect.top * pitch + local_rect.left * bpp;
4383     w = local_rect.right - local_rect.left;
4384     h = local_rect.bottom - local_rect.top;
4385
4386     /* Activate the correct context for the render target */
4387     context = context_acquire(device, surface);
4388     context_apply_blit_state(context, device);
4389     gl_info = context->gl_info;
4390
4391     ENTER_GL();
4392
4393     if (!surface_is_offscreen(surface))
4394     {
4395         GLenum buffer = surface_get_gl_buffer(surface);
4396         TRACE("Unlocking %#x buffer.\n", buffer);
4397         context_set_draw_buffer(context, buffer);
4398
4399         surface_translate_drawable_coords(surface, context->win_handle, &local_rect);
4400         glPixelZoom(1.0f, -1.0f);
4401     }
4402     else
4403     {
4404         /* Primary offscreen render target */
4405         TRACE("Offscreen render target.\n");
4406         context_set_draw_buffer(context, device->offscreenBuffer);
4407
4408         glPixelZoom(1.0f, 1.0f);
4409     }
4410
4411     glRasterPos3i(local_rect.left, local_rect.top, 1);
4412     checkGLcall("glRasterPos3i");
4413
4414     /* If not fullscreen, we need to skip a number of bytes to find the next row of data */
4415     glPixelStorei(GL_UNPACK_ROW_LENGTH, surface->resource.width);
4416
4417     if (surface->flags & SFLAG_PBO)
4418     {
4419         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
4420         checkGLcall("glBindBufferARB");
4421     }
4422
4423     glDrawPixels(w, h, fmt, type, mem);
4424     checkGLcall("glDrawPixels");
4425
4426     if (surface->flags & SFLAG_PBO)
4427     {
4428         GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0));
4429         checkGLcall("glBindBufferARB");
4430     }
4431
4432     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
4433     checkGLcall("glPixelStorei(GL_UNPACK_ROW_LENGTH, 0)");
4434
4435     LEAVE_GL();
4436
4437     if (wined3d_settings.strict_draw_ordering
4438             || (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
4439             && surface->container.u.swapchain->front_buffer == surface))
4440         wglFlush();
4441
4442     context_release(context);
4443 }
4444
4445 HRESULT d3dfmt_get_conv(const struct wined3d_surface *surface, BOOL need_alpha_ck,
4446         BOOL use_texturing, struct wined3d_format *format, CONVERT_TYPES *convert)
4447 {
4448     BOOL colorkey_active = need_alpha_ck && (surface->CKeyFlags & WINEDDSD_CKSRCBLT);
4449     const struct wined3d_device *device = surface->resource.device;
4450     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4451     BOOL blit_supported = FALSE;
4452
4453     /* Copy the default values from the surface. Below we might perform fixups */
4454     /* TODO: get rid of color keying desc fixups by using e.g. a table. */
4455     *format = *surface->resource.format;
4456     *convert = NO_CONVERSION;
4457
4458     /* Ok, now look if we have to do any conversion */
4459     switch (surface->resource.format->id)
4460     {
4461         case WINED3DFMT_P8_UINT:
4462             /* Below the call to blit_supported is disabled for Wine 1.2
4463              * because the function isn't operating correctly yet. At the
4464              * moment 8-bit blits are handled in software and if certain GL
4465              * extensions are around, surface conversion is performed at
4466              * upload time. The blit_supported call recognizes it as a
4467              * destination fixup. This type of upload 'fixup' and 8-bit to
4468              * 8-bit blits need to be handled by the blit_shader.
4469              * TODO: get rid of this #if 0. */
4470 #if 0
4471             blit_supported = device->blitter->blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
4472                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format,
4473                     &rect, surface->resource.usage, surface->resource.pool, surface->resource.format);
4474 #endif
4475             blit_supported = gl_info->supported[EXT_PALETTED_TEXTURE] || gl_info->supported[ARB_FRAGMENT_PROGRAM];
4476
4477             /* Use conversion when the blit_shader backend supports it. It only supports this in case of
4478              * texturing. Further also use conversion in case of color keying.
4479              * Paletted textures can be emulated using shaders but only do that for 2D purposes e.g. situations
4480              * in which the main render target uses p8. Some games like GTA Vice City use P8 for texturing which
4481              * conflicts with this.
4482              */
4483             if (!((blit_supported && device->fb.render_targets && surface == device->fb.render_targets[0]))
4484                     || colorkey_active || !use_texturing)
4485             {
4486                 format->glFormat = GL_RGBA;
4487                 format->glInternal = GL_RGBA;
4488                 format->glType = GL_UNSIGNED_BYTE;
4489                 format->conv_byte_count = 4;
4490                 if (colorkey_active)
4491                     *convert = CONVERT_PALETTED_CK;
4492                 else
4493                     *convert = CONVERT_PALETTED;
4494             }
4495             break;
4496
4497         case WINED3DFMT_B2G3R3_UNORM:
4498             /* **********************
4499                 GL_UNSIGNED_BYTE_3_3_2
4500                 ********************** */
4501             if (colorkey_active) {
4502                 /* This texture format will never be used.. So do not care about color keying
4503                     up until the point in time it will be needed :-) */
4504                 FIXME(" ColorKeying not supported in the RGB 332 format !\n");
4505             }
4506             break;
4507
4508         case WINED3DFMT_B5G6R5_UNORM:
4509             if (colorkey_active)
4510             {
4511                 *convert = CONVERT_CK_565;
4512                 format->glFormat = GL_RGBA;
4513                 format->glInternal = GL_RGB5_A1;
4514                 format->glType = GL_UNSIGNED_SHORT_5_5_5_1;
4515                 format->conv_byte_count = 2;
4516             }
4517             break;
4518
4519         case WINED3DFMT_B5G5R5X1_UNORM:
4520             if (colorkey_active)
4521             {
4522                 *convert = CONVERT_CK_5551;
4523                 format->glFormat = GL_BGRA;
4524                 format->glInternal = GL_RGB5_A1;
4525                 format->glType = GL_UNSIGNED_SHORT_1_5_5_5_REV;
4526                 format->conv_byte_count = 2;
4527             }
4528             break;
4529
4530         case WINED3DFMT_B8G8R8_UNORM:
4531             if (colorkey_active)
4532             {
4533                 *convert = CONVERT_CK_RGB24;
4534                 format->glFormat = GL_RGBA;
4535                 format->glInternal = GL_RGBA8;
4536                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4537                 format->conv_byte_count = 4;
4538             }
4539             break;
4540
4541         case WINED3DFMT_B8G8R8X8_UNORM:
4542             if (colorkey_active)
4543             {
4544                 *convert = CONVERT_RGB32_888;
4545                 format->glFormat = GL_RGBA;
4546                 format->glInternal = GL_RGBA8;
4547                 format->glType = GL_UNSIGNED_INT_8_8_8_8;
4548                 format->conv_byte_count = 4;
4549             }
4550             break;
4551
4552         default:
4553             break;
4554     }
4555
4556     return WINED3D_OK;
4557 }
4558
4559 void d3dfmt_p8_init_palette(const struct wined3d_surface *surface, BYTE table[256][4], BOOL colorkey)
4560 {
4561     const struct wined3d_device *device = surface->resource.device;
4562     const struct wined3d_palette *pal = surface->palette;
4563     BOOL index_in_alpha = FALSE;
4564     unsigned int i;
4565
4566     /* Old games like StarCraft, C&C, Red Alert and others use P8 render targets.
4567      * Reading back the RGB output each lockrect (each frame as they lock the whole screen)
4568      * is slow. Further RGB->P8 conversion is not possible because palettes can have
4569      * duplicate entries. Store the color key in the unused alpha component to speed the
4570      * download up and to make conversion unneeded. */
4571     index_in_alpha = primary_render_target_is_p8(device);
4572
4573     if (!pal)
4574     {
4575         ERR("This code should never get entered for DirectDraw!, expect problems\n");
4576         if (index_in_alpha)
4577         {
4578             /* Guarantees that memory representation remains correct after sysmem<->texture transfers even if
4579              * there's no palette at this time. */
4580             for (i = 0; i < 256; i++) table[i][3] = i;
4581         }
4582     }
4583     else
4584     {
4585         TRACE("Using surface palette %p\n", pal);
4586         /* Get the surface's palette */
4587         for (i = 0; i < 256; ++i)
4588         {
4589             table[i][0] = pal->palents[i].peRed;
4590             table[i][1] = pal->palents[i].peGreen;
4591             table[i][2] = pal->palents[i].peBlue;
4592
4593             /* When index_in_alpha is set the palette index is stored in the
4594              * alpha component. In case of a readback we can then read
4595              * GL_ALPHA. Color keying is handled in BltOverride using a
4596              * GL_ALPHA_TEST using GL_NOT_EQUAL. In case of index_in_alpha the
4597              * color key itself is passed to glAlphaFunc in other cases the
4598              * alpha component of pixels that should be masked away is set to 0. */
4599             if (index_in_alpha)
4600             {
4601                 table[i][3] = i;
4602             }
4603             else if (colorkey && (i >= surface->SrcBltCKey.dwColorSpaceLowValue)
4604                     && (i <= surface->SrcBltCKey.dwColorSpaceHighValue))
4605             {
4606                 table[i][3] = 0x00;
4607             }
4608             else if (pal->flags & WINEDDPCAPS_ALPHA)
4609             {
4610                 table[i][3] = pal->palents[i].peFlags;
4611             }
4612             else
4613             {
4614                 table[i][3] = 0xFF;
4615             }
4616         }
4617     }
4618 }
4619
4620 static HRESULT d3dfmt_convert_surface(const BYTE *src, BYTE *dst, UINT pitch, UINT width,
4621         UINT height, UINT outpitch, CONVERT_TYPES convert, struct wined3d_surface *surface)
4622 {
4623     const BYTE *source;
4624     BYTE *dest;
4625     TRACE("(%p)->(%p),(%d,%d,%d,%d,%p)\n", src, dst, pitch, height, outpitch, convert, surface);
4626
4627     switch (convert) {
4628         case NO_CONVERSION:
4629         {
4630             memcpy(dst, src, pitch * height);
4631             break;
4632         }
4633         case CONVERT_PALETTED:
4634         case CONVERT_PALETTED_CK:
4635         {
4636             BYTE table[256][4];
4637             unsigned int x, y;
4638
4639             d3dfmt_p8_init_palette(surface, table, (convert == CONVERT_PALETTED_CK));
4640
4641             for (y = 0; y < height; y++)
4642             {
4643                 source = src + pitch * y;
4644                 dest = dst + outpitch * y;
4645                 /* This is an 1 bpp format, using the width here is fine */
4646                 for (x = 0; x < width; x++) {
4647                     BYTE color = *source++;
4648                     *dest++ = table[color][0];
4649                     *dest++ = table[color][1];
4650                     *dest++ = table[color][2];
4651                     *dest++ = table[color][3];
4652                 }
4653             }
4654         }
4655         break;
4656
4657         case CONVERT_CK_565:
4658         {
4659             /* Converting the 565 format in 5551 packed to emulate color-keying.
4660
4661               Note : in all these conversion, it would be best to average the averaging
4662                       pixels to get the color of the pixel that will be color-keyed to
4663                       prevent 'color bleeding'. This will be done later on if ever it is
4664                       too visible.
4665
4666               Note2: Nvidia documents say that their driver does not support alpha + color keying
4667                      on the same surface and disables color keying in such a case
4668             */
4669             unsigned int x, y;
4670             const WORD *Source;
4671             WORD *Dest;
4672
4673             TRACE("Color keyed 565\n");
4674
4675             for (y = 0; y < height; y++) {
4676                 Source = (const WORD *)(src + y * pitch);
4677                 Dest = (WORD *) (dst + y * outpitch);
4678                 for (x = 0; x < width; x++ ) {
4679                     WORD color = *Source++;
4680                     *Dest = ((color & 0xFFC0) | ((color & 0x1F) << 1));
4681                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4682                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4683                         *Dest |= 0x0001;
4684                     Dest++;
4685                 }
4686             }
4687         }
4688         break;
4689
4690         case CONVERT_CK_5551:
4691         {
4692             /* Converting X1R5G5B5 format to R5G5B5A1 to emulate color-keying. */
4693             unsigned int x, y;
4694             const WORD *Source;
4695             WORD *Dest;
4696             TRACE("Color keyed 5551\n");
4697             for (y = 0; y < height; y++) {
4698                 Source = (const WORD *)(src + y * pitch);
4699                 Dest = (WORD *) (dst + y * outpitch);
4700                 for (x = 0; x < width; x++ ) {
4701                     WORD color = *Source++;
4702                     *Dest = color;
4703                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4704                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4705                         *Dest |= (1 << 15);
4706                     else
4707                         *Dest &= ~(1 << 15);
4708                     Dest++;
4709                 }
4710             }
4711         }
4712         break;
4713
4714         case CONVERT_CK_RGB24:
4715         {
4716             /* Converting R8G8B8 format to R8G8B8A8 with color-keying. */
4717             unsigned int x, y;
4718             for (y = 0; y < height; y++)
4719             {
4720                 source = src + pitch * y;
4721                 dest = dst + outpitch * y;
4722                 for (x = 0; x < width; x++) {
4723                     DWORD color = ((DWORD)source[0] << 16) + ((DWORD)source[1] << 8) + (DWORD)source[2] ;
4724                     DWORD dstcolor = color << 8;
4725                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4726                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4727                         dstcolor |= 0xff;
4728                     *(DWORD*)dest = dstcolor;
4729                     source += 3;
4730                     dest += 4;
4731                 }
4732             }
4733         }
4734         break;
4735
4736         case CONVERT_RGB32_888:
4737         {
4738             /* Converting X8R8G8B8 format to R8G8B8A8 with color-keying. */
4739             unsigned int x, y;
4740             for (y = 0; y < height; y++)
4741             {
4742                 source = src + pitch * y;
4743                 dest = dst + outpitch * y;
4744                 for (x = 0; x < width; x++) {
4745                     DWORD color = 0xffffff & *(const DWORD*)source;
4746                     DWORD dstcolor = color << 8;
4747                     if ((color < surface->SrcBltCKey.dwColorSpaceLowValue)
4748                             || (color > surface->SrcBltCKey.dwColorSpaceHighValue))
4749                         dstcolor |= 0xff;
4750                     *(DWORD*)dest = dstcolor;
4751                     source += 4;
4752                     dest += 4;
4753                 }
4754             }
4755         }
4756         break;
4757
4758         default:
4759             ERR("Unsupported conversion type %#x.\n", convert);
4760     }
4761     return WINED3D_OK;
4762 }
4763
4764 void flip_surface(struct wined3d_surface *front, struct wined3d_surface *back)
4765 {
4766     /* Flip the surface contents */
4767     /* Flip the DC */
4768     {
4769         HDC tmp;
4770         tmp = front->hDC;
4771         front->hDC = back->hDC;
4772         back->hDC = tmp;
4773     }
4774
4775     /* Flip the DIBsection */
4776     {
4777         HBITMAP tmp;
4778         BOOL hasDib = front->flags & SFLAG_DIBSECTION;
4779         tmp = front->dib.DIBsection;
4780         front->dib.DIBsection = back->dib.DIBsection;
4781         back->dib.DIBsection = tmp;
4782
4783         if (back->flags & SFLAG_DIBSECTION) front->flags |= SFLAG_DIBSECTION;
4784         else front->flags &= ~SFLAG_DIBSECTION;
4785         if (hasDib) back->flags |= SFLAG_DIBSECTION;
4786         else back->flags &= ~SFLAG_DIBSECTION;
4787     }
4788
4789     /* Flip the surface data */
4790     {
4791         void* tmp;
4792
4793         tmp = front->dib.bitmap_data;
4794         front->dib.bitmap_data = back->dib.bitmap_data;
4795         back->dib.bitmap_data = tmp;
4796
4797         tmp = front->resource.allocatedMemory;
4798         front->resource.allocatedMemory = back->resource.allocatedMemory;
4799         back->resource.allocatedMemory = tmp;
4800
4801         tmp = front->resource.heapMemory;
4802         front->resource.heapMemory = back->resource.heapMemory;
4803         back->resource.heapMemory = tmp;
4804     }
4805
4806     /* Flip the PBO */
4807     {
4808         GLuint tmp_pbo = front->pbo;
4809         front->pbo = back->pbo;
4810         back->pbo = tmp_pbo;
4811     }
4812
4813     /* client_memory should not be different, but just in case */
4814     {
4815         BOOL tmp;
4816         tmp = front->dib.client_memory;
4817         front->dib.client_memory = back->dib.client_memory;
4818         back->dib.client_memory = tmp;
4819     }
4820
4821     /* Flip the opengl texture */
4822     {
4823         GLuint tmp;
4824
4825         tmp = back->texture_name;
4826         back->texture_name = front->texture_name;
4827         front->texture_name = tmp;
4828
4829         tmp = back->texture_name_srgb;
4830         back->texture_name_srgb = front->texture_name_srgb;
4831         front->texture_name_srgb = tmp;
4832
4833         tmp = back->rb_multisample;
4834         back->rb_multisample = front->rb_multisample;
4835         front->rb_multisample = tmp;
4836
4837         tmp = back->rb_resolved;
4838         back->rb_resolved = front->rb_resolved;
4839         front->rb_resolved = tmp;
4840
4841         resource_unload(&back->resource);
4842         resource_unload(&front->resource);
4843     }
4844
4845     {
4846         DWORD tmp_flags = back->flags;
4847         back->flags = front->flags;
4848         front->flags = tmp_flags;
4849     }
4850 }
4851
4852 /* Does a direct frame buffer -> texture copy. Stretching is done with single
4853  * pixel copy calls. */
4854 static void fb_copy_to_texture_direct(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4855         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4856 {
4857     struct wined3d_device *device = dst_surface->resource.device;
4858     float xrel, yrel;
4859     UINT row;
4860     struct wined3d_context *context;
4861     BOOL upsidedown = FALSE;
4862     RECT dst_rect = *dst_rect_in;
4863
4864     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
4865      * glCopyTexSubImage is a bit picky about the parameters we pass to it
4866      */
4867     if(dst_rect.top > dst_rect.bottom) {
4868         UINT tmp = dst_rect.bottom;
4869         dst_rect.bottom = dst_rect.top;
4870         dst_rect.top = tmp;
4871         upsidedown = TRUE;
4872     }
4873
4874     context = context_acquire(device, src_surface);
4875     context_apply_blit_state(context, device);
4876     surface_internal_preload(dst_surface, SRGB_RGB);
4877     ENTER_GL();
4878
4879     /* Bind the target texture */
4880     context_bind_texture(context, dst_surface->texture_target, dst_surface->texture_name);
4881     if (surface_is_offscreen(src_surface))
4882     {
4883         TRACE("Reading from an offscreen target\n");
4884         upsidedown = !upsidedown;
4885         glReadBuffer(device->offscreenBuffer);
4886     }
4887     else
4888     {
4889         glReadBuffer(surface_get_gl_buffer(src_surface));
4890     }
4891     checkGLcall("glReadBuffer");
4892
4893     xrel = (float) (src_rect->right - src_rect->left) / (float) (dst_rect.right - dst_rect.left);
4894     yrel = (float) (src_rect->bottom - src_rect->top) / (float) (dst_rect.bottom - dst_rect.top);
4895
4896     if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4897     {
4898         FIXME("Doing a pixel by pixel copy from the framebuffer to a texture, expect major performance issues\n");
4899
4900         if(Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT) {
4901             ERR("Texture filtering not supported in direct blit\n");
4902         }
4903     }
4904     else if ((Filter != WINED3DTEXF_NONE && Filter != WINED3DTEXF_POINT)
4905             && ((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4906     {
4907         ERR("Texture filtering not supported in direct blit\n");
4908     }
4909
4910     if (upsidedown
4911             && !((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4912             && !((yrel - 1.0f < -eps) || (yrel - 1.0f > eps)))
4913     {
4914         /* Upside down copy without stretching is nice, one glCopyTexSubImage call will do */
4915
4916         glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4917                 dst_rect.left /*xoffset */, dst_rect.top /* y offset */,
4918                 src_rect->left, src_surface->resource.height - src_rect->bottom,
4919                 dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
4920     }
4921     else
4922     {
4923         UINT yoffset = src_surface->resource.height - src_rect->top + dst_rect.top - 1;
4924         /* I have to process this row by row to swap the image,
4925          * otherwise it would be upside down, so stretching in y direction
4926          * doesn't cost extra time
4927          *
4928          * However, stretching in x direction can be avoided if not necessary
4929          */
4930         for(row = dst_rect.top; row < dst_rect.bottom; row++) {
4931             if ((xrel - 1.0f < -eps) || (xrel - 1.0f > eps))
4932             {
4933                 /* Well, that stuff works, but it's very slow.
4934                  * find a better way instead
4935                  */
4936                 UINT col;
4937
4938                 for (col = dst_rect.left; col < dst_rect.right; ++col)
4939                 {
4940                     glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4941                             dst_rect.left + col /* x offset */, row /* y offset */,
4942                             src_rect->left + col * xrel, yoffset - (int) (row * yrel), 1, 1);
4943                 }
4944             }
4945             else
4946             {
4947                 glCopyTexSubImage2D(dst_surface->texture_target, dst_surface->texture_level,
4948                         dst_rect.left /* x offset */, row /* y offset */,
4949                         src_rect->left, yoffset - (int) (row * yrel), dst_rect.right - dst_rect.left, 1);
4950             }
4951         }
4952     }
4953     checkGLcall("glCopyTexSubImage2D");
4954
4955     LEAVE_GL();
4956     context_release(context);
4957
4958     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
4959      * path is never entered
4960      */
4961     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
4962 }
4963
4964 /* Uses the hardware to stretch and flip the image */
4965 static void fb_copy_to_texture_hwstretch(struct wined3d_surface *dst_surface, struct wined3d_surface *src_surface,
4966         const RECT *src_rect, const RECT *dst_rect_in, WINED3DTEXTUREFILTERTYPE Filter)
4967 {
4968     struct wined3d_device *device = dst_surface->resource.device;
4969     struct wined3d_swapchain *src_swapchain = NULL;
4970     GLuint src, backup = 0;
4971     float left, right, top, bottom; /* Texture coordinates */
4972     UINT fbwidth = src_surface->resource.width;
4973     UINT fbheight = src_surface->resource.height;
4974     struct wined3d_context *context;
4975     GLenum drawBuffer = GL_BACK;
4976     GLenum texture_target;
4977     BOOL noBackBufferBackup;
4978     BOOL src_offscreen;
4979     BOOL upsidedown = FALSE;
4980     RECT dst_rect = *dst_rect_in;
4981
4982     TRACE("Using hwstretch blit\n");
4983     /* Activate the Proper context for reading from the source surface, set it up for blitting */
4984     context = context_acquire(device, src_surface);
4985     context_apply_blit_state(context, device);
4986     surface_internal_preload(dst_surface, SRGB_RGB);
4987
4988     src_offscreen = surface_is_offscreen(src_surface);
4989     noBackBufferBackup = src_offscreen && wined3d_settings.offscreen_rendering_mode == ORM_FBO;
4990     if (!noBackBufferBackup && !src_surface->texture_name)
4991     {
4992         /* Get it a description */
4993         surface_internal_preload(src_surface, SRGB_RGB);
4994     }
4995     ENTER_GL();
4996
4997     /* Try to use an aux buffer for drawing the rectangle. This way it doesn't need restoring.
4998      * This way we don't have to wait for the 2nd readback to finish to leave this function.
4999      */
5000     if (context->aux_buffers >= 2)
5001     {
5002         /* Got more than one aux buffer? Use the 2nd aux buffer */
5003         drawBuffer = GL_AUX1;
5004     }
5005     else if ((!src_offscreen || device->offscreenBuffer == GL_BACK) && context->aux_buffers >= 1)
5006     {
5007         /* Only one aux buffer, but it isn't used (Onscreen rendering, or non-aux orm)? Use it! */
5008         drawBuffer = GL_AUX0;
5009     }
5010
5011     if(noBackBufferBackup) {
5012         glGenTextures(1, &backup);
5013         checkGLcall("glGenTextures");
5014         context_bind_texture(context, GL_TEXTURE_2D, backup);
5015         texture_target = GL_TEXTURE_2D;
5016     } else {
5017         /* Backup the back buffer and copy the source buffer into a texture to draw an upside down stretched quad. If
5018          * we are reading from the back buffer, the backup can be used as source texture
5019          */
5020         texture_target = src_surface->texture_target;
5021         context_bind_texture(context, texture_target, src_surface->texture_name);
5022         glEnable(texture_target);
5023         checkGLcall("glEnable(texture_target)");
5024
5025         /* For now invalidate the texture copy of the back buffer. Drawable and sysmem copy are untouched */
5026         src_surface->flags &= ~SFLAG_INTEXTURE;
5027     }
5028
5029     /* Make sure that the top pixel is always above the bottom pixel, and keep a separate upside down flag
5030      * glCopyTexSubImage is a bit picky about the parameters we pass to it
5031      */
5032     if(dst_rect.top > dst_rect.bottom) {
5033         UINT tmp = dst_rect.bottom;
5034         dst_rect.bottom = dst_rect.top;
5035         dst_rect.top = tmp;
5036         upsidedown = TRUE;
5037     }
5038
5039     if (src_offscreen)
5040     {
5041         TRACE("Reading from an offscreen target\n");
5042         upsidedown = !upsidedown;
5043         glReadBuffer(device->offscreenBuffer);
5044     }
5045     else
5046     {
5047         glReadBuffer(surface_get_gl_buffer(src_surface));
5048     }
5049
5050     /* TODO: Only back up the part that will be overwritten */
5051     glCopyTexSubImage2D(texture_target, 0,
5052                         0, 0 /* read offsets */,
5053                         0, 0,
5054                         fbwidth,
5055                         fbheight);
5056
5057     checkGLcall("glCopyTexSubImage2D");
5058
5059     /* No issue with overriding these - the sampler is dirty due to blit usage */
5060     glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER,
5061             wined3d_gl_mag_filter(magLookup, Filter));
5062     checkGLcall("glTexParameteri");
5063     glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER,
5064             wined3d_gl_min_mip_filter(minMipLookup, Filter, WINED3DTEXF_NONE));
5065     checkGLcall("glTexParameteri");
5066
5067     if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5068         src_swapchain = src_surface->container.u.swapchain;
5069     if (!src_swapchain || src_surface == src_swapchain->back_buffers[0])
5070     {
5071         src = backup ? backup : src_surface->texture_name;
5072     }
5073     else
5074     {
5075         glReadBuffer(GL_FRONT);
5076         checkGLcall("glReadBuffer(GL_FRONT)");
5077
5078         glGenTextures(1, &src);
5079         checkGLcall("glGenTextures(1, &src)");
5080         context_bind_texture(context, GL_TEXTURE_2D, src);
5081
5082         /* TODO: Only copy the part that will be read. Use src_rect->left, src_rect->bottom as origin, but with the width watch
5083          * out for power of 2 sizes
5084          */
5085         glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, src_surface->pow2Width,
5086                 src_surface->pow2Height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
5087         checkGLcall("glTexImage2D");
5088         glCopyTexSubImage2D(GL_TEXTURE_2D, 0,
5089                             0, 0 /* read offsets */,
5090                             0, 0,
5091                             fbwidth,
5092                             fbheight);
5093
5094         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5095         checkGLcall("glTexParameteri");
5096         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5097         checkGLcall("glTexParameteri");
5098
5099         glReadBuffer(GL_BACK);
5100         checkGLcall("glReadBuffer(GL_BACK)");
5101
5102         if(texture_target != GL_TEXTURE_2D) {
5103             glDisable(texture_target);
5104             glEnable(GL_TEXTURE_2D);
5105             texture_target = GL_TEXTURE_2D;
5106         }
5107     }
5108     checkGLcall("glEnd and previous");
5109
5110     left = src_rect->left;
5111     right = src_rect->right;
5112
5113     if (!upsidedown)
5114     {
5115         top = src_surface->resource.height - src_rect->top;
5116         bottom = src_surface->resource.height - src_rect->bottom;
5117     }
5118     else
5119     {
5120         top = src_surface->resource.height - src_rect->bottom;
5121         bottom = src_surface->resource.height - src_rect->top;
5122     }
5123
5124     if (src_surface->flags & SFLAG_NORMCOORD)
5125     {
5126         left /= src_surface->pow2Width;
5127         right /= src_surface->pow2Width;
5128         top /= src_surface->pow2Height;
5129         bottom /= src_surface->pow2Height;
5130     }
5131
5132     /* draw the source texture stretched and upside down. The correct surface is bound already */
5133     glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP);
5134     glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP);
5135
5136     context_set_draw_buffer(context, drawBuffer);
5137     glReadBuffer(drawBuffer);
5138
5139     glBegin(GL_QUADS);
5140         /* bottom left */
5141         glTexCoord2f(left, bottom);
5142         glVertex2i(0, 0);
5143
5144         /* top left */
5145         glTexCoord2f(left, top);
5146         glVertex2i(0, dst_rect.bottom - dst_rect.top);
5147
5148         /* top right */
5149         glTexCoord2f(right, top);
5150         glVertex2i(dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5151
5152         /* bottom right */
5153         glTexCoord2f(right, bottom);
5154         glVertex2i(dst_rect.right - dst_rect.left, 0);
5155     glEnd();
5156     checkGLcall("glEnd and previous");
5157
5158     if (texture_target != dst_surface->texture_target)
5159     {
5160         glDisable(texture_target);
5161         glEnable(dst_surface->texture_target);
5162         texture_target = dst_surface->texture_target;
5163     }
5164
5165     /* Now read the stretched and upside down image into the destination texture */
5166     context_bind_texture(context, texture_target, dst_surface->texture_name);
5167     glCopyTexSubImage2D(texture_target,
5168                         0,
5169                         dst_rect.left, dst_rect.top, /* xoffset, yoffset */
5170                         0, 0, /* We blitted the image to the origin */
5171                         dst_rect.right - dst_rect.left, dst_rect.bottom - dst_rect.top);
5172     checkGLcall("glCopyTexSubImage2D");
5173
5174     if(drawBuffer == GL_BACK) {
5175         /* Write the back buffer backup back */
5176         if(backup) {
5177             if(texture_target != GL_TEXTURE_2D) {
5178                 glDisable(texture_target);
5179                 glEnable(GL_TEXTURE_2D);
5180                 texture_target = GL_TEXTURE_2D;
5181             }
5182             context_bind_texture(context, GL_TEXTURE_2D, backup);
5183         }
5184         else
5185         {
5186             if (texture_target != src_surface->texture_target)
5187             {
5188                 glDisable(texture_target);
5189                 glEnable(src_surface->texture_target);
5190                 texture_target = src_surface->texture_target;
5191             }
5192             context_bind_texture(context, src_surface->texture_target, src_surface->texture_name);
5193         }
5194
5195         glBegin(GL_QUADS);
5196             /* top left */
5197             glTexCoord2f(0.0f, 0.0f);
5198             glVertex2i(0, fbheight);
5199
5200             /* bottom left */
5201             glTexCoord2f(0.0f, (float)fbheight / (float)src_surface->pow2Height);
5202             glVertex2i(0, 0);
5203
5204             /* bottom right */
5205             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width,
5206                     (float)fbheight / (float)src_surface->pow2Height);
5207             glVertex2i(fbwidth, 0);
5208
5209             /* top right */
5210             glTexCoord2f((float)fbwidth / (float)src_surface->pow2Width, 0.0f);
5211             glVertex2i(fbwidth, fbheight);
5212         glEnd();
5213     }
5214     glDisable(texture_target);
5215     checkGLcall("glDisable(texture_target)");
5216
5217     /* Cleanup */
5218     if (src != src_surface->texture_name && src != backup)
5219     {
5220         glDeleteTextures(1, &src);
5221         checkGLcall("glDeleteTextures(1, &src)");
5222     }
5223     if(backup) {
5224         glDeleteTextures(1, &backup);
5225         checkGLcall("glDeleteTextures(1, &backup)");
5226     }
5227
5228     LEAVE_GL();
5229
5230     if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5231
5232     context_release(context);
5233
5234     /* The texture is now most up to date - If the surface is a render target and has a drawable, this
5235      * path is never entered
5236      */
5237     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
5238 }
5239
5240 /* Front buffer coordinates are always full screen coordinates, but our GL
5241  * drawable is limited to the window's client area. The sysmem and texture
5242  * copies do have the full screen size. Note that GL has a bottom-left
5243  * origin, while D3D has a top-left origin. */
5244 void surface_translate_drawable_coords(const struct wined3d_surface *surface, HWND window, RECT *rect)
5245 {
5246     UINT drawable_height;
5247
5248     if (surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5249             && surface == surface->container.u.swapchain->front_buffer)
5250     {
5251         POINT offset = {0, 0};
5252         RECT windowsize;
5253
5254         ScreenToClient(window, &offset);
5255         OffsetRect(rect, offset.x, offset.y);
5256
5257         GetClientRect(window, &windowsize);
5258         drawable_height = windowsize.bottom - windowsize.top;
5259     }
5260     else
5261     {
5262         drawable_height = surface->resource.height;
5263     }
5264
5265     rect->top = drawable_height - rect->top;
5266     rect->bottom = drawable_height - rect->bottom;
5267 }
5268
5269 static void surface_blt_to_drawable(const struct wined3d_device *device,
5270         WINED3DTEXTUREFILTERTYPE filter, BOOL color_key,
5271         struct wined3d_surface *src_surface, const RECT *src_rect_in,
5272         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
5273 {
5274     struct wined3d_context *context;
5275     RECT src_rect, dst_rect;
5276
5277     src_rect = *src_rect_in;
5278     dst_rect = *dst_rect_in;
5279
5280     /* Make sure the surface is up-to-date. This should probably use
5281      * surface_load_location() and worry about the destination surface too,
5282      * unless we're overwriting it completely. */
5283     surface_internal_preload(src_surface, SRGB_RGB);
5284
5285     /* Activate the destination context, set it up for blitting */
5286     context = context_acquire(device, dst_surface);
5287     context_apply_blit_state(context, device);
5288
5289     if (!surface_is_offscreen(dst_surface))
5290         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
5291
5292     device->blitter->set_shader(device->blit_priv, context, src_surface);
5293
5294     ENTER_GL();
5295
5296     if (color_key)
5297     {
5298         glEnable(GL_ALPHA_TEST);
5299         checkGLcall("glEnable(GL_ALPHA_TEST)");
5300
5301         /* When the primary render target uses P8, the alpha component
5302          * contains the palette index. Which means that the colorkey is one of
5303          * the palette entries. In other cases pixels that should be masked
5304          * away have alpha set to 0. */
5305         if (primary_render_target_is_p8(device))
5306             glAlphaFunc(GL_NOTEQUAL, (float)src_surface->SrcBltCKey.dwColorSpaceLowValue / 256.0f);
5307         else
5308             glAlphaFunc(GL_NOTEQUAL, 0.0f);
5309         checkGLcall("glAlphaFunc");
5310     }
5311     else
5312     {
5313         glDisable(GL_ALPHA_TEST);
5314         checkGLcall("glDisable(GL_ALPHA_TEST)");
5315     }
5316
5317     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
5318
5319     if (color_key)
5320     {
5321         glDisable(GL_ALPHA_TEST);
5322         checkGLcall("glDisable(GL_ALPHA_TEST)");
5323     }
5324
5325     LEAVE_GL();
5326
5327     /* Leave the opengl state valid for blitting */
5328     device->blitter->unset_shader(context->gl_info);
5329
5330     if (wined3d_settings.strict_draw_ordering
5331             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
5332             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
5333         wglFlush(); /* Flush to ensure ordering across contexts. */
5334
5335     context_release(context);
5336 }
5337
5338 /* Do not call while under the GL lock. */
5339 HRESULT surface_color_fill(struct wined3d_surface *s, const RECT *rect, const WINED3DCOLORVALUE *color)
5340 {
5341     struct wined3d_device *device = s->resource.device;
5342     const struct blit_shader *blitter;
5343
5344     blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_COLOR_FILL,
5345             NULL, 0, 0, NULL, rect, s->resource.usage, s->resource.pool, s->resource.format);
5346     if (!blitter)
5347     {
5348         FIXME("No blitter is capable of performing the requested color fill operation.\n");
5349         return WINED3DERR_INVALIDCALL;
5350     }
5351
5352     return blitter->color_fill(device, s, rect, color);
5353 }
5354
5355 /* Do not call while under the GL lock. */
5356 static HRESULT IWineD3DSurfaceImpl_BltOverride(struct wined3d_surface *dst_surface, const RECT *dst_rect,
5357         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags, const WINEDDBLTFX *DDBltFx,
5358         WINED3DTEXTUREFILTERTYPE Filter)
5359 {
5360     struct wined3d_device *device = dst_surface->resource.device;
5361     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
5362     struct wined3d_swapchain *srcSwapchain = NULL, *dstSwapchain = NULL;
5363
5364     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, blt_fx %p, filter %s.\n",
5365             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
5366             flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
5367
5368     /* Get the swapchain. One of the surfaces has to be a primary surface */
5369     if (dst_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5370     {
5371         WARN("Destination is in sysmem, rejecting gl blt\n");
5372         return WINED3DERR_INVALIDCALL;
5373     }
5374
5375     if (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5376         dstSwapchain = dst_surface->container.u.swapchain;
5377
5378     if (src_surface)
5379     {
5380         if (src_surface->resource.pool == WINED3DPOOL_SYSTEMMEM)
5381         {
5382             WARN("Src is in sysmem, rejecting gl blt\n");
5383             return WINED3DERR_INVALIDCALL;
5384         }
5385
5386         if (src_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN)
5387             srcSwapchain = src_surface->container.u.swapchain;
5388     }
5389
5390     /* Early sort out of cases where no render target is used */
5391     if (!dstSwapchain && !srcSwapchain
5392             && src_surface != device->fb.render_targets[0]
5393             && dst_surface != device->fb.render_targets[0])
5394     {
5395         TRACE("No surface is render target, not using hardware blit.\n");
5396         return WINED3DERR_INVALIDCALL;
5397     }
5398
5399     /* No destination color keying supported */
5400     if (flags & (WINEDDBLT_KEYDEST | WINEDDBLT_KEYDESTOVERRIDE))
5401     {
5402         /* Can we support that with glBlendFunc if blitting to the frame buffer? */
5403         TRACE("Destination color key not supported in accelerated Blit, falling back to software\n");
5404         return WINED3DERR_INVALIDCALL;
5405     }
5406
5407     if (dstSwapchain && dstSwapchain == srcSwapchain)
5408     {
5409         FIXME("Implement hardware blit between two surfaces on the same swapchain\n");
5410         return WINED3DERR_INVALIDCALL;
5411     }
5412
5413     if (dstSwapchain && srcSwapchain)
5414     {
5415         FIXME("Implement hardware blit between two different swapchains\n");
5416         return WINED3DERR_INVALIDCALL;
5417     }
5418
5419     if (dstSwapchain)
5420     {
5421         /* Handled with regular texture -> swapchain blit */
5422         if (src_surface == device->fb.render_targets[0])
5423             TRACE("Blit from active render target to a swapchain\n");
5424     }
5425     else if (srcSwapchain && dst_surface == device->fb.render_targets[0])
5426     {
5427         FIXME("Implement blit from a swapchain to the active render target\n");
5428         return WINED3DERR_INVALIDCALL;
5429     }
5430
5431     if ((srcSwapchain || src_surface == device->fb.render_targets[0]) && !dstSwapchain)
5432     {
5433         /* Blit from render target to texture */
5434         BOOL stretchx;
5435
5436         /* P8 read back is not implemented */
5437         if (src_surface->resource.format->id == WINED3DFMT_P8_UINT
5438                 || dst_surface->resource.format->id == WINED3DFMT_P8_UINT)
5439         {
5440             TRACE("P8 read back not supported by frame buffer to texture blit\n");
5441             return WINED3DERR_INVALIDCALL;
5442         }
5443
5444         if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
5445         {
5446             TRACE("Color keying not supported by frame buffer to texture blit\n");
5447             return WINED3DERR_INVALIDCALL;
5448             /* Destination color key is checked above */
5449         }
5450
5451         if (dst_rect->right - dst_rect->left != src_rect->right - src_rect->left)
5452             stretchx = TRUE;
5453         else
5454             stretchx = FALSE;
5455
5456         /* Blt is a pretty powerful call, while glCopyTexSubImage2D is not. glCopyTexSubImage cannot
5457          * flip the image nor scale it.
5458          *
5459          * -> If the app asks for a unscaled, upside down copy, just perform one glCopyTexSubImage2D call
5460          * -> If the app wants a image width an unscaled width, copy it line per line
5461          * -> If the app wants a image that is scaled on the x axis, and the destination rectangle is smaller
5462          *    than the frame buffer, draw an upside down scaled image onto the fb, read it back and restore the
5463          *    back buffer. This is slower than reading line per line, thus not used for flipping
5464          * -> If the app wants a scaled image with a dest rect that is bigger than the fb, it has to be copied
5465          *    pixel by pixel. */
5466         if (!stretchx || dst_rect->right - dst_rect->left > src_surface->resource.width
5467                 || dst_rect->bottom - dst_rect->top > src_surface->resource.height)
5468         {
5469             TRACE("No stretching in x direction, using direct framebuffer -> texture copy\n");
5470             fb_copy_to_texture_direct(dst_surface, src_surface, src_rect, dst_rect, Filter);
5471         } else {
5472             TRACE("Using hardware stretching to flip / stretch the texture\n");
5473             fb_copy_to_texture_hwstretch(dst_surface, src_surface, src_rect, dst_rect, Filter);
5474         }
5475
5476         if (!(dst_surface->flags & SFLAG_DONOTFREE))
5477         {
5478             HeapFree(GetProcessHeap(), 0, dst_surface->resource.heapMemory);
5479             dst_surface->resource.allocatedMemory = NULL;
5480             dst_surface->resource.heapMemory = NULL;
5481         }
5482         else
5483         {
5484             dst_surface->flags &= ~SFLAG_INSYSMEM;
5485         }
5486
5487         return WINED3D_OK;
5488     }
5489     else if (src_surface)
5490     {
5491         /* Blit from offscreen surface to render target */
5492         DWORD oldCKeyFlags = src_surface->CKeyFlags;
5493         WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
5494
5495         TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
5496
5497         if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5498                 src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
5499                 dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
5500         {
5501             FIXME("Unsupported blit operation falling back to software\n");
5502             return WINED3DERR_INVALIDCALL;
5503         }
5504
5505         /* Color keying: Check if we have to do a color keyed blt,
5506          * and if not check if a color key is activated.
5507          *
5508          * Just modify the color keying parameters in the surface and restore them afterwards
5509          * The surface keeps track of the color key last used to load the opengl surface.
5510          * PreLoad will catch the change to the flags and color key and reload if necessary.
5511          */
5512         if (flags & WINEDDBLT_KEYSRC)
5513         {
5514             /* Use color key from surface */
5515         }
5516         else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
5517         {
5518             /* Use color key from DDBltFx */
5519             src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
5520             src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
5521         }
5522         else
5523         {
5524             /* Do not use color key */
5525             src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
5526         }
5527
5528         surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
5529                 src_surface, src_rect, dst_surface, dst_rect);
5530
5531         /* Restore the color key parameters */
5532         src_surface->CKeyFlags = oldCKeyFlags;
5533         src_surface->SrcBltCKey = oldBltCKey;
5534
5535         surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
5536
5537         return WINED3D_OK;
5538     }
5539
5540     /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
5541     TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
5542     return WINED3DERR_INVALIDCALL;
5543 }
5544
5545 /* GL locking is done by the caller */
5546 static void surface_depth_blt(const struct wined3d_surface *surface, struct wined3d_context *context,
5547         GLuint texture, GLint x, GLint y, GLsizei w, GLsizei h, GLenum target)
5548 {
5549     struct wined3d_device *device = surface->resource.device;
5550     const struct wined3d_gl_info *gl_info = context->gl_info;
5551     GLint compare_mode = GL_NONE;
5552     struct blt_info info;
5553     GLint old_binding = 0;
5554     RECT rect;
5555
5556     glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
5557
5558     glDisable(GL_CULL_FACE);
5559     glDisable(GL_BLEND);
5560     glDisable(GL_ALPHA_TEST);
5561     glDisable(GL_SCISSOR_TEST);
5562     glDisable(GL_STENCIL_TEST);
5563     glEnable(GL_DEPTH_TEST);
5564     glDepthFunc(GL_ALWAYS);
5565     glDepthMask(GL_TRUE);
5566     glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
5567     glViewport(x, y, w, h);
5568
5569     SetRect(&rect, 0, h, w, 0);
5570     surface_get_blt_info(target, &rect, surface->pow2Width, surface->pow2Height, &info);
5571     context_active_texture(context, context->gl_info, 0);
5572     glGetIntegerv(info.binding, &old_binding);
5573     glBindTexture(info.bind_target, texture);
5574     if (gl_info->supported[ARB_SHADOW])
5575     {
5576         glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
5577         if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
5578     }
5579
5580     device->shader_backend->shader_select_depth_blt(device->shader_priv,
5581             gl_info, info.tex_type, &surface->ds_current_size);
5582
5583     glBegin(GL_TRIANGLE_STRIP);
5584     glTexCoord3fv(info.coords[0]);
5585     glVertex2f(-1.0f, -1.0f);
5586     glTexCoord3fv(info.coords[1]);
5587     glVertex2f(1.0f, -1.0f);
5588     glTexCoord3fv(info.coords[2]);
5589     glVertex2f(-1.0f, 1.0f);
5590     glTexCoord3fv(info.coords[3]);
5591     glVertex2f(1.0f, 1.0f);
5592     glEnd();
5593
5594     if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
5595     glBindTexture(info.bind_target, old_binding);
5596
5597     glPopAttrib();
5598
5599     device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
5600 }
5601
5602 void surface_modify_ds_location(struct wined3d_surface *surface,
5603         DWORD location, UINT w, UINT h)
5604 {
5605     TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
5606
5607     if (location & ~SFLAG_DS_LOCATIONS)
5608         FIXME("Invalid location (%#x) specified.\n", location);
5609
5610     surface->ds_current_size.cx = w;
5611     surface->ds_current_size.cy = h;
5612     surface->flags &= ~SFLAG_DS_LOCATIONS;
5613     surface->flags |= location;
5614 }
5615
5616 /* Context activation is done by the caller. */
5617 void surface_load_ds_location(struct wined3d_surface *surface, struct wined3d_context *context, DWORD location)
5618 {
5619     struct wined3d_device *device = surface->resource.device;
5620     GLsizei w, h;
5621
5622     TRACE("surface %p, new location %#x.\n", surface, location);
5623
5624     /* TODO: Make this work for modes other than FBO */
5625     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
5626
5627     if (!(surface->flags & location))
5628     {
5629         w = surface->ds_current_size.cx;
5630         h = surface->ds_current_size.cy;
5631         surface->ds_current_size.cx = 0;
5632         surface->ds_current_size.cy = 0;
5633     }
5634     else
5635     {
5636         w = surface->resource.width;
5637         h = surface->resource.height;
5638     }
5639
5640     if (surface->ds_current_size.cx == surface->resource.width
5641             && surface->ds_current_size.cy == surface->resource.height)
5642     {
5643         TRACE("Location (%#x) is already up to date.\n", location);
5644         return;
5645     }
5646
5647     if (surface->current_renderbuffer)
5648     {
5649         FIXME("Not supported with fixed up depth stencil.\n");
5650         return;
5651     }
5652
5653     if (!(surface->flags & SFLAG_DS_LOCATIONS))
5654     {
5655         /* This mostly happens when a depth / stencil is used without being
5656          * cleared first. In principle we could upload from sysmem, or
5657          * explicitly clear before first usage. For the moment there don't
5658          * appear to be a lot of applications depending on this, so a FIXME
5659          * should do. */
5660         FIXME("No up to date depth stencil location.\n");
5661         surface->flags |= location;
5662         surface->ds_current_size.cx = surface->resource.width;
5663         surface->ds_current_size.cy = surface->resource.height;
5664         return;
5665     }
5666
5667     if (location == SFLAG_DS_OFFSCREEN)
5668     {
5669         GLint old_binding = 0;
5670         GLenum bind_target;
5671
5672         /* The render target is allowed to be smaller than the depth/stencil
5673          * buffer, so the onscreen depth/stencil buffer is potentially smaller
5674          * than the offscreen surface. Don't overwrite the offscreen surface
5675          * with undefined data. */
5676         w = min(w, context->swapchain->presentParms.BackBufferWidth);
5677         h = min(h, context->swapchain->presentParms.BackBufferHeight);
5678
5679         TRACE("Copying onscreen depth buffer to depth texture.\n");
5680
5681         ENTER_GL();
5682
5683         if (!device->depth_blt_texture)
5684         {
5685             glGenTextures(1, &device->depth_blt_texture);
5686         }
5687
5688         /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
5689          * directly on the FBO texture. That's because we need to flip. */
5690         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5691                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5692         if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
5693         {
5694             glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
5695             bind_target = GL_TEXTURE_RECTANGLE_ARB;
5696         }
5697         else
5698         {
5699             glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
5700             bind_target = GL_TEXTURE_2D;
5701         }
5702         glBindTexture(bind_target, device->depth_blt_texture);
5703         /* We use GL_DEPTH_COMPONENT instead of the surface's specific
5704          * internal format, because the internal format might include stencil
5705          * data. In principle we should copy stencil data as well, but unless
5706          * the driver supports stencil export it's hard to do, and doesn't
5707          * seem to be needed in practice. If the hardware doesn't support
5708          * writing stencil data, the glCopyTexImage2D() call might trigger
5709          * software fallbacks. */
5710         glCopyTexImage2D(bind_target, 0, GL_DEPTH_COMPONENT, 0, 0, w, h, 0);
5711         glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
5712         glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
5713         glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
5714         glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
5715         glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
5716         glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
5717         glBindTexture(bind_target, old_binding);
5718
5719         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5720                 NULL, surface, SFLAG_INTEXTURE);
5721         context_set_draw_buffer(context, GL_NONE);
5722         glReadBuffer(GL_NONE);
5723
5724         /* Do the actual blit */
5725         surface_depth_blt(surface, context, device->depth_blt_texture, 0, 0, w, h, bind_target);
5726         checkGLcall("depth_blt");
5727
5728         context_invalidate_state(context, STATE_FRAMEBUFFER);
5729
5730         LEAVE_GL();
5731
5732         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5733     }
5734     else if (location == SFLAG_DS_ONSCREEN)
5735     {
5736         TRACE("Copying depth texture to onscreen depth buffer.\n");
5737
5738         ENTER_GL();
5739
5740         context_apply_fbo_state_blit(context, GL_FRAMEBUFFER,
5741                 context->swapchain->front_buffer, NULL, SFLAG_INDRAWABLE);
5742         surface_depth_blt(surface, context, surface->texture_name,
5743                 0, surface->pow2Height - h, w, h, surface->texture_target);
5744         checkGLcall("depth_blt");
5745
5746         context_invalidate_state(context, STATE_FRAMEBUFFER);
5747
5748         LEAVE_GL();
5749
5750         if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
5751     }
5752     else
5753     {
5754         ERR("Invalid location (%#x) specified.\n", location);
5755     }
5756
5757     surface->flags |= location;
5758     surface->ds_current_size.cx = surface->resource.width;
5759     surface->ds_current_size.cy = surface->resource.height;
5760 }
5761
5762 void surface_modify_location(struct wined3d_surface *surface, DWORD location, BOOL persistent)
5763 {
5764     const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
5765     struct wined3d_surface *overlay;
5766
5767     TRACE("surface %p, location %s, persistent %#x.\n",
5768             surface, debug_surflocation(location), persistent);
5769
5770     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface)
5771             && (location & SFLAG_INDRAWABLE))
5772         ERR("Trying to invalidate the SFLAG_INDRAWABLE location of an offscreen surface.\n");
5773
5774     if (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
5775             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
5776         location |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
5777
5778     if (persistent)
5779     {
5780         if (((surface->flags & SFLAG_INTEXTURE) && !(location & SFLAG_INTEXTURE))
5781                 || ((surface->flags & SFLAG_INSRGBTEX) && !(location & SFLAG_INSRGBTEX)))
5782         {
5783             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5784             {
5785                 TRACE("Passing to container.\n");
5786                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5787             }
5788         }
5789         surface->flags &= ~SFLAG_LOCATIONS;
5790         surface->flags |= location;
5791
5792         /* Redraw emulated overlays, if any */
5793         if (location & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
5794         {
5795             LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, struct wined3d_surface, overlay_entry)
5796             {
5797                 overlay->surface_ops->surface_draw_overlay(overlay);
5798             }
5799         }
5800     }
5801     else
5802     {
5803         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (location & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
5804         {
5805             if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
5806             {
5807                 TRACE("Passing to container\n");
5808                 wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
5809             }
5810         }
5811         surface->flags &= ~location;
5812     }
5813
5814     if (!(surface->flags & SFLAG_LOCATIONS))
5815     {
5816         ERR("Surface %p does not have any up to date location.\n", surface);
5817     }
5818 }
5819
5820 static DWORD resource_access_from_location(DWORD location)
5821 {
5822     switch (location)
5823     {
5824         case SFLAG_INSYSMEM:
5825             return WINED3D_RESOURCE_ACCESS_CPU;
5826
5827         case SFLAG_INDRAWABLE:
5828         case SFLAG_INSRGBTEX:
5829         case SFLAG_INTEXTURE:
5830         case SFLAG_INRB_MULTISAMPLE:
5831         case SFLAG_INRB_RESOLVED:
5832             return WINED3D_RESOURCE_ACCESS_GPU;
5833
5834         default:
5835             FIXME("Unhandled location %#x.\n", location);
5836             return 0;
5837     }
5838 }
5839
5840 static void surface_load_sysmem(struct wined3d_surface *surface,
5841         const struct wined3d_gl_info *gl_info, const RECT *rect)
5842 {
5843     surface_prepare_system_memory(surface);
5844
5845     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED))
5846         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5847
5848     /* Download the surface to system memory. */
5849     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
5850     {
5851         struct wined3d_device *device = surface->resource.device;
5852         struct wined3d_context *context;
5853
5854         /* TODO: Use already acquired context when possible. */
5855         context = context_acquire(device, NULL);
5856
5857         surface_bind_and_dirtify(surface, context, !(surface->flags & SFLAG_INTEXTURE));
5858         surface_download_data(surface, gl_info);
5859
5860         context_release(context);
5861
5862         return;
5863     }
5864
5865     if (surface->flags & SFLAG_INDRAWABLE)
5866     {
5867         read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
5868                 wined3d_surface_get_pitch(surface));
5869         return;
5870     }
5871
5872     FIXME("Can't load surface %p with location flags %#x into sysmem.\n",
5873             surface, surface->flags & SFLAG_LOCATIONS);
5874 }
5875
5876 static HRESULT surface_load_drawable(struct wined3d_surface *surface,
5877         const struct wined3d_gl_info *gl_info, const RECT *rect)
5878 {
5879     struct wined3d_device *device = surface->resource.device;
5880     struct wined3d_format format;
5881     CONVERT_TYPES convert;
5882     UINT byte_count;
5883     BYTE *mem;
5884
5885     if (wined3d_settings.offscreen_rendering_mode == ORM_FBO && surface_is_offscreen(surface))
5886     {
5887         ERR("Trying to load offscreen surface into SFLAG_INDRAWABLE.\n");
5888         return WINED3DERR_INVALIDCALL;
5889     }
5890
5891     if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
5892         surface_load_location(surface, SFLAG_INTEXTURE, NULL);
5893
5894     if (surface->flags & SFLAG_INTEXTURE)
5895     {
5896         RECT r;
5897
5898         surface_get_rect(surface, rect, &r);
5899         surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
5900
5901         return WINED3D_OK;
5902     }
5903
5904     if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
5905     {
5906         /* This needs colorspace conversion from sRGB to RGB. We take the slow
5907          * path through sysmem. */
5908         surface_load_location(surface, SFLAG_INSYSMEM, rect);
5909     }
5910
5911     d3dfmt_get_conv(surface, FALSE, FALSE, &format, &convert);
5912
5913     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
5914      * SFLAG_CONVERTED but it isn't set (yet) in all cases where it is getting
5915      * called. */
5916     if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
5917     {
5918         struct wined3d_context *context;
5919
5920         TRACE("Removing the pbo attached to surface %p.\n", surface);
5921
5922         /* TODO: Use already acquired context when possible. */
5923         context = context_acquire(device, NULL);
5924
5925         surface_remove_pbo(surface, gl_info);
5926
5927         context_release(context);
5928     }
5929
5930     if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
5931     {
5932         UINT height = surface->resource.height;
5933         UINT width = surface->resource.width;
5934         UINT src_pitch, dst_pitch;
5935
5936         byte_count = format.conv_byte_count;
5937         src_pitch = wined3d_surface_get_pitch(surface);
5938
5939         /* Stick to the alignment for the converted surface too, makes it
5940          * easier to load the surface. */
5941         dst_pitch = width * byte_count;
5942         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
5943
5944         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
5945         {
5946             ERR("Out of memory (%u).\n", dst_pitch * height);
5947             return E_OUTOFMEMORY;
5948         }
5949
5950         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem,
5951                 src_pitch, width, height, dst_pitch, convert, surface);
5952
5953         surface->flags |= SFLAG_CONVERTED;
5954     }
5955     else
5956     {
5957         surface->flags &= ~SFLAG_CONVERTED;
5958         mem = surface->resource.allocatedMemory;
5959         byte_count = format.byte_count;
5960     }
5961
5962     flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
5963
5964     /* Don't delete PBO memory. */
5965     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
5966         HeapFree(GetProcessHeap(), 0, mem);
5967
5968     return WINED3D_OK;
5969 }
5970
5971 static HRESULT surface_load_texture(struct wined3d_surface *surface,
5972         const struct wined3d_gl_info *gl_info, const RECT *rect, BOOL srgb)
5973 {
5974     RECT src_rect = {0, 0, surface->resource.width, surface->resource.height};
5975     struct wined3d_device *device = surface->resource.device;
5976     struct wined3d_context *context;
5977     UINT width, src_pitch, dst_pitch;
5978     struct wined3d_bo_address data;
5979     struct wined3d_format format;
5980     POINT dst_point = {0, 0};
5981     CONVERT_TYPES convert;
5982     BYTE *mem;
5983
5984     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
5985             && surface_is_offscreen(surface)
5986             && (surface->flags & SFLAG_INDRAWABLE))
5987     {
5988         surface_load_fb_texture(surface, srgb);
5989
5990         return WINED3D_OK;
5991     }
5992
5993     if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
5994             && (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB)
5995             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
5996                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
5997                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
5998     {
5999         if (srgb)
6000             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INTEXTURE,
6001                     &src_rect, surface, SFLAG_INSRGBTEX, &src_rect);
6002         else
6003             surface_blt_fbo(device, WINED3DTEXF_POINT, surface, SFLAG_INSRGBTEX,
6004                     &src_rect, surface, SFLAG_INTEXTURE, &src_rect);
6005
6006         return WINED3D_OK;
6007     }
6008
6009     if (surface->flags & (SFLAG_INRB_MULTISAMPLE | SFLAG_INRB_RESOLVED)
6010             && (!srgb || (surface->resource.format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB))
6011             && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
6012                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
6013                 NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
6014     {
6015         DWORD src_location = surface->flags & SFLAG_INRB_RESOLVED ? SFLAG_INRB_RESOLVED : SFLAG_INRB_MULTISAMPLE;
6016         DWORD dst_location = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
6017         RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6018
6019         surface_blt_fbo(device, WINED3DTEXF_POINT, surface, src_location,
6020                 &rect, surface, dst_location, &rect);
6021
6022         return WINED3D_OK;
6023     }
6024
6025     /* Upload from system memory */
6026
6027     d3dfmt_get_conv(surface, TRUE /* We need color keying */,
6028             TRUE /* We will use textures */, &format, &convert);
6029
6030     if (srgb)
6031     {
6032         if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
6033         {
6034             /* Performance warning... */
6035             FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
6036             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6037         }
6038     }
6039     else
6040     {
6041         if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
6042         {
6043             /* Performance warning... */
6044             FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
6045             surface_load_location(surface, SFLAG_INSYSMEM, rect);
6046         }
6047     }
6048
6049     if (!(surface->flags & SFLAG_INSYSMEM))
6050     {
6051         WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
6052         /* Lets hope we get it from somewhere... */
6053         surface_load_location(surface, SFLAG_INSYSMEM, rect);
6054     }
6055
6056     /* TODO: Use already acquired context when possible. */
6057     context = context_acquire(device, NULL);
6058
6059     surface_prepare_texture(surface, context, srgb);
6060     surface_bind_and_dirtify(surface, context, srgb);
6061
6062     if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
6063     {
6064         surface->flags |= SFLAG_GLCKEY;
6065         surface->glCKey = surface->SrcBltCKey;
6066     }
6067     else surface->flags &= ~SFLAG_GLCKEY;
6068
6069     width = surface->resource.width;
6070     src_pitch = wined3d_surface_get_pitch(surface);
6071
6072     /* Don't use PBOs for converted surfaces. During PBO conversion we look at
6073      * SFLAG_CONVERTED but it isn't set (yet) in all cases it is getting
6074      * called. */
6075     if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
6076     {
6077         TRACE("Removing the pbo attached to surface %p.\n", surface);
6078         surface_remove_pbo(surface, gl_info);
6079     }
6080
6081     if (format.convert)
6082     {
6083         /* This code is entered for texture formats which need a fixup. */
6084         UINT height = surface->resource.height;
6085
6086         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6087         dst_pitch = width * format.conv_byte_count;
6088         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6089
6090         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6091         {
6092             ERR("Out of memory (%u).\n", dst_pitch * height);
6093             context_release(context);
6094             return E_OUTOFMEMORY;
6095         }
6096         format.convert(surface->resource.allocatedMemory, mem, src_pitch, width, height);
6097     }
6098     else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
6099     {
6100         /* This code is only entered for color keying fixups */
6101         UINT height = surface->resource.height;
6102
6103         /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
6104         dst_pitch = width * format.conv_byte_count;
6105         dst_pitch = (dst_pitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
6106
6107         if (!(mem = HeapAlloc(GetProcessHeap(), 0, dst_pitch * height)))
6108         {
6109             ERR("Out of memory (%u).\n", dst_pitch * height);
6110             context_release(context);
6111             return E_OUTOFMEMORY;
6112         }
6113         d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, src_pitch,
6114                 width, height, dst_pitch, convert, surface);
6115     }
6116     else
6117     {
6118         mem = surface->resource.allocatedMemory;
6119     }
6120
6121     data.buffer_object = surface->flags & SFLAG_PBO ? surface->pbo : 0;
6122     data.addr = mem;
6123     surface_upload_data(surface, gl_info, &format, &src_rect, src_pitch, &dst_point, srgb, &data);
6124
6125     context_release(context);
6126
6127     /* Don't delete PBO memory. */
6128     if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
6129         HeapFree(GetProcessHeap(), 0, mem);
6130
6131     return WINED3D_OK;
6132 }
6133
6134 static void surface_multisample_resolve(struct wined3d_surface *surface)
6135 {
6136     RECT rect = {0, 0, surface->resource.width, surface->resource.height};
6137
6138     if (!(surface->flags & SFLAG_INRB_MULTISAMPLE))
6139         ERR("Trying to resolve multisampled surface %p, but location SFLAG_INRB_MULTISAMPLE not current.\n", surface);
6140
6141     surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
6142             surface, SFLAG_INRB_MULTISAMPLE, &rect, surface, SFLAG_INRB_RESOLVED, &rect);
6143 }
6144
6145 HRESULT surface_load_location(struct wined3d_surface *surface, DWORD location, const RECT *rect)
6146 {
6147     struct wined3d_device *device = surface->resource.device;
6148     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
6149     HRESULT hr;
6150
6151     TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(location), wine_dbgstr_rect(rect));
6152
6153     if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
6154     {
6155         if (location == SFLAG_INTEXTURE)
6156         {
6157             struct wined3d_context *context = context_acquire(device, NULL);
6158             surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
6159             context_release(context);
6160             return WINED3D_OK;
6161         }
6162         else
6163         {
6164             FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(location));
6165             return WINED3DERR_INVALIDCALL;
6166         }
6167     }
6168
6169     if (location == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6170         location = SFLAG_INTEXTURE;
6171
6172     if (surface->flags & location)
6173     {
6174         TRACE("Location already up to date.\n");
6175         return WINED3D_OK;
6176     }
6177
6178     if (WARN_ON(d3d_surface))
6179     {
6180         DWORD required_access = resource_access_from_location(location);
6181         if ((surface->resource.access_flags & required_access) != required_access)
6182             WARN("Operation requires %#x access, but surface only has %#x.\n",
6183                     required_access, surface->resource.access_flags);
6184     }
6185
6186     if (!(surface->flags & SFLAG_LOCATIONS))
6187     {
6188         ERR("Surface %p does not have any up to date location.\n", surface);
6189         surface->flags |= SFLAG_LOST;
6190         return WINED3DERR_DEVICELOST;
6191     }
6192
6193     switch (location)
6194     {
6195         case SFLAG_INSYSMEM:
6196             surface_load_sysmem(surface, gl_info, rect);
6197             break;
6198
6199         case SFLAG_INDRAWABLE:
6200             if (FAILED(hr = surface_load_drawable(surface, gl_info, rect)))
6201                 return hr;
6202             break;
6203
6204         case SFLAG_INRB_RESOLVED:
6205             surface_multisample_resolve(surface);
6206             break;
6207
6208         case SFLAG_INTEXTURE:
6209         case SFLAG_INSRGBTEX:
6210             if (FAILED(hr = surface_load_texture(surface, gl_info, rect, location == SFLAG_INSRGBTEX)))
6211                 return hr;
6212             break;
6213
6214         default:
6215             ERR("Don't know how to handle location %#x.\n", location);
6216             break;
6217     }
6218
6219     if (!rect)
6220     {
6221         surface->flags |= location;
6222
6223         if (location != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
6224             surface_evict_sysmem(surface);
6225     }
6226
6227     if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
6228             && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
6229     {
6230         surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
6231     }
6232
6233     return WINED3D_OK;
6234 }
6235
6236 BOOL surface_is_offscreen(const struct wined3d_surface *surface)
6237 {
6238     struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
6239
6240     /* Not on a swapchain - must be offscreen */
6241     if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
6242
6243     /* The front buffer is always onscreen */
6244     if (surface == swapchain->front_buffer) return FALSE;
6245
6246     /* If the swapchain is rendered to an FBO, the backbuffer is
6247      * offscreen, otherwise onscreen */
6248     return swapchain->render_to_fbo;
6249 }
6250
6251 static HRESULT ffp_blit_alloc(struct wined3d_device *device) { return WINED3D_OK; }
/* The fixed function blitter has nothing to release.
 *
 * Context activation is done by the caller. */
static void ffp_blit_free(struct wined3d_device *device)
{
}
6254
6255 /* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
6256 /* Context activation is done by the caller. */
6257 static void ffp_blit_p8_upload_palette(const struct wined3d_surface *surface, const struct wined3d_gl_info *gl_info)
6258 {
6259     BYTE table[256][4];
6260     BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
6261
6262     d3dfmt_p8_init_palette(surface, table, colorkey_active);
6263
6264     TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
6265     ENTER_GL();
6266     GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
6267     LEAVE_GL();
6268 }
6269
6270 /* Context activation is done by the caller. */
6271 static HRESULT ffp_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6272 {
6273     enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
6274
6275     /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
6276      * else the surface is converted in software at upload time in LoadLocation.
6277      */
6278     if(fixup == COMPLEX_FIXUP_P8 && context->gl_info->supported[EXT_PALETTED_TEXTURE])
6279         ffp_blit_p8_upload_palette(surface, context->gl_info);
6280
6281     ENTER_GL();
6282     glEnable(surface->texture_target);
6283     checkGLcall("glEnable(surface->texture_target)");
6284     LEAVE_GL();
6285     return WINED3D_OK;
6286 }
6287
6288 /* Context activation is done by the caller. */
6289 static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
6290 {
6291     ENTER_GL();
6292     glDisable(GL_TEXTURE_2D);
6293     checkGLcall("glDisable(GL_TEXTURE_2D)");
6294     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
6295     {
6296         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
6297         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
6298     }
6299     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
6300     {
6301         glDisable(GL_TEXTURE_RECTANGLE_ARB);
6302         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
6303     }
6304     LEAVE_GL();
6305 }
6306
6307 static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6308         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6309         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6310 {
6311     enum complex_fixup src_fixup;
6312
6313     switch (blit_op)
6314     {
6315         case WINED3D_BLIT_OP_COLOR_BLIT:
6316             if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
6317                 return FALSE;
6318
6319             src_fixup = get_complex_fixup(src_format->color_fixup);
6320             if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
6321             {
6322                 TRACE("Checking support for fixup:\n");
6323                 dump_color_fixup_desc(src_format->color_fixup);
6324             }
6325
6326             if (!is_identity_fixup(dst_format->color_fixup))
6327             {
6328                 TRACE("Destination fixups are not supported\n");
6329                 return FALSE;
6330             }
6331
6332             if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
6333             {
6334                 TRACE("P8 fixup supported\n");
6335                 return TRUE;
6336             }
6337
6338             /* We only support identity conversions. */
6339             if (is_identity_fixup(src_format->color_fixup))
6340             {
6341                 TRACE("[OK]\n");
6342                 return TRUE;
6343             }
6344
6345             TRACE("[FAILED]\n");
6346             return FALSE;
6347
6348         case WINED3D_BLIT_OP_COLOR_FILL:
6349             if (dst_pool == WINED3DPOOL_SYSTEMMEM)
6350                 return FALSE;
6351
6352             if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
6353             {
6354                 if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
6355                     return FALSE;
6356             }
6357             else if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
6358             {
6359                 TRACE("Color fill not supported\n");
6360                 return FALSE;
6361             }
6362
6363             /* FIXME: We should reject color fills on formats with fixups,
6364              * but this would break P8 color fills for example. */
6365
6366             return TRUE;
6367
6368         case WINED3D_BLIT_OP_DEPTH_FILL:
6369             return TRUE;
6370
6371         default:
6372             TRACE("Unsupported blit_op=%d\n", blit_op);
6373             return FALSE;
6374     }
6375 }
6376
6377 /* Do not call while under the GL lock. */
6378 static HRESULT ffp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
6379         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
6380 {
6381     const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
6382     struct wined3d_fb_state fb = {&dst_surface, NULL};
6383
6384     return device_clear_render_targets(device, 1, &fb,
6385             1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
6386 }
6387
6388 /* Do not call while under the GL lock. */
6389 static HRESULT ffp_blit_depth_fill(struct wined3d_device *device,
6390         struct wined3d_surface *surface, const RECT *rect, float depth)
6391 {
6392     const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
6393     struct wined3d_fb_state fb = {NULL, surface};
6394
6395     return device_clear_render_targets(device, 0, &fb,
6396             1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
6397 }
6398
6399 const struct blit_shader ffp_blit =  {
6400     ffp_blit_alloc,
6401     ffp_blit_free,
6402     ffp_blit_set,
6403     ffp_blit_unset,
6404     ffp_blit_supported,
6405     ffp_blit_color_fill,
6406     ffp_blit_depth_fill,
6407 };
6408
6409 static HRESULT cpu_blit_alloc(struct wined3d_device *device)
6410 {
6411     return WINED3D_OK;
6412 }
6413
/* Release hook of the CPU blitter. The body is intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_free(struct wined3d_device *device)
{
}
6418
6419 /* Context activation is done by the caller. */
6420 static HRESULT cpu_blit_set(void *blit_priv, struct wined3d_context *context, struct wined3d_surface *surface)
6421 {
6422     return WINED3D_OK;
6423 }
6424
/* State teardown hook of the CPU blitter. The body is intentionally empty.
 *
 * Context activation is done by the caller. */
static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
{
}
6429
6430 static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
6431         const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
6432         const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
6433 {
6434     if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
6435     {
6436         return TRUE;
6437     }
6438
6439     return FALSE;
6440 }
6441
6442 static HRESULT surface_cpu_blt_compressed(const BYTE *src_data, BYTE *dst_data,
6443         UINT src_pitch, UINT dst_pitch, UINT update_w, UINT update_h,
6444         const struct wined3d_format *format, DWORD flags, const WINEDDBLTFX *fx)
6445 {
6446     UINT row_block_count;
6447     const BYTE *src_row;
6448     BYTE *dst_row;
6449     UINT x, y;
6450
6451     src_row = src_data;
6452     dst_row = dst_data;
6453
6454     row_block_count = (update_w + format->block_width - 1) / format->block_width;
6455
6456     if (!flags)
6457     {
6458         for (y = 0; y < update_h; y += format->block_height)
6459         {
6460             memcpy(dst_row, src_row, row_block_count * format->block_byte_count);
6461             src_row += src_pitch;
6462             dst_row += dst_pitch;
6463         }
6464
6465         return WINED3D_OK;
6466     }
6467
6468     if (flags == WINEDDBLT_DDFX && fx->dwDDFX == WINEDDBLTFX_MIRRORUPDOWN)
6469     {
6470         src_row += (((update_h / format->block_height) - 1) * src_pitch);
6471
6472         switch (format->id)
6473         {
6474             case WINED3DFMT_DXT1:
6475                 for (y = 0; y < update_h; y += format->block_height)
6476                 {
6477                     struct block
6478                     {
6479                         WORD color[2];
6480                         BYTE control_row[4];
6481                     };
6482
6483                     const struct block *s = (const struct block *)src_row;
6484                     struct block *d = (struct block *)dst_row;
6485
6486                     for (x = 0; x < row_block_count; ++x)
6487                     {
6488                         d[x].color[0] = s[x].color[0];
6489                         d[x].color[1] = s[x].color[1];
6490                         d[x].control_row[0] = s[x].control_row[3];
6491                         d[x].control_row[1] = s[x].control_row[2];
6492                         d[x].control_row[2] = s[x].control_row[1];
6493                         d[x].control_row[3] = s[x].control_row[0];
6494                     }
6495                     src_row -= src_pitch;
6496                     dst_row += dst_pitch;
6497                 }
6498                 return WINED3D_OK;
6499
6500             case WINED3DFMT_DXT3:
6501                 for (y = 0; y < update_h; y += format->block_height)
6502                 {
6503                     struct block
6504                     {
6505                         WORD alpha_row[4];
6506                         WORD color[2];
6507                         BYTE control_row[4];
6508                     };
6509
6510                     const struct block *s = (const struct block *)src_row;
6511                     struct block *d = (struct block *)dst_row;
6512
6513                     for (x = 0; x < row_block_count; ++x)
6514                     {
6515                         d[x].alpha_row[0] = s[x].alpha_row[3];
6516                         d[x].alpha_row[1] = s[x].alpha_row[2];
6517                         d[x].alpha_row[2] = s[x].alpha_row[1];
6518                         d[x].alpha_row[3] = s[x].alpha_row[0];
6519                         d[x].color[0] = s[x].color[0];
6520                         d[x].color[1] = s[x].color[1];
6521                         d[x].control_row[0] = s[x].control_row[3];
6522                         d[x].control_row[1] = s[x].control_row[2];
6523                         d[x].control_row[2] = s[x].control_row[1];
6524                         d[x].control_row[3] = s[x].control_row[0];
6525                     }
6526                     src_row -= src_pitch;
6527                     dst_row += dst_pitch;
6528                 }
6529                 return WINED3D_OK;
6530
6531             default:
6532                 FIXME("Compressed flip not implemented for format %s.\n",
6533                         debug_d3dformat(format->id));
6534                 return E_NOTIMPL;
6535         }
6536     }
6537
6538     FIXME("Unsupported blit on compressed surface (format %s, flags %#x, DDFX %#x).\n",
6539             debug_d3dformat(format->id), flags, flags & WINEDDBLT_DDFX ? fx->dwDDFX : 0);
6540
6541     return E_NOTIMPL;
6542 }
6543
6544 static HRESULT surface_cpu_blt(struct wined3d_surface *dst_surface, const RECT *dst_rect,
6545         struct wined3d_surface *src_surface, const RECT *src_rect, DWORD flags,
6546         const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
6547 {
6548     int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
6549     const struct wined3d_format *src_format, *dst_format;
6550     struct wined3d_surface *orig_src = src_surface;
6551     WINED3DLOCKED_RECT dlock, slock;
6552     HRESULT hr = WINED3D_OK;
6553     const BYTE *sbuf;
6554     RECT xdst,xsrc;
6555     BYTE *dbuf;
6556     int x, y;
6557
6558     TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
6559             dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
6560             flags, fx, debug_d3dtexturefiltertype(filter));
6561
6562     xsrc = *src_rect;
6563
6564     if (!src_surface)
6565     {
6566         RECT full_rect;
6567
6568         full_rect.left = 0;
6569         full_rect.top = 0;
6570         full_rect.right = dst_surface->resource.width;
6571         full_rect.bottom = dst_surface->resource.height;
6572         IntersectRect(&xdst, &full_rect, dst_rect);
6573     }
6574     else
6575     {
6576         BOOL clip_horiz, clip_vert;
6577
6578         xdst = *dst_rect;
6579         clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
6580         clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
6581
6582         if (clip_vert || clip_horiz)
6583         {
6584             /* Now check if this is a special case or not... */
6585             if ((flags & WINEDDBLT_DDFX)
6586                     || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
6587                     || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
6588             {
6589                 WARN("Out of screen rectangle in special case. Not handled right now.\n");
6590                 return WINED3D_OK;
6591             }
6592
6593             if (clip_horiz)
6594             {
6595                 if (xdst.left < 0)
6596                 {
6597                     xsrc.left -= xdst.left;
6598                     xdst.left = 0;
6599                 }
6600                 if (xdst.right > dst_surface->resource.width)
6601                 {
6602                     xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
6603                     xdst.right = (int)dst_surface->resource.width;
6604                 }
6605             }
6606
6607             if (clip_vert)
6608             {
6609                 if (xdst.top < 0)
6610                 {
6611                     xsrc.top -= xdst.top;
6612                     xdst.top = 0;
6613                 }
6614                 if (xdst.bottom > dst_surface->resource.height)
6615                 {
6616                     xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
6617                     xdst.bottom = (int)dst_surface->resource.height;
6618                 }
6619             }
6620
6621             /* And check if after clipping something is still to be done... */
6622             if ((xdst.right <= 0) || (xdst.bottom <= 0)
6623                     || (xdst.left >= (int)dst_surface->resource.width)
6624                     || (xdst.top >= (int)dst_surface->resource.height)
6625                     || (xsrc.right <= 0) || (xsrc.bottom <= 0)
6626                     || (xsrc.left >= (int)src_surface->resource.width)
6627                     || (xsrc.top >= (int)src_surface->resource.height))
6628             {
6629                 TRACE("Nothing to be done after clipping.\n");
6630                 return WINED3D_OK;
6631             }
6632         }
6633     }
6634
6635     if (src_surface == dst_surface)
6636     {
6637         wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6638         slock = dlock;
6639         src_format = dst_surface->resource.format;
6640         dst_format = src_format;
6641     }
6642     else
6643     {
6644         dst_format = dst_surface->resource.format;
6645         if (src_surface)
6646         {
6647             if (dst_surface->resource.format->id != src_surface->resource.format->id)
6648             {
6649                 src_surface = surface_convert_format(src_surface, dst_format->id);
6650                 if (!src_surface)
6651                 {
6652                     /* The conv function writes a FIXME */
6653                     WARN("Cannot convert source surface format to dest format.\n");
6654                     goto release;
6655                 }
6656             }
6657             wined3d_surface_map(src_surface, &slock, NULL, WINED3DLOCK_READONLY);
6658             src_format = src_surface->resource.format;
6659         }
6660         else
6661         {
6662             src_format = dst_format;
6663         }
6664         if (dst_rect)
6665             wined3d_surface_map(dst_surface, &dlock, &xdst, 0);
6666         else
6667             wined3d_surface_map(dst_surface, &dlock, NULL, 0);
6668     }
6669
6670     bpp = dst_surface->resource.format->byte_count;
6671     srcheight = xsrc.bottom - xsrc.top;
6672     srcwidth = xsrc.right - xsrc.left;
6673     dstheight = xdst.bottom - xdst.top;
6674     dstwidth = xdst.right - xdst.left;
6675     width = (xdst.right - xdst.left) * bpp;
6676
6677     if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_COMPRESSED)
6678     {
6679         TRACE("%s -> %s copy.\n", debug_d3dformat(src_format->id), debug_d3dformat(dst_format->id));
6680
6681         if (src_surface == dst_surface)
6682         {
6683             FIXME("Only plain blits supported on compressed surfaces.\n");
6684             hr = E_NOTIMPL;
6685             goto release;
6686         }
6687
6688         if (srcheight != dstheight || srcwidth != dstwidth)
6689         {
6690             WARN("Stretching not supported on compressed surfaces.\n");
6691             hr = WINED3DERR_INVALIDCALL;
6692             goto release;
6693         }
6694
6695         if (srcwidth & (src_format->block_width - 1) || srcheight & (src_format->block_height - 1))
6696         {
6697             WARN("Rectangle not block-aligned.\n");
6698             hr = WINED3DERR_INVALIDCALL;
6699             goto release;
6700         }
6701
6702         hr = surface_cpu_blt_compressed(slock.pBits, dlock.pBits,
6703                 slock.Pitch, dlock.Pitch, dstwidth, dstheight,
6704                 src_format, flags, fx);
6705         goto release;
6706     }
6707
6708     if (dst_rect && src_surface != dst_surface)
6709         dbuf = dlock.pBits;
6710     else
6711         dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
6712
6713     /* First, all the 'source-less' blits */
6714     if (flags & WINEDDBLT_COLORFILL)
6715     {
6716         hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
6717         flags &= ~WINEDDBLT_COLORFILL;
6718     }
6719
6720     if (flags & WINEDDBLT_DEPTHFILL)
6721     {
6722         FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
6723     }
6724     if (flags & WINEDDBLT_ROP)
6725     {
6726         /* Catch some degenerate cases here. */
6727         switch (fx->dwROP)
6728         {
6729             case BLACKNESS:
6730                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
6731                 break;
6732             case 0xAA0029: /* No-op */
6733                 break;
6734             case WHITENESS:
6735                 hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
6736                 break;
6737             case SRCCOPY: /* Well, we do that below? */
6738                 break;
6739             default:
6740                 FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
6741                 goto error;
6742         }
6743         flags &= ~WINEDDBLT_ROP;
6744     }
6745     if (flags & WINEDDBLT_DDROPS)
6746     {
6747         FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
6748     }
6749     /* Now the 'with source' blits. */
6750     if (src_surface)
6751     {
6752         const BYTE *sbase;
6753         int sx, xinc, sy, yinc;
6754
6755         if (!dstwidth || !dstheight) /* Hmm... stupid program? */
6756             goto release;
6757
6758         if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
6759                 && (srcwidth != dstwidth || srcheight != dstheight))
6760         {
6761             /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
6762             FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
6763         }
6764
6765         sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
6766         xinc = (srcwidth << 16) / dstwidth;
6767         yinc = (srcheight << 16) / dstheight;
6768
6769         if (!flags)
6770         {
6771             /* No effects, we can cheat here. */
6772             if (dstwidth == srcwidth)
6773             {
6774                 if (dstheight == srcheight)
6775                 {
6776                     /* No stretching in either direction. This needs to be as
6777                      * fast as possible. */
6778                     sbuf = sbase;
6779
6780                     /* Check for overlapping surfaces. */
6781                     if (src_surface != dst_surface || xdst.top < xsrc.top
6782                             || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
6783                     {
6784                         /* No overlap, or dst above src, so copy from top downwards. */
6785                         for (y = 0; y < dstheight; ++y)
6786                         {
6787                             memcpy(dbuf, sbuf, width);
6788                             sbuf += slock.Pitch;
6789                             dbuf += dlock.Pitch;
6790                         }
6791                     }
6792                     else if (xdst.top > xsrc.top)
6793                     {
6794                         /* Copy from bottom upwards. */
6795                         sbuf += (slock.Pitch*dstheight);
6796                         dbuf += (dlock.Pitch*dstheight);
6797                         for (y = 0; y < dstheight; ++y)
6798                         {
6799                             sbuf -= slock.Pitch;
6800                             dbuf -= dlock.Pitch;
6801                             memcpy(dbuf, sbuf, width);
6802                         }
6803                     }
6804                     else
6805                     {
6806                         /* Src and dst overlapping on the same line, use memmove. */
6807                         for (y = 0; y < dstheight; ++y)
6808                         {
6809                             memmove(dbuf, sbuf, width);
6810                             sbuf += slock.Pitch;
6811                             dbuf += dlock.Pitch;
6812                         }
6813                     }
6814                 }
6815                 else
6816                 {
6817                     /* Stretching in y direction only. */
6818                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6819                     {
6820                         sbuf = sbase + (sy >> 16) * slock.Pitch;
6821                         memcpy(dbuf, sbuf, width);
6822                         dbuf += dlock.Pitch;
6823                     }
6824                 }
6825             }
6826             else
6827             {
6828                 /* Stretching in X direction. */
6829                 int last_sy = -1;
6830                 for (y = sy = 0; y < dstheight; ++y, sy += yinc)
6831                 {
6832                     sbuf = sbase + (sy >> 16) * slock.Pitch;
6833
6834                     if ((sy >> 16) == (last_sy >> 16))
6835                     {
6836                         /* This source row is the same as last source row -
6837                          * Copy the already stretched row. */
6838                         memcpy(dbuf, dbuf - dlock.Pitch, width);
6839                     }
6840                     else
6841                     {
6842 #define STRETCH_ROW(type) \
6843 do { \
6844     const type *s = (const type *)sbuf; \
6845     type *d = (type *)dbuf; \
6846     for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
6847         d[x] = s[sx >> 16]; \
6848 } while(0)
6849
6850                         switch(bpp)
6851                         {
6852                             case 1:
6853                                 STRETCH_ROW(BYTE);
6854                                 break;
6855                             case 2:
6856                                 STRETCH_ROW(WORD);
6857                                 break;
6858                             case 4:
6859                                 STRETCH_ROW(DWORD);
6860                                 break;
6861                             case 3:
6862                             {
6863                                 const BYTE *s;
6864                                 BYTE *d = dbuf;
6865                                 for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
6866                                 {
6867                                     DWORD pixel;
6868
6869                                     s = sbuf + 3 * (sx >> 16);
6870                                     pixel = s[0] | (s[1] << 8) | (s[2] << 16);
6871                                     d[0] = (pixel      ) & 0xff;
6872                                     d[1] = (pixel >>  8) & 0xff;
6873                                     d[2] = (pixel >> 16) & 0xff;
6874                                     d += 3;
6875                                 }
6876                                 break;
6877                             }
6878                             default:
6879                                 FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
6880                                 hr = WINED3DERR_NOTAVAILABLE;
6881                                 goto error;
6882                         }
6883 #undef STRETCH_ROW
6884                     }
6885                     dbuf += dlock.Pitch;
6886                     last_sy = sy;
6887                 }
6888             }
6889         }
6890         else
6891         {
6892             LONG dstyinc = dlock.Pitch, dstxinc = bpp;
6893             DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
6894             DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
6895             if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
6896             {
6897                 /* The color keying flags are checked for correctness in ddraw */
6898                 if (flags & WINEDDBLT_KEYSRC)
6899                 {
6900                     keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
6901                     keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
6902                 }
6903                 else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
6904                 {
6905                     keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
6906                     keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
6907                 }
6908
6909                 if (flags & WINEDDBLT_KEYDEST)
6910                 {
6911                     /* Destination color keys are taken from the source surface! */
6912                     destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
6913                     destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
6914                 }
6915                 else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
6916                 {
6917                     destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
6918                     destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
6919                 }
6920
6921                 if (bpp == 1)
6922                 {
6923                     keymask = 0xff;
6924                 }
6925                 else
6926                 {
6927                     keymask = src_format->red_mask
6928                             | src_format->green_mask
6929                             | src_format->blue_mask;
6930                 }
6931                 flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
6932             }
6933
6934             if (flags & WINEDDBLT_DDFX)
6935             {
6936                 BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
6937                 LONG tmpxy;
6938                 dTopLeft     = dbuf;
6939                 dTopRight    = dbuf + ((dstwidth - 1) * bpp);
6940                 dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
6941                 dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
6942
6943                 if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
6944                 {
6945                     /* I don't think we need to do anything about this flag */
6946                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
6947                 }
6948                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
6949                 {
6950                     tmp          = dTopRight;
6951                     dTopRight    = dTopLeft;
6952                     dTopLeft     = tmp;
6953                     tmp          = dBottomRight;
6954                     dBottomRight = dBottomLeft;
6955                     dBottomLeft  = tmp;
6956                     dstxinc = dstxinc * -1;
6957                 }
6958                 if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
6959                 {
6960                     tmp          = dTopLeft;
6961                     dTopLeft     = dBottomLeft;
6962                     dBottomLeft  = tmp;
6963                     tmp          = dTopRight;
6964                     dTopRight    = dBottomRight;
6965                     dBottomRight = tmp;
6966                     dstyinc = dstyinc * -1;
6967                 }
6968                 if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
6969                 {
6970                     /* I don't think we need to do anything about this flag */
6971                     WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
6972                 }
6973                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
6974                 {
6975                     tmp          = dBottomRight;
6976                     dBottomRight = dTopLeft;
6977                     dTopLeft     = tmp;
6978                     tmp          = dBottomLeft;
6979                     dBottomLeft  = dTopRight;
6980                     dTopRight    = tmp;
6981                     dstxinc = dstxinc * -1;
6982                     dstyinc = dstyinc * -1;
6983                 }
6984                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
6985                 {
6986                     tmp          = dTopLeft;
6987                     dTopLeft     = dBottomLeft;
6988                     dBottomLeft  = dBottomRight;
6989                     dBottomRight = dTopRight;
6990                     dTopRight    = tmp;
6991                     tmpxy   = dstxinc;
6992                     dstxinc = dstyinc;
6993                     dstyinc = tmpxy;
6994                     dstxinc = dstxinc * -1;
6995                 }
6996                 if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
6997                 {
6998                     tmp          = dTopLeft;
6999                     dTopLeft     = dTopRight;
7000                     dTopRight    = dBottomRight;
7001                     dBottomRight = dBottomLeft;
7002                     dBottomLeft  = tmp;
7003                     tmpxy   = dstxinc;
7004                     dstxinc = dstyinc;
7005                     dstyinc = tmpxy;
7006                     dstyinc = dstyinc * -1;
7007                 }
7008                 if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
7009                 {
7010                     /* I don't think we need to do anything about this flag */
7011                     WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
7012                 }
7013                 dbuf = dTopLeft;
7014                 flags &= ~(WINEDDBLT_DDFX);
7015             }
7016
7017 #define COPY_COLORKEY_FX(type) \
7018 do { \
7019     const type *s; \
7020     type *d = (type *)dbuf, *dx, tmp; \
7021     for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
7022     { \
7023         s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
7024         dx = d; \
7025         for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
7026         { \
7027             tmp = s[sx >> 16]; \
7028             if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
7029                     && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
7030             { \
7031                 dx[0] = tmp; \
7032             } \
7033             dx = (type *)(((BYTE *)dx) + dstxinc); \
7034         } \
7035         d = (type *)(((BYTE *)d) + dstyinc); \
7036     } \
7037 } while(0)
7038
7039             switch (bpp)
7040             {
7041                 case 1:
7042                     COPY_COLORKEY_FX(BYTE);
7043                     break;
7044                 case 2:
7045                     COPY_COLORKEY_FX(WORD);
7046                     break;
7047                 case 4:
7048                     COPY_COLORKEY_FX(DWORD);
7049                     break;
7050                 case 3:
7051                 {
7052                     const BYTE *s;
7053                     BYTE *d = dbuf, *dx;
7054                     for (y = sy = 0; y < dstheight; ++y, sy += yinc)
7055                     {
7056                         sbuf = sbase + (sy >> 16) * slock.Pitch;
7057                         dx = d;
7058                         for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
7059                         {
7060                             DWORD pixel, dpixel = 0;
7061                             s = sbuf + 3 * (sx>>16);
7062                             pixel = s[0] | (s[1] << 8) | (s[2] << 16);
7063                             dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
7064                             if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
7065                                     && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
7066                             {
7067                                 dx[0] = (pixel      ) & 0xff;
7068                                 dx[1] = (pixel >>  8) & 0xff;
7069                                 dx[2] = (pixel >> 16) & 0xff;
7070                             }
7071                             dx += dstxinc;
7072                         }
7073                         d += dstyinc;
7074                     }
7075                     break;
7076                 }
7077                 default:
7078                     FIXME("%s color-keyed blit not implemented for bpp %u!\n",
7079                           (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
7080                     hr = WINED3DERR_NOTAVAILABLE;
7081                     goto error;
7082 #undef COPY_COLORKEY_FX
7083             }
7084         }
7085     }
7086
7087 error:
7088     if (flags && FIXME_ON(d3d_surface))
7089     {
7090         FIXME("\tUnsupported flags: %#x.\n", flags);
7091     }
7092
7093 release:
7094     wined3d_surface_unmap(dst_surface);
7095     if (src_surface && src_surface != dst_surface)
7096         wined3d_surface_unmap(src_surface);
7097     /* Release the converted surface, if any. */
7098     if (src_surface && src_surface != orig_src)
7099         wined3d_surface_decref(src_surface);
7100
7101     return hr;
7102 }
7103
7104 /* Do not call while under the GL lock. */
7105 static HRESULT cpu_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
7106         const RECT *dst_rect, const WINED3DCOLORVALUE *color)
7107 {
7108     static const RECT src_rect;
7109     WINEDDBLTFX BltFx;
7110
7111     memset(&BltFx, 0, sizeof(BltFx));
7112     BltFx.dwSize = sizeof(BltFx);
7113     BltFx.u5.dwFillColor = wined3d_format_convert_from_float(dst_surface, color);
7114     return surface_cpu_blt(dst_surface, dst_rect, NULL, &src_rect,
7115             WINEDDBLT_COLORFILL, &BltFx, WINED3DTEXF_POINT);
7116 }
7117
7118 /* Do not call while under the GL lock. */
7119 static HRESULT cpu_blit_depth_fill(struct wined3d_device *device,
7120         struct wined3d_surface *surface, const RECT *rect, float depth)
7121 {
7122     FIXME("Depth filling not implemented by cpu_blit.\n");
7123     return WINED3DERR_INVALIDCALL;
7124 }
7125
7126 const struct blit_shader cpu_blit =  {
7127     cpu_blit_alloc,
7128     cpu_blit_free,
7129     cpu_blit_set,
7130     cpu_blit_unset,
7131     cpu_blit_supported,
7132     cpu_blit_color_fill,
7133     cpu_blit_depth_fill,
7134 };
7135
7136 static HRESULT surface_init(struct wined3d_surface *surface, WINED3DSURFTYPE surface_type, UINT alignment,
7137         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
7138         UINT multisample_quality, struct wined3d_device *device, DWORD usage, enum wined3d_format_id format_id,
7139         WINED3DPOOL pool, void *parent, const struct wined3d_parent_ops *parent_ops)
7140 {
7141     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
7142     const struct wined3d_format *format = wined3d_get_format(gl_info, format_id);
7143     unsigned int resource_size;
7144     HRESULT hr;
7145
7146     if (multisample_quality > 0)
7147     {
7148         FIXME("multisample_quality set to %u, substituting 0.\n", multisample_quality);
7149         multisample_quality = 0;
7150     }
7151
7152     /* Quick lockable sanity check.
7153      * TODO: remove this after surfaces, usage and lockability have been debugged properly
7154      * this function is too deep to need to care about things like this.
7155      * Levels need to be checked too, since they all affect what can be done. */
7156     switch (pool)
7157     {
7158         case WINED3DPOOL_SCRATCH:
7159             if (!lockable)
7160             {
7161                 FIXME("Called with a pool of SCRATCH and a lockable of FALSE "
7162                         "which are mutually exclusive, setting lockable to TRUE.\n");
7163                 lockable = TRUE;
7164             }
7165             break;
7166
7167         case WINED3DPOOL_SYSTEMMEM:
7168             if (!lockable)
7169                 FIXME("Called with a pool of SYSTEMMEM and a lockable of FALSE, this is acceptable but unexpected.\n");
7170             break;
7171
7172         case WINED3DPOOL_MANAGED:
7173             if (usage & WINED3DUSAGE_DYNAMIC)
7174                 FIXME("Called with a pool of MANAGED and a usage of DYNAMIC which are mutually exclusive.\n");
7175             break;
7176
7177         case WINED3DPOOL_DEFAULT:
7178             if (lockable && !(usage & (WINED3DUSAGE_DYNAMIC | WINED3DUSAGE_RENDERTARGET | WINED3DUSAGE_DEPTHSTENCIL)))
7179                 WARN("Creating a lockable surface with a POOL of DEFAULT, that doesn't specify DYNAMIC usage.\n");
7180             break;
7181
7182         default:
7183             FIXME("Unknown pool %#x.\n", pool);
7184             break;
7185     };
7186
7187     if (usage & WINED3DUSAGE_RENDERTARGET && pool != WINED3DPOOL_DEFAULT)
7188         FIXME("Trying to create a render target that isn't in the default pool.\n");
7189
7190     /* FIXME: Check that the format is supported by the device. */
7191
7192     resource_size = wined3d_format_calculate_size(format, alignment, width, height);
7193     if (!resource_size)
7194         return WINED3DERR_INVALIDCALL;
7195
7196     surface->surface_type = surface_type;
7197
7198     switch (surface_type)
7199     {
7200         case SURFACE_OPENGL:
7201             surface->surface_ops = &surface_ops;
7202             break;
7203
7204         case SURFACE_GDI:
7205             surface->surface_ops = &gdi_surface_ops;
7206             break;
7207
7208         default:
7209             ERR("Requested unknown surface implementation %#x.\n", surface_type);
7210             return WINED3DERR_INVALIDCALL;
7211     }
7212
7213     hr = resource_init(&surface->resource, device, WINED3DRTYPE_SURFACE, format,
7214             multisample_type, multisample_quality, usage, pool, width, height, 1,
7215             resource_size, parent, parent_ops, &surface_resource_ops);
7216     if (FAILED(hr))
7217     {
7218         WARN("Failed to initialize resource, returning %#x.\n", hr);
7219         return hr;
7220     }
7221
7222     /* "Standalone" surface. */
7223     surface_set_container(surface, WINED3D_CONTAINER_NONE, NULL);
7224
7225     surface->texture_level = level;
7226     list_init(&surface->overlays);
7227
7228     /* Flags */
7229     surface->flags = SFLAG_NORMCOORD; /* Default to normalized coords. */
7230     if (discard)
7231         surface->flags |= SFLAG_DISCARD;
7232     if (lockable || format_id == WINED3DFMT_D16_LOCKABLE)
7233         surface->flags |= SFLAG_LOCKABLE;
7234     /* I'm not sure if this qualifies as a hack or as an optimization. It
7235      * seems reasonable to assume that lockable render targets will get
7236      * locked, so we might as well set SFLAG_DYNLOCK right at surface
7237      * creation. However, the other reason we want to do this is that several
7238      * ddraw applications access surface memory while the surface isn't
7239      * mapped. The SFLAG_DYNLOCK behaviour of keeping SYSMEM around for
7240      * future locks prevents these from crashing. */
7241     if (lockable && (usage & WINED3DUSAGE_RENDERTARGET))
7242         surface->flags |= SFLAG_DYNLOCK;
7243
7244     /* Mark the texture as dirty so that it gets loaded first time around. */
7245     surface_add_dirty_rect(surface, NULL);
7246     list_init(&surface->renderbuffers);
7247
7248     TRACE("surface %p, memory %p, size %u\n",
7249             surface, surface->resource.allocatedMemory, surface->resource.size);
7250
7251     /* Call the private setup routine */
7252     hr = surface->surface_ops->surface_private_setup(surface);
7253     if (FAILED(hr))
7254     {
7255         ERR("Private setup failed, returning %#x\n", hr);
7256         surface_cleanup(surface);
7257         return hr;
7258     }
7259
7260     /* Similar to lockable rendertargets above, creating the DIB section
7261      * during surface initialization prevents the sysmem pointer from changing
7262      * after a wined3d_surface_getdc() call. */
7263     if ((usage & WINED3DUSAGE_OWNDC) && !surface->hDC
7264             && SUCCEEDED(surface_create_dib_section(surface)))
7265     {
7266         HeapFree(GetProcessHeap(), 0, surface->resource.heapMemory);
7267         surface->resource.heapMemory = NULL;
7268         surface->resource.allocatedMemory = surface->dib.bitmap_data;
7269     }
7270
7271     return hr;
7272 }
7273
7274 HRESULT CDECL wined3d_surface_create(struct wined3d_device *device, UINT width, UINT height,
7275         enum wined3d_format_id format_id, BOOL lockable, BOOL discard, UINT level, DWORD usage, WINED3DPOOL pool,
7276         WINED3DMULTISAMPLE_TYPE multisample_type, DWORD multisample_quality, WINED3DSURFTYPE surface_type,
7277         void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_surface **surface)
7278 {
7279     struct wined3d_surface *object;
7280     HRESULT hr;
7281
7282     TRACE("device %p, width %u, height %u, format %s, lockable %#x, discard %#x, level %u\n",
7283             device, width, height, debug_d3dformat(format_id), lockable, discard, level);
7284     TRACE("surface %p, usage %s (%#x), pool %s, multisample_type %#x, multisample_quality %u\n",
7285             surface, debug_d3dusage(usage), usage, debug_d3dpool(pool), multisample_type, multisample_quality);
7286     TRACE("surface_type %#x, parent %p, parent_ops %p.\n", surface_type, parent, parent_ops);
7287
7288     if (surface_type == SURFACE_OPENGL && !device->adapter)
7289     {
7290         ERR("OpenGL surfaces are not available without OpenGL.\n");
7291         return WINED3DERR_NOTAVAILABLE;
7292     }
7293
7294     object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object));
7295     if (!object)
7296     {
7297         ERR("Failed to allocate surface memory.\n");
7298         return WINED3DERR_OUTOFVIDEOMEMORY;
7299     }
7300
7301     hr = surface_init(object, surface_type, device->surface_alignment, width, height, level, lockable,
7302             discard, multisample_type, multisample_quality, device, usage, format_id, pool, parent, parent_ops);
7303     if (FAILED(hr))
7304     {
7305         WARN("Failed to initialize surface, returning %#x.\n", hr);
7306         HeapFree(GetProcessHeap(), 0, object);
7307         return hr;
7308     }
7309
7310     TRACE("Created surface %p.\n", object);
7311     *surface = object;
7312
7313     return hr;
7314 }