Do frag prog allocation and shader upload in TCL init
[nouveau] / src / nv50_exa.c
1 /*
2  * Copyright 2007 NVIDIA, Corporation
3  * Copyright 2008 Ben Skeggs
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23
24 #include "nv_include.h"
25 #include "nv_rop.h"
26
27 #include "nv50_accel.h"
28 #include "nv50_texture.h"
29
/*
 * State carried from the composite "prepare" step to the per-rectangle
 * NV50EXAComposite() calls.  NV50EXATexture() fills in unit[n],
 * NV50EXAPrepareComposite() sets have_mask, NV50EXAComposite() reads both.
 */
struct nv50_exa_state {
        /* TRUE when the composite being drawn has a mask picture */
        Bool have_mask;

        /* Indexed by texture unit: 0 is the source, 1 is the mask */
        struct {
                PictTransformPtr transform; /* picture transform; may be NULL */
                float width;                /* pixmap width, used to scale coords */
                float height;               /* pixmap height, used to scale coords */
        } unit[2];
};
/* The single file-wide instance; NV50EXA_LOCALS exposes it as `state` */
static struct nv50_exa_state exa_state;
40
/*
 * Declare the locals used by the hooks in this file, all derived from the
 * pixmap `p`:
 *   pScrn - screen info looked up through the pixmap's screen number
 *   pNv   - driver private for that screen
 *   chan  - pNv->chan, the channel handed to BEGIN_RING/OUT_RING
 *   eng2d - pNv->Nv2D object
 *   tesla - pNv->Nv3D object
 *   state - pointer to the file-wide exa_state
 * Each declaration is followed by a (void) reference so a hook that does not
 * use one of them still compiles without an unused-variable warning.
 * The expansion has no trailing ';' - the invoking line supplies it.
 */
#define NV50EXA_LOCALS(p)                                              \
        ScrnInfoPtr pScrn = xf86Screens[(p)->drawable.pScreen->myNum]; \
        NVPtr pNv = NVPTR(pScrn);                                      \
        struct nouveau_channel *chan = pNv->chan; (void)chan;          \
        struct nouveau_grobj *eng2d = pNv->Nv2D; (void)eng2d;          \
        struct nouveau_grobj *tesla = pNv->Nv3D; (void)tesla;          \
        struct nv50_exa_state *state = &exa_state; (void)state
48
/*
 * Blend factor helper: expands to the NV50TCL_BLEND_FUNC_SRC_RGB_<f> constant
 * with bit 0x4000 OR'ed in.
 * NOTE(review): the meaning of the 0x4000 bit is not visible in this file.
 */
#define BF(f) (NV50TCL_BLEND_FUNC_SRC_RGB_##f | 0x4000)
50
/* One row of the blend table below: how a single Render operator is blended */
struct nv50_blend_op {
        unsigned src_alpha; /* non-zero: dst_blend refers to source alpha */
        unsigned dst_alpha; /* non-zero: src_blend refers to destination alpha */
        unsigned src_blend; /* BF() factor applied to the source */
        unsigned dst_blend; /* BF() factor applied to the destination */
};
57
/*
 * Blend setup per Render operator.  The table is indexed directly by the
 * `op` value passed to the composite hooks, so row order must not change;
 * NV50EXACheckBlend() rejects any op above PictOpAdd (the last row).
 * Columns: src_alpha, dst_alpha, src_blend, dst_blend.
 */
static struct nv50_blend_op
NV50EXABlendOp[] = {
/* Clear       */ { 0, 0, BF(               ZERO), BF(               ZERO) },
/* Src         */ { 0, 0, BF(                ONE), BF(               ZERO) },
/* Dst         */ { 0, 0, BF(               ZERO), BF(                ONE) },
/* Over        */ { 1, 0, BF(                ONE), BF(ONE_MINUS_SRC_ALPHA) },
/* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(                ONE) },
/* In          */ { 0, 1, BF(          DST_ALPHA), BF(               ZERO) },
/* InReverse   */ { 1, 0, BF(               ZERO), BF(          SRC_ALPHA) },
/* Out         */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(               ZERO) },
/* OutReverse  */ { 1, 0, BF(               ZERO), BF(ONE_MINUS_SRC_ALPHA) },
/* Atop        */ { 1, 1, BF(          DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
/* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(          SRC_ALPHA) },
/* Xor         */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
/* Add         */ { 0, 0, BF(                ONE), BF(                ONE) },
};
74
75 static Bool
76 NV50EXA2DSurfaceFormat(PixmapPtr ppix, uint32_t *fmt)
77 {
78         NV50EXA_LOCALS(ppix);
79
80         switch (ppix->drawable.depth) {
81         case 8 : *fmt = NV50_2D_SRC_FORMAT_8BPP; break;
82         case 15: *fmt = NV50_2D_SRC_FORMAT_15BPP; break;
83         case 16: *fmt = NV50_2D_SRC_FORMAT_16BPP; break;
84         case 24: *fmt = NV50_2D_SRC_FORMAT_24BPP; break;
85         case 32: *fmt = NV50_2D_SRC_FORMAT_32BPP; break;
86         default:
87                  xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
88                             "Unknown surface format for bpp=%d\n",
89                             ppix->drawable.depth);
90                  return FALSE;
91         }
92
93         return TRUE;
94 }
95
96 static void NV50EXASetClip(PixmapPtr ppix, int x, int y, int w, int h)
97 {
98         NV50EXA_LOCALS(ppix);
99
100         BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4);
101         OUT_RING  (chan, x);
102         OUT_RING  (chan, y);
103         OUT_RING  (chan, w);
104         OUT_RING  (chan, h);
105 }
106
107 static Bool
108 NV50EXAAcquireSurface2D(PixmapPtr ppix, int is_src)
109 {
110         NV50EXA_LOCALS(ppix);
111         int mthd = is_src ? NV50_2D_SRC_FORMAT : NV50_2D_DST_FORMAT;
112         uint32_t fmt, bo_flags;
113
114         if (!NV50EXA2DSurfaceFormat(ppix, &fmt))
115                 return FALSE;
116
117         bo_flags  = NOUVEAU_BO_VRAM;
118         bo_flags |= is_src ? NOUVEAU_BO_RD : NOUVEAU_BO_WR;
119
120         if (exaGetPixmapOffset(ppix) < pNv->EXADriverPtr->offScreenBase) {
121                 BEGIN_RING(chan, eng2d, mthd, 2);
122                 OUT_RING  (chan, fmt);
123                 OUT_RING  (chan, 1);
124                 BEGIN_RING(chan, eng2d, mthd + 0x14, 1);
125                 OUT_RING  (chan, (uint32_t)exaGetPixmapPitch(ppix));
126         } else {
127                 BEGIN_RING(chan, eng2d, mthd, 5);
128                 OUT_RING  (chan, fmt);
129                 OUT_RING  (chan, 0);
130                 OUT_RING  (chan, 0);
131                 OUT_RING  (chan, 1);
132                 OUT_RING  (chan, 0);
133         }
134
135         BEGIN_RING(chan, eng2d, mthd + 0x18, 4);
136         OUT_RING  (chan, ppix->drawable.width);
137         OUT_RING  (chan, ppix->drawable.height);
138         OUT_PIXMAPh(chan, ppix, 0, bo_flags);
139         OUT_PIXMAPl(chan, ppix, 0, bo_flags);
140
141         if (is_src == 0)
142                 NV50EXASetClip(ppix, 0, 0, ppix->drawable.width, ppix->drawable.height);
143
144         return TRUE;
145 }
146
147 static void
148 NV50EXASetPattern(PixmapPtr pdpix, int col0, int col1, int pat0, int pat1)
149 {
150         NV50EXA_LOCALS(pdpix);
151
152         BEGIN_RING(chan, eng2d, NV50_2D_PATTERN_COLOR(0), 4);
153         OUT_RING  (chan, col0);
154         OUT_RING  (chan, col1);
155         OUT_RING  (chan, pat0);
156         OUT_RING  (chan, pat1);
157 }
158
159 static void
160 NV50EXASetROP(PixmapPtr pdpix, int alu, Pixel planemask)
161 {
162         NV50EXA_LOCALS(pdpix);
163         int rop;
164
165         if (planemask != ~0)
166                 rop = NVROP[alu].copy_planemask;
167         else
168                 rop = NVROP[alu].copy;
169
170         BEGIN_RING(chan, eng2d, NV50_2D_OPERATION, 1);
171         if (alu == GXcopy && planemask == ~0) {
172                 OUT_RING  (chan, NV50_2D_OPERATION_SRCCOPY);
173                 return;
174         } else {
175                 OUT_RING  (chan, NV50_2D_OPERATION_SRCCOPY_PREMULT);
176         }
177
178         BEGIN_RING(chan, eng2d, NV50_2D_PATTERN_FORMAT, 2);
179         switch (pdpix->drawable.depth) {
180                 case  8: OUT_RING  (chan, 3); break;
181                 case 15: OUT_RING  (chan, 1); break;
182                 case 16: OUT_RING  (chan, 0); break;
183                 case 24:
184                 case 32:
185                 default:
186                          OUT_RING  (chan, 2);
187                          break;
188         }
189         OUT_RING  (chan, 1);
190
191         /* There are 16 alu's.
192          * 0-15: copy
193          * 16-31: copy_planemask
194          */
195
196         if (planemask != ~0) {
197                 alu += 16;
198                 NV50EXASetPattern(pdpix, 0, planemask, ~0, ~0);
199         } else {
200                 if (pNv->currentRop > 15)
201                         NV50EXASetPattern(pdpix, ~0, ~0, ~0, ~0);
202         }
203
204         if (pNv->currentRop != alu) {
205                 BEGIN_RING(chan, eng2d, NV50_2D_ROP, 1);
206                 OUT_RING  (chan, rop);
207                 pNv->currentRop = alu;
208         }
209 }
210
211 Bool
212 NV50EXAPrepareSolid(PixmapPtr pdpix, int alu, Pixel planemask, Pixel fg)
213 {
214         NV50EXA_LOCALS(pdpix);
215         uint32_t fmt;
216
217         planemask |= ~0 << pScrn->depth;
218
219         if (!NV50EXA2DSurfaceFormat(pdpix, &fmt))
220                 NOUVEAU_FALLBACK("rect format\n");
221         if (!NV50EXAAcquireSurface2D(pdpix, 0))
222                 NOUVEAU_FALLBACK("dest pixmap\n");
223         NV50EXASetROP(pdpix, alu, planemask);
224
225         BEGIN_RING(chan, eng2d, 0x580, 3);
226         OUT_RING  (chan, 4);
227         OUT_RING  (chan, fmt);
228         OUT_RING  (chan, fg);
229
230         return TRUE;
231 }
232
233 void
234 NV50EXASolid(PixmapPtr pdpix, int x1, int y1, int x2, int y2)
235 {
236         NV50EXA_LOCALS(pdpix);
237
238         BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4);
239         OUT_RING  (chan, x1);
240         OUT_RING  (chan, y1);
241         OUT_RING  (chan, x2);
242         OUT_RING  (chan, y2);
243
244         if((x2 - x1) * (y2 - y1) >= 512)
245                 FIRE_RING (chan);
246 }
247
248 void
249 NV50EXADoneSolid(PixmapPtr pdpix)
250 {
251 }
252
253 Bool
254 NV50EXAPrepareCopy(PixmapPtr pspix, PixmapPtr pdpix, int dx, int dy,
255                    int alu, Pixel planemask)
256 {
257         NV50EXA_LOCALS(pdpix);
258
259         planemask |= ~0 << pScrn->depth;
260
261         if (!NV50EXAAcquireSurface2D(pspix, 1))
262                 NOUVEAU_FALLBACK("src pixmap\n");
263         if (!NV50EXAAcquireSurface2D(pdpix, 0))
264                 NOUVEAU_FALLBACK("dest pixmap\n");
265         NV50EXASetROP(pdpix, alu, planemask);
266
267         return TRUE;
268 }
269
270 void
271 NV50EXACopy(PixmapPtr pdpix, int srcX , int srcY,
272                              int dstX , int dstY,
273                              int width, int height)
274 {
275         NV50EXA_LOCALS(pdpix);
276
277         BEGIN_RING(chan, eng2d, 0x0110, 1);
278         OUT_RING  (chan, 0);
279         BEGIN_RING(chan, eng2d, 0x088c, 1);
280         OUT_RING  (chan, 0);
281         BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 12);
282         OUT_RING  (chan, dstX);
283         OUT_RING  (chan, dstY);
284         OUT_RING  (chan, width);
285         OUT_RING  (chan, height);
286         OUT_RING  (chan, 0);
287         OUT_RING  (chan, 1);
288         OUT_RING  (chan, 0);
289         OUT_RING  (chan, 1);
290         OUT_RING  (chan, 0);
291         OUT_RING  (chan, srcX);
292         OUT_RING  (chan, 0);
293         OUT_RING  (chan, srcY);
294
295         if(width * height >= 512)
296                 FIRE_RING (chan);
297 }
298
299 void
300 NV50EXADoneCopy(PixmapPtr pdpix)
301 {
302 }
303
304 Bool
305 NV50EXAUploadSIFC(const char *src, int src_pitch,
306                   PixmapPtr pdpix, int x, int y, int w, int h, int cpp)
307 {
308         NV50EXA_LOCALS(pdpix);
309         int line_dwords = (w * cpp + 3) / 4;
310         uint32_t sifc_fmt;
311
312         if (!NV50EXA2DSurfaceFormat(pdpix, &sifc_fmt))
313                 NOUVEAU_FALLBACK("hostdata format\n");
314         if (!NV50EXAAcquireSurface2D(pdpix, 0))
315                 NOUVEAU_FALLBACK("dest pixmap\n");
316
317         /* If the pitch isn't aligned to a dword, then you can get corruption at the end of a line. */
318         NV50EXASetClip(pdpix, x, y, w, h);
319
320         BEGIN_RING(chan, eng2d, NV50_2D_OPERATION, 1);
321         OUT_RING  (chan, NV50_2D_OPERATION_SRCCOPY);
322         BEGIN_RING(chan, eng2d, NV50_2D_SIFC_UNK0800, 2);
323         OUT_RING  (chan, 0);
324         OUT_RING  (chan, sifc_fmt);
325         BEGIN_RING(chan, eng2d, NV50_2D_SIFC_WIDTH, 10);
326         OUT_RING  (chan, (line_dwords * 4) / cpp);
327         OUT_RING  (chan, h);
328         OUT_RING  (chan, 0);
329         OUT_RING  (chan, 1);
330         OUT_RING  (chan, 0);
331         OUT_RING  (chan, 1);
332         OUT_RING  (chan, 0);
333         OUT_RING  (chan, x);
334         OUT_RING  (chan, 0);
335         OUT_RING  (chan, y);
336
337         while (h--) {
338                 int count = line_dwords;
339                 const char *p = src;
340
341                 while(count) {
342                         int size = count > 1792 ? 1792 : count;
343
344                         BEGIN_RING(chan, eng2d,
345                                          NV50_2D_SIFC_DATA | 0x40000000, size);
346                         OUT_RINGp (chan, p, size);
347
348                         p += size * cpp;
349                         count -= size;
350                 }
351
352                 src += src_pitch;
353         }
354
355         return TRUE;
356 }
357
358 static Bool
359 NV50EXACheckRenderTarget(PicturePtr ppict)
360 {
361         if (ppict->pDrawable->width > 8192 ||
362             ppict->pDrawable->height > 8192)
363                 NOUVEAU_FALLBACK("render target dimensions exceeded %dx%d\n",
364                                  ppict->pDrawable->width,
365                                  ppict->pDrawable->height);
366
367         switch (ppict->format) {
368         case PICT_a8r8g8b8:
369         case PICT_x8r8g8b8:
370         case PICT_r5g6b5:
371         case PICT_a8:
372                 break;
373         default:
374                 NOUVEAU_FALLBACK("picture format 0x%08x\n", ppict->format);
375         }
376
377         return TRUE;
378 }
379
380 static Bool
381 NV50EXARenderTarget(PixmapPtr ppix, PicturePtr ppict)
382 {
383         NV50EXA_LOCALS(ppix);
384         unsigned format;
385
386         /*XXX: Scanout buffer not tiled, someone needs to figure it out */
387         if (exaGetPixmapOffset(ppix) < pNv->EXADriverPtr->offScreenBase)
388                 NOUVEAU_FALLBACK("pixmap is scanout buffer\n");
389
390         switch (ppict->format) {
391         case PICT_a8r8g8b8: format = NV50TCL_RT_FORMAT_32BPP; break;
392         case PICT_x8r8g8b8: format = NV50TCL_RT_FORMAT_24BPP; break;
393         case PICT_r5g6b5  : format = NV50TCL_RT_FORMAT_16BPP; break;
394         case PICT_a8      : format = NV50TCL_RT_FORMAT_8BPP; break;
395         default:
396                 NOUVEAU_FALLBACK("invalid picture format\n");
397         }
398
399         BEGIN_RING(chan, tesla, NV50TCL_RT_ADDRESS_HIGH(0), 5);
400         OUT_PIXMAPh(chan, ppix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
401         OUT_PIXMAPl(chan, ppix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
402         OUT_RING  (chan, format);
403         OUT_RING  (chan, 0);
404         OUT_RING  (chan, 0x00000000);
405         BEGIN_RING(chan, tesla, NV50TCL_RT_HORIZ(0), 2);
406         OUT_RING  (chan, ppix->drawable.width);
407         OUT_RING  (chan, ppix->drawable.height);
408         BEGIN_RING(chan, tesla, 0x1224, 1);
409         OUT_RING  (chan, 0x00000001);
410
411         return TRUE;
412 }
413
414 static Bool
415 NV50EXACheckTexture(PicturePtr ppict)
416 {
417         if (ppict->pDrawable->width > 8192 ||
418             ppict->pDrawable->height > 8192)
419                 NOUVEAU_FALLBACK("texture dimensions exceeded %dx%d\n",
420                                  ppict->pDrawable->width,
421                                  ppict->pDrawable->height);
422
423         switch (ppict->format) {
424         case PICT_a8r8g8b8:
425         case PICT_a8b8g8r8:
426         case PICT_x8r8g8b8:
427         case PICT_x8b8g8r8:
428         case PICT_r5g6b5:
429         case PICT_a8:
430                 break;
431         default:
432                 NOUVEAU_FALLBACK("picture format 0x%08x\n", ppict->format);
433         }
434
435         switch (ppict->filter) {
436         case PictFilterNearest:
437         case PictFilterBilinear:
438                 break;
439         default:
440                 NOUVEAU_FALLBACK("picture filter %d\n", ppict->filter);
441         }
442
443         return TRUE;
444 }
445
446 static Bool
447 NV50EXATexture(PixmapPtr ppix, PicturePtr ppict, unsigned unit)
448 {
449         NV50EXA_LOCALS(ppix);
450
451         /*XXX: Scanout buffer not tiled, someone needs to figure it out */
452         if (exaGetPixmapOffset(ppix) < pNv->EXADriverPtr->offScreenBase)
453                 NOUVEAU_FALLBACK("pixmap is scanout buffer\n");
454
455         BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
456         OUT_RING  (chan, CB_TIC | ((unit * 8) << NV50TCL_CB_ADDR_ID_SHIFT));
457         BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8);
458         switch (ppict->format) {
459         case PICT_a8r8g8b8:
460                 OUT_RING  (chan, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
461                          NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
462                          NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEB_UNORM |
463                          NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEG_UNORM |
464                          NV50TIC_0_0_FMT_8_8_8_8);
465                 break;
466         case PICT_a8b8g8r8:
467                 OUT_RING  (chan, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM |
468                          NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
469                          NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEB_UNORM |
470                          NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEG_UNORM |
471                          NV50TIC_0_0_FMT_8_8_8_8);
472                 break;
473         case PICT_x8r8g8b8:
474                 OUT_RING  (chan, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
475                          NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
476                          NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEB_UNORM |
477                          NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEG_UNORM |
478                          NV50TIC_0_0_FMT_8_8_8_8);
479                 break;
480         case PICT_x8b8g8r8:
481                 OUT_RING  (chan, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
482                          NV50TIC_0_0_MAPR_C2 | NV50TIC_0_0_TYPER_UNORM |
483                          NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEB_UNORM |
484                          NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEG_UNORM |
485                          NV50TIC_0_0_FMT_8_8_8_8);
486                 break;
487         case PICT_r5g6b5:
488                 OUT_RING  (chan, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM |
489                          NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM |
490                          NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEB_UNORM |
491                          NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEG_UNORM |
492                          NV50TIC_0_0_FMT_5_6_5);
493                 break;
494         case PICT_a8:
495                 OUT_RING  (chan, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM |
496                          NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM |
497                          NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEB_UNORM |
498                          NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEG_UNORM |
499                          NV50TIC_0_0_FMT_8);
500                 break;
501         default:
502                 NOUVEAU_FALLBACK("invalid picture format, this SHOULD NOT HAPPEN. Expect trouble.\n");
503         }
504         OUT_PIXMAPl(chan, ppix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
505         OUT_RING  (chan, 0xd0005000);
506         OUT_RING  (chan, 0x00300000);
507         OUT_RING  (chan, ppix->drawable.width);
508         OUT_RING  (chan, (1 << NV50TIC_0_5_DEPTH_SHIFT) | ppix->drawable.height);
509         OUT_RING  (chan, 0x03000000);
510         OUT_PIXMAPh(chan, ppix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
511
512         BEGIN_RING(chan, tesla, NV50TCL_CB_ADDR, 1);
513         OUT_RING  (chan, CB_TSC | ((unit * 8) << NV50TCL_CB_ADDR_ID_SHIFT));
514         BEGIN_RING(chan, tesla, NV50TCL_CB_DATA(0) | 0x40000000, 8);
515         if (ppict->repeat) {
516                 switch (ppict->repeatType) {
517                 case RepeatPad:
518                         OUT_RING  (chan, NV50TSC_1_0_WRAPS_CLAMP |
519                                  NV50TSC_1_0_WRAPT_CLAMP |
520                                  NV50TSC_1_0_WRAPR_CLAMP | 0x00024000);
521                         break;
522                 case RepeatReflect:
523                         OUT_RING  (chan, NV50TSC_1_0_WRAPS_MIRROR_REPEAT |
524                                  NV50TSC_1_0_WRAPT_MIRROR_REPEAT |
525                                  NV50TSC_1_0_WRAPR_MIRROR_REPEAT | 0x00024000);
526                         break;
527                 case RepeatNormal:
528                 default:
529                         OUT_RING  (chan, NV50TSC_1_0_WRAPS_REPEAT |
530                                  NV50TSC_1_0_WRAPT_REPEAT |
531                                  NV50TSC_1_0_WRAPR_REPEAT | 0x00024000);
532                         break;
533                 }
534         } else {
535                 OUT_RING  (chan, NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER |
536                          NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER |
537                          NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER | 0x00024000);
538         }
539         if (ppict->filter == PictFilterBilinear) {
540                 OUT_RING  (chan, NV50TSC_1_1_MAGF_LINEAR |
541                          NV50TSC_1_1_MINF_LINEAR |
542                          NV50TSC_1_1_MIPF_NONE);
543         } else {
544                 OUT_RING  (chan, NV50TSC_1_1_MAGF_NEAREST |
545                          NV50TSC_1_1_MINF_NEAREST |
546                          NV50TSC_1_1_MIPF_NONE);
547         }
548         OUT_RING  (chan, 0x00000000);
549         OUT_RING  (chan, 0x00000000);
550         OUT_RING  (chan, 0x00000000);
551         OUT_RING  (chan, 0x00000000);
552         OUT_RING  (chan, 0x00000000);
553         OUT_RING  (chan, 0x00000000);
554
555         state->unit[unit].width = ppix->drawable.width;
556         state->unit[unit].height = ppix->drawable.height;
557         state->unit[unit].transform = ppict->transform;
558         return TRUE;
559 }
560
561 static Bool
562 NV50EXACheckBlend(int op)
563 {
564         if (op > PictOpAdd)
565                 NOUVEAU_FALLBACK("unsupported blend op %d\n", op);
566         return TRUE;
567 }
568
569 static void
570 NV50EXABlend(PixmapPtr ppix, PicturePtr ppict, int op, int component_alpha)
571 {
572         NV50EXA_LOCALS(ppix);
573         struct nv50_blend_op *b = &NV50EXABlendOp[op];
574         unsigned sblend = b->src_blend;
575         unsigned dblend = b->dst_blend;
576
577         if (b->dst_alpha) {
578                 if (!PICT_FORMAT_A(ppict->format)) {
579                         if (sblend == BF(DST_ALPHA))
580                                 sblend = BF(ONE);
581                         else
582                         if (sblend == BF(ONE_MINUS_DST_ALPHA))
583                                 sblend = BF(ZERO);
584                 } else
585                 if (ppict->format == PICT_a8) {
586                         if (sblend == BF(DST_ALPHA))
587                                 sblend = BF(DST_COLOR);
588                         else
589                         if (sblend == BF(ONE_MINUS_DST_ALPHA))
590                                 sblend = BF(ONE_MINUS_DST_COLOR);
591                 }
592         }
593
594         if (b->src_alpha && (component_alpha || ppict->format == PICT_a8)) {
595                 if (dblend == BF(SRC_ALPHA))
596                         dblend = BF(SRC_COLOR);
597                 else
598                 if (dblend == BF(ONE_MINUS_SRC_ALPHA))
599                         dblend = BF(ONE_MINUS_SRC_COLOR);
600         }
601
602         if (sblend == BF(ONE) && dblend == BF(ZERO)) {
603                 BEGIN_RING(chan, tesla, NV50TCL_BLEND_ENABLE(0), 1);
604                 OUT_RING  (chan, 0);
605         } else {
606                 BEGIN_RING(chan, tesla, NV50TCL_BLEND_ENABLE(0), 1);
607                 OUT_RING  (chan, 1);
608                 BEGIN_RING(chan, tesla, NV50TCL_BLEND_EQUATION_RGB, 5);
609                 OUT_RING  (chan, NV50TCL_BLEND_EQUATION_RGB_FUNC_ADD);
610                 OUT_RING  (chan, sblend);
611                 OUT_RING  (chan, dblend);
612                 OUT_RING  (chan, NV50TCL_BLEND_EQUATION_ALPHA_FUNC_ADD);
613                 OUT_RING  (chan, sblend);
614                 BEGIN_RING(chan, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1);
615                 OUT_RING  (chan, dblend);
616         }
617 }
618
619 Bool
620 NV50EXACheckComposite(int op,
621                       PicturePtr pspict, PicturePtr pmpict, PicturePtr pdpict)
622 {
623         if (!NV50EXACheckBlend(op))
624                 NOUVEAU_FALLBACK("blend not supported\n");
625
626         if (!NV50EXACheckRenderTarget(pdpict))
627                 NOUVEAU_FALLBACK("render target invalid\n");
628
629         if (!NV50EXACheckTexture(pspict))
630                 NOUVEAU_FALLBACK("src picture invalid\n");
631
632         if (pmpict) {
633                 if (pmpict->componentAlpha &&
634                     PICT_FORMAT_RGB(pmpict->format) &&
635                     NV50EXABlendOp[op].src_alpha &&
636                     NV50EXABlendOp[op].src_blend != BF(ZERO))
637                         NOUVEAU_FALLBACK("component-alpha not supported\n");
638
639                 if (!NV50EXACheckTexture(pmpict))
640                         NOUVEAU_FALLBACK("mask picture invalid\n");
641         }
642
643         return TRUE;
644 }
645
646 Bool
647 NV50EXAPrepareComposite(int op,
648                         PicturePtr pspict, PicturePtr pmpict, PicturePtr pdpict,
649                         PixmapPtr pspix, PixmapPtr pmpix, PixmapPtr pdpix)
650 {
651         NV50EXA_LOCALS(pspix);
652
653         BEGIN_RING(chan, eng2d, 0x0110, 1);
654         OUT_RING  (chan, 0);
655
656         if (!NV50EXARenderTarget(pdpix, pdpict))
657                 NOUVEAU_FALLBACK("render target invalid\n");
658
659         NV50EXABlend(pdpix, pdpict, op, pmpict && pmpict->componentAlpha &&
660                      PICT_FORMAT_RGB(pmpict->format));
661
662         if (pmpict) {
663                 if (!NV50EXATexture(pspix, pspict, 0))
664                         NOUVEAU_FALLBACK("src picture invalid\n");
665                 if (!NV50EXATexture(pmpix, pmpict, 1))
666                         NOUVEAU_FALLBACK("mask picture invalid\n");
667                 state->have_mask = TRUE;
668
669                 BEGIN_RING(chan, tesla, NV50TCL_FP_START_ID, 1);
670                 if (pdpict->format == PICT_a8) {
671                         OUT_RING  (chan, PFP_C_A8);
672                 } else {
673                         if (pmpict->componentAlpha &&
674                             PICT_FORMAT_RGB(pmpict->format)) {
675                                 if (NV50EXABlendOp[op].src_alpha)
676                                         OUT_RING  (chan, PFP_CCASA);
677                                 else
678                                         OUT_RING  (chan, PFP_CCA);
679                         } else {
680                                 OUT_RING  (chan, PFP_C);
681                         }
682                 }
683         } else {
684                 if (!NV50EXATexture(pspix, pspict, 0))
685                         NOUVEAU_FALLBACK("src picture invalid\n");
686                 state->have_mask = FALSE;
687
688                 BEGIN_RING(chan, tesla, NV50TCL_FP_START_ID, 1);
689                 if (pdpict->format == PICT_a8)
690                         OUT_RING  (chan, PFP_S_A8);
691                 else
692                         OUT_RING  (chan, PFP_S);
693         }
694
695         BEGIN_RING(chan, tesla, 0x1334, 1);
696         OUT_RING  (chan, 0);
697
698         BEGIN_RING(chan, tesla, 0x1458, 1);
699         OUT_RING  (chan, 1);
700         BEGIN_RING(chan, tesla, 0x1458, 1);
701         OUT_RING  (chan, 0x203);
702
703         BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1);
704         OUT_RING  (chan, NV50TCL_VERTEX_BEGIN_QUADS);
705
706         return TRUE;
707 }
708
709 #define xFixedToFloat(v) \
710         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
711 static inline void
712 NV50EXATransform(PictTransformPtr t, int x, int y, float sx, float sy,
713                  float *x_ret, float *y_ret)
714 {
715         if (t) {
716                 PictVector v;
717
718                 v.vector[0] = IntToxFixed(x);
719                 v.vector[1] = IntToxFixed(y);
720                 v.vector[2] = xFixed1;
721                 PictureTransformPoint(t, &v);
722                 *x_ret = xFixedToFloat(v.vector[0]) / sx;
723                 *y_ret = xFixedToFloat(v.vector[1]) / sy;
724         } else {
725                 *x_ret = (float)x / sx;
726                 *y_ret = (float)y / sy;
727         }
728 }
729
730 void
731 NV50EXAComposite(PixmapPtr pdpix, int sx, int sy, int mx, int my,
732                  int dx, int dy, int w, int h)
733 {
734         NV50EXA_LOCALS(pdpix);
735         float sX0, sX1, sX2, sX3, sY0, sY1, sY2, sY3;
736         unsigned dX0 = dx, dX1 = dx + w, dY0 = dy, dY1 = dy + h;
737
738         NV50EXATransform(state->unit[0].transform, sx, sy,
739                          state->unit[0].width, state->unit[0].height,
740                          &sX0, &sY0);
741         NV50EXATransform(state->unit[0].transform, sx + w, sy,
742                          state->unit[0].width, state->unit[0].height,
743                          &sX1, &sY1);
744         NV50EXATransform(state->unit[0].transform, sx + w, sy + h,
745                          state->unit[0].width, state->unit[0].height,
746                          &sX2, &sY2);
747         NV50EXATransform(state->unit[0].transform, sx, sy + h,
748                          state->unit[0].width, state->unit[0].height,
749                          &sX3, &sY3);
750
751         if (state->have_mask) {
752                 float mX0, mX1, mX2, mX3, mY0, mY1, mY2, mY3;
753
754                 NV50EXATransform(state->unit[1].transform, mx, my,
755                                  state->unit[1].width, state->unit[1].height,
756                                  &mX0, &mY0);
757                 NV50EXATransform(state->unit[1].transform, mx + w, my,
758                                  state->unit[1].width, state->unit[1].height,
759                                  &mX1, &mY1);
760                 NV50EXATransform(state->unit[1].transform, mx + w, my + h,
761                                  state->unit[1].width, state->unit[1].height,
762                                  &mX2, &mY2);
763                 NV50EXATransform(state->unit[1].transform, mx, my + h,
764                                  state->unit[1].width, state->unit[1].height,
765                                  &mX3, &mY3);
766
767                 VTX2s(pNv, sX0, sY0, mX0, mY0, dX0, dY0);
768                 VTX2s(pNv, sX1, sY1, mX1, mY1, dX1, dY0);
769                 VTX2s(pNv, sX2, sY2, mX2, mY2, dX1, dY1);
770                 VTX2s(pNv, sX3, sY3, mX3, mY3, dX0, dY1);
771         } else {
772                 VTX1s(pNv, sX0, sY0, dX0, dY0);
773                 VTX1s(pNv, sX1, sY1, dX1, dY0);
774                 VTX1s(pNv, sX2, sY2, dX1, dY1);
775                 VTX1s(pNv, sX3, sY3, dX0, dY1);
776         }
777 }
778
/*
 * EXA DoneComposite hook: close the vertex batch opened by
 * NV50EXAPrepareComposite() and submit everything queued so far.
 */
void
NV50EXADoneComposite(PixmapPtr pdpix)
{
        NV50EXA_LOCALS(pdpix);

        /* matches the NV50TCL_VERTEX_BEGIN written in PrepareComposite */
        BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1);
        OUT_RING  (chan, 0);

        FIRE_RING (chan);
}
789