nv30exa: set link with image blit
[nouveau] / src / nv30_exa.c
1 /*
2  * Copyright 2007 Ben Skeggs
3  * Copyright 2007 Stephane Marchesin
4  * Copyright 2007 Jeremy Kolb
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
21  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24
25 #include "nv_include.h"
26 #include "nv_shaders.h"
27
/* Pairs a Render picture format with the surface format word the card takes. */
struct nv_pict_surface_format {
	int      pict_fmt;	/* PICT_* picture format */
	uint32_t card_fmt;	/* value emitted as the surface format */
};
typedef struct nv_pict_surface_format nv_pict_surface_format_t;
32
/* Pairs a Render picture format with the card's texture format and swizzle. */
struct nv_pict_texture_format {
	int      pict_fmt;	/* PICT_* picture format */
	uint32_t card_fmt;	/* texture format field value */
	uint32_t card_swz;	/* swizzle bits, OR'ed into the texture pitch word */
};
typedef struct nv_pict_texture_format nv_pict_texture_format_t;
38
39 typedef struct nv_pict_op {
40         Bool     src_alpha;
41         Bool     dst_alpha;
42         uint32_t src_card_op;
43         uint32_t dst_card_op;
44 } nv_pict_op_t;
45
46 typedef struct nv30_exa_state {
47         Bool have_mask;
48
49         struct {
50                 PictTransformPtr transform;
51                 float width;
52                 float height;
53         } unit[2];
54 } nv30_exa_state_t;
55 static nv30_exa_state_t exa_state;
56 #define NV30EXA_STATE nv30_exa_state_t *state = &exa_state
57
58 static nv_pict_surface_format_t
59 NV30SurfaceFormat[] = {
60         { PICT_a8r8g8b8 , 0x148 },
61         { PICT_a8b8g8r8 , 0x150 },
62         { PICT_x8r8g8b8 , 0x145 },
63         { PICT_x8b8g8r8 , 0x14f },
64         { PICT_r5g6b5   , 0x143 },
65         { PICT_a8       , 0x149 },
66 };
67
68 static nv_pict_surface_format_t *
69 NV30_GetPictSurfaceFormat(int format)
70 {
71         int i;
72
73         for(i=0;i<sizeof(NV30SurfaceFormat)/sizeof(NV30SurfaceFormat[0]);i++)
74         {
75                 if (NV30SurfaceFormat[i].pict_fmt == format)
76                         return &NV30SurfaceFormat[i];
77         }
78
79         return NULL;
80 }
81
/*
 * Indices into the fragment program tables (nv40_fp_map / nv40_fp_map_a8).
 *
 * The name is an enum tag: writing it after the closing brace would define
 * a global variable with external linkage instead of naming the type.
 */
enum NV30EXA_FPID {
	NV30EXA_FPID_PASS_COL0 = 0,
	NV30EXA_FPID_PASS_TEX0 = 1,
	NV30EXA_FPID_COMPOSITE_MASK = 2,
	NV30EXA_FPID_COMPOSITE_MASK_SA_CA = 3,
	NV30EXA_FPID_COMPOSITE_MASK_CA = 4,
	NV30EXA_FPID_MAX = 5		/* number of programs, not a program */
};
90
91 static nv_shader_t *nv40_fp_map[NV30EXA_FPID_MAX] = {
92         &nv30_fp_pass_col0,
93         &nv30_fp_pass_tex0,
94         &nv30_fp_composite_mask,
95         &nv30_fp_composite_mask_sa_ca,
96         &nv30_fp_composite_mask_ca
97 };
98
99 static nv_shader_t *nv40_fp_map_a8[NV30EXA_FPID_MAX];
100
101 static void
102 NV30EXAHackupA8Shaders(ScrnInfoPtr pScrn)
103 {
104         int s;
105
106         for (s = 0; s < NV30EXA_FPID_MAX; s++) {
107                 nv_shader_t *def, *a8;
108
109                 def = nv40_fp_map[s];
110                 a8 = xcalloc(1, sizeof(nv_shader_t));
111                 a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs;
112                 a8->size = def->size + 4;
113                 memcpy(a8->data, def->data, def->size * sizeof(uint32_t));
114                 nv40_fp_map_a8[s] = a8;
115
116                 a8->data[a8->size - 8 + 0] &= ~0x00000081;
117                 a8->data[a8->size - 4 + 0]  = 0x01401e81;
118                 a8->data[a8->size - 4 + 1]  = 0x1c9dfe00;
119                 a8->data[a8->size - 4 + 2]  = 0x0001c800;
120                 a8->data[a8->size - 4 + 3]  = 0x0001c800;
121         }
122 }
123
/* should be in nouveau_reg.h at some point.. */

/* "S0" selector: shift of each component's field, and the selector values. */
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT	14
#define NV30TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT	12
#define NV30TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT	10
#define NV30TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT	8
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_ZERO	0
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_ONE	1
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_S1		2

/* "S1" selector: shift of each component's field, and the selector values. */
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT	6
#define NV30TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT	4
#define NV30TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT	2
#define NV30TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT	0
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_X		3
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_Y		2
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_Z		1
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_W		0
140
141 #define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                       \
142   {                                                                           \
143   PICT_##r,                                                                   \
144   (tf),                                                                       \
145   (NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0x << NV30TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT)|\
146   (NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0y << NV30TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT)|\
147   (NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0z << NV30TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT)|\
148   (NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0w << NV30TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT)|\
149   (NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1x << NV30TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT)|\
150   (NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1y << NV30TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT)|\
151   (NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1z << NV30TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT)|\
152   (NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1w << NV30TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT)\
153   }
154
155 static nv_pict_texture_format_t
156 NV30TextureFormat[] = {
157         _(a8r8g8b8, 0x12,   S1,   S1,   S1,   S1, X, Y, Z, W),
158         _(a8b8g8r8, 0x12,   S1,   S1,   S1,   S1, Z, Y, X, W),
159         _(x8r8g8b8, 0x12,   S1,   S1,   S1,  ONE, X, Y, Z, W),
160         _(x8b8g8r8, 0x12,   S1,   S1,   S1,  ONE, Z, Y, X, W),
161         _(a1r5g5b5, 0x10,   S1,   S1,   S1,   S1, X, Y, Z, W),
162         _(x1r5g5b5, 0x10,   S1,   S1,   S1,  ONE, X, Y, Z, W),
163         _(x4r4g4b4, 0x1d,   S1,   S1,   S1,  ONE, X, Y, Z, W),
164         _(a4r4g4b4, 0x1d,   S1,   S1,   S1,   S1, X, Y, Z, W),
165         _(      a8, 0x1b, ZERO, ZERO, ZERO,   S1, X, X, X, X),
166 };
167
168
169 static nv_pict_texture_format_t *
170 NV30_GetPictTextureFormat(int format)
171 {
172         int i;
173
174         for(i=0;i<sizeof(NV30TextureFormat)/sizeof(NV30TextureFormat[0]);i++)
175         {
176                 if (NV30TextureFormat[i].pict_fmt == format)
177                         return &NV30TextureFormat[i];
178         }
179
180         return NULL;
181 }
182
/* Hardware blend factor values. */
#define NV34_TCL_PRIMITIVE_3D_BF_ZERO			0x0000
#define NV34_TCL_PRIMITIVE_3D_BF_ONE			0x0001
#define NV34_TCL_PRIMITIVE_3D_BF_SRC_COLOR		0x0300
#define NV34_TCL_PRIMITIVE_3D_BF_ONE_MINUS_SRC_COLOR	0x0301
#define NV34_TCL_PRIMITIVE_3D_BF_SRC_ALPHA		0x0302
#define NV34_TCL_PRIMITIVE_3D_BF_ONE_MINUS_SRC_ALPHA	0x0303
#define NV34_TCL_PRIMITIVE_3D_BF_DST_ALPHA		0x0304
#define NV34_TCL_PRIMITIVE_3D_BF_ONE_MINUS_DST_ALPHA	0x0305
#define NV34_TCL_PRIMITIVE_3D_BF_DST_COLOR		0x0306
#define NV34_TCL_PRIMITIVE_3D_BF_ONE_MINUS_DST_COLOR	0x0307
#define NV34_TCL_PRIMITIVE_3D_BF_ALPHA_SATURATE		0x0308

/* Shorthand: BF(ONE) expands to NV34_TCL_PRIMITIVE_3D_BF_ONE. */
#define BF(bf) NV34_TCL_PRIMITIVE_3D_BF_##bf
195
196 static nv_pict_op_t 
197 NV30PictOp[] = {
198 /* Clear       */ { 0, 0, BF(               ZERO), BF(               ZERO) },
199 /* Src         */ { 0, 0, BF(                ONE), BF(               ZERO) },
200 /* Dst         */ { 0, 0, BF(               ZERO), BF(                ONE) },
201 /* Over        */ { 1, 0, BF(                ONE), BF(ONE_MINUS_SRC_ALPHA) },
202 /* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(                ONE) },
203 /* In          */ { 0, 1, BF(          DST_ALPHA), BF(               ZERO) },
204 /* InReverse   */ { 1, 0, BF(               ZERO), BF(          SRC_ALPHA) },
205 /* Out         */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(               ZERO) },
206 /* OutReverse  */ { 1, 0, BF(               ZERO), BF(ONE_MINUS_SRC_ALPHA) },
207 /* Atop        */ { 1, 1, BF(          DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
208 /* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(          SRC_ALPHA) },
209 /* Xor         */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
210 /* Add         */ { 0, 0, BF(                ONE), BF(                ONE) }
211 };
212
213 static nv_pict_op_t *
214 NV30_GetPictOpRec(int op)
215 {
216         if (op >= PictOpSaturate)
217                 return NULL;
218 #if 0
219         switch(op)
220         {
221                 case 0:ErrorF("Op Clear\n");break;
222                 case 1:ErrorF("Op Src\n");break;
223                 case 2:ErrorF("Op Dst\n");break;
224                 case 3:ErrorF("Op Over\n");break;
225                 case 4:ErrorF("Op OverReverse\n");break;
226                 case 5:ErrorF("Op In\n");break;
227                 case 6:ErrorF("Op InReverse\n");break;
228                 case 7:ErrorF("Op Out\n");break;
229                 case 8:ErrorF("Op OutReverse\n");break;
230                 case 9:ErrorF("Op Atop\n");break;
231                 case 10:ErrorF("Op AtopReverse\n");break;
232                 case 11:ErrorF("Op Xor\n");break;
233                 case 12:ErrorF("Op Add\n");break;
234         }
235 #endif
236         return &NV30PictOp[op];
237 }
238
/*
 * FALLBACK(fmt, ...) makes the enclosing function return FALSE.
 * Set FALLBACK_DEBUG to 1 to also log the reason, prefixed with the
 * function name and line number.
 */
#define FALLBACK_DEBUG 0

#if FALLBACK_DEBUG == 1
#define FALLBACK(fmt,args...)						\
	do {								\
		ErrorF("FALLBACK %s:%d> " fmt, __func__, __LINE__, ##args); \
		return FALSE;						\
	} while(0)
#else
#define FALLBACK(fmt,args...)						\
	do {								\
		return FALSE;						\
	} while(0)
#endif
250
251 static void
252 NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
253 {
254         NVPtr pNv = NVPTR(pScrn);
255         static NVAllocRec *fp_mem = NULL;
256         static int next_hw_id_offset = 0;
257
258         if (!fp_mem) {
259                 fp_mem = NVAllocateMemory(pNv, NOUVEAU_MEM_FB, 0x1000);
260                 if (!fp_mem) {
261                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
262                                         "Couldn't alloc fragprog buffer!\n");
263                         return;
264                 }
265         }
266
267         if (!shader->hw_id) {
268                 uint32_t *map = fp_mem->map + next_hw_id_offset;
269                 int i;
270
271                 for (i = 0; i < shader->size; i++) {
272                         uint32_t data = shader->data[i];
273 #if (X_BYTE_ORDER != X_LITTLE_ENDIAN)
274                         data = ((data >> 16) | ((data & 0xffff) << 16));
275 #endif
276                         map[i] = data;
277                 }
278
279                 shader->hw_id  = fp_mem->offset;
280                 shader->hw_id += next_hw_id_offset;
281
282                 next_hw_id_offset += (shader->size * sizeof(uint32_t));
283                 next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
284         }
285
286         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1);
287         OUT_RING  (shader->hw_id|1);
288
289         BEGIN_RING(Nv3D, 0x1d60, 1);
290         OUT_RING  (0); /* USES_KIL (1<<7) == 0 */
291         BEGIN_RING(Nv3D, 0x1450, 1);
292         OUT_RING  (shader->card_priv.NV30FP.num_regs << 16| 4);
293         BEGIN_RING(Nv3D, 0x1d7c, 1);
294         OUT_RING  (0xffff0000);
295
296 }
297
298 static void
299 NV30_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
300                 PictFormatShort dest_format, Bool component_alpha)
301 {
302         NVPtr pNv = NVPTR(pScrn);
303         uint32_t sblend, dblend;
304
305         sblend = blend->src_card_op;
306         dblend = blend->dst_card_op;
307
308         if (blend->dst_alpha) {
309                 if (!PICT_FORMAT_A(dest_format)) {
310                         if (sblend == BF(DST_ALPHA)) {
311                                 sblend = BF(ONE);
312                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
313                                 sblend = BF(ZERO);
314                         }
315                 } else if (dest_format == PICT_a8) {
316                         if (sblend == BF(DST_ALPHA)) {
317                                 sblend = BF(DST_COLOR);
318                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
319                                 sblend = BF(ONE_MINUS_DST_COLOR);
320                         }
321                 }
322         }
323
324         if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
325                 if (dblend == BF(SRC_ALPHA)) {
326                         dblend = BF(SRC_COLOR);
327                 } else if (dblend == BF(ONE_MINUS_SRC_ALPHA)) {
328                         dblend = BF(ONE_MINUS_SRC_COLOR);
329                 }
330         }
331
332         if (sblend == BF(ONE) && dblend == BF(ZERO)) {
333                 BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
334                 OUT_RING  (0);
335         } else {
336                 BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 3);
337                 OUT_RING  (1);
338                 OUT_RING  ((sblend << 16) | sblend);
339                 OUT_RING  ((dblend << 16) | dblend);
340         }
341 }
342
343 static Bool
344 NV30EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
345 {
346         NVPtr pNv = NVPTR(pScrn);
347         nv_pict_texture_format_t *fmt;
348         uint32_t card_filter, card_repeat;
349         NV30EXA_STATE;
350
351         fmt = NV30_GetPictTextureFormat(pPict->format);
352         if (!fmt)
353                 return FALSE;
354
355         card_repeat = 3; /* repeatNone */
356
357         if (pPict->filter == PictFilterBilinear)
358                 card_filter = 2;
359         else
360                 card_filter = 1;
361
362         BEGIN_RING(Nv3D,
363                         NV34_TCL_PRIMITIVE_3D_TX_OFFSET(unit), 8);
364         OUT_RING  (NVAccelGetPixmapOffset(pPix));
365
366         OUT_RING  ((2 << 4) /* 2D */ |
367                         (fmt->card_fmt << 8) |
368                         (1 << 16) /* 1 mipmap level */ |
369                         (log2i(pPix->drawable.width)  << 20) |
370                         (log2i(pPix->drawable.height) << 24) |
371                         9);
372
373         OUT_RING  ((card_repeat <<  0) /* S */ |
374                         (card_repeat <<  8) /* T */ |
375                         (card_repeat << 16) /* R */);
376         OUT_RING  (0x40000000); /* enable */
377         OUT_RING  ((((uint32_t)exaGetPixmapPitch(pPix))<<16) | fmt->card_swz);
378
379         OUT_RING  ((card_filter << 16) /* min */ |
380                         (card_filter << 24) /* mag */ |
381                         0x2000 /* engine lock */);
382         OUT_RING  ((pPix->drawable.width << 16) | pPix->drawable.height);
383         OUT_RING  (0); /* border ARGB */
384
385         state->unit[unit].width         = (float)pPix->drawable.width;
386         state->unit[unit].height        = (float)pPix->drawable.height;
387         state->unit[unit].transform     = pPict->transform;
388
389         return TRUE;
390 }
391
392 static Bool
393 NV30_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict)
394 {
395         NVPtr pNv = NVPTR(pScrn);
396         nv_pict_surface_format_t *fmt;
397
398         fmt = NV30_GetPictSurfaceFormat(pPict->format);
399         if (!fmt) {
400                 ErrorF("AIII no format\n");
401                 return FALSE;
402         }
403
404         uint32_t pitch = (uint32_t)exaGetPixmapPitch(pPix);
405
406         int x = pPict->pDrawable->x;
407         int y = pPict->pDrawable->y;
408         int w = pPict->pDrawable->width;
409         int h = pPict->pDrawable->height;
410         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 5);
411         OUT_RING  ((w<<16)|x);
412         OUT_RING  ((h<<16)|y);
413         OUT_RING  (fmt->card_fmt); /* format */
414         OUT_RING  (pitch << 16 | pitch);
415         OUT_RING  (NVAccelGetPixmapOffset(pPix));
416         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(0), 2);
417         OUT_RING  ((w-1+x)<<16);
418         OUT_RING  ((h-1+y)<<16);
419
420         return TRUE;
421 }
422
423 static Bool
424 NV30EXACheckCompositeTexture(PicturePtr pPict)
425 {
426         nv_pict_texture_format_t *fmt;
427         int w = pPict->pDrawable->width;
428         int h = pPict->pDrawable->height;
429
430         if ((w > 4096) || (h>4096))
431                 FALLBACK("picture too large, %dx%d\n", w, h);
432
433         fmt = NV30_GetPictTextureFormat(pPict->format);
434         if (!fmt)
435                 FALLBACK("picture format 0x%08x not supported\n",
436                                 pPict->format);
437
438         if (pPict->filter != PictFilterNearest &&
439                         pPict->filter != PictFilterBilinear)
440                 FALLBACK("filter 0x%x not supported\n", pPict->filter);
441
442         if (!(w==1 && h==1) && pPict->repeat && pPict->repeatType != RepeatNone)
443                 FALLBACK("repeat 0x%x not supported (surface %dx%d)\n", pPict->repeatType,w,h);
444
445         return TRUE;
446 }
447
448 Bool
449 NV30EXACheckComposite(int op, PicturePtr psPict,
450                 PicturePtr pmPict,
451                 PicturePtr pdPict)
452 {
453         nv_pict_surface_format_t *fmt;
454         nv_pict_op_t *opr;
455
456         opr = NV30_GetPictOpRec(op);
457         if (!opr)
458                 FALLBACK("unsupported blend op 0x%x\n", op);
459
460         fmt = NV30_GetPictSurfaceFormat(pdPict->format);
461         if (!fmt)
462                 FALLBACK("dst picture format 0x%08x not supported\n",
463                                 pdPict->format);
464
465         if (!NV30EXACheckCompositeTexture(psPict))
466                 FALLBACK("src picture\n");
467         if (pmPict) {
468                 if (pmPict->componentAlpha &&
469                                 PICT_FORMAT_RGB(pmPict->format) &&
470                                 opr->src_alpha && opr->src_card_op != BF(ZERO))
471                         FALLBACK("mask CA + SA\n");
472                 if (!NV30EXACheckCompositeTexture(pmPict))
473                         FALLBACK("mask picture\n");
474         }
475
476         return TRUE;
477 }
478
479 Bool
480 NV30EXAPrepareComposite(int op, PicturePtr psPict,
481                 PicturePtr pmPict,
482                 PicturePtr pdPict,
483                 PixmapPtr  psPix,
484                 PixmapPtr  pmPix,
485                 PixmapPtr  pdPix)
486 {
487         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
488         NVPtr pNv = NVPTR(pScrn);
489         nv_pict_op_t *blend;
490         int fpid = NV30EXA_FPID_PASS_COL0;
491         NV30EXA_STATE;
492
493         blend = NV30_GetPictOpRec(op);
494
495         NV30_SetupBlend(pScrn, blend, pdPict->format,
496                         (pmPict && pmPict->componentAlpha &&
497                          PICT_FORMAT_RGB(pmPict->format)));
498
499         NV30_SetupSurface(pScrn, pdPix, pdPict);
500         NV30EXATexture(pScrn, psPix, psPict, 0);
501
502 #if 0
503 #define printformat(f) ErrorF("(%xh %s %dbpp A%dR%dG%dB%d)",f,(f>>16)&0xf==2?"ARGB":"ABGR",(f>>24),(f&0xf000)>>12,(f&0xf00)>>8,(f&0xf0)>>4,f&0xf)
504         ErrorF("Preparecomposite src(%dx%d)",psPict->pDrawable->width,psPict->pDrawable->height);
505         printformat((psPict->format));
506         ErrorF(" dst(%dx%d)",pdPict->pDrawable->width,pdPict->pDrawable->height);
507         printformat((pdPict->format));
508         if (pmPict)
509         {
510                 ErrorF(" mask(%dx%d)",pmPict->pDrawable->width,pmPict->pDrawable->height);
511                 printformat((pmPict->format));
512         }
513         ErrorF("\n");
514 #endif
515
516         if (pmPict) {
517                 NV30EXATexture(pScrn, pmPix, pmPict, 1);
518
519                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
520                         if (blend->src_alpha)
521                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_SA_CA;
522                         else
523                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_CA;
524                 } else {
525                         fpid = NV30EXA_FPID_COMPOSITE_MASK;
526                 }
527
528                 state->have_mask = TRUE;
529         } else {
530                 fpid = NV30EXA_FPID_PASS_TEX0;
531
532                 state->have_mask = FALSE;
533         }
534
535         if (pdPict->format == PICT_a8)
536                 NV30_LoadFragProg(pScrn, nv40_fp_map_a8[fpid]);
537         else
538                 NV30_LoadFragProg(pScrn, nv40_fp_map[fpid]);
539
540         BEGIN_RING(Nv3D, 0x23c, 1);
541         OUT_RING  (pmPict?3:1);
542
543         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
544         OUT_RING  (8); /* GL_QUADS */
545
546         return TRUE;
547 }
548
549 #define xFixedToFloat(v) \
550         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
551
552 static void
553 NV30EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
554                                           float *x_ret, float *y_ret)
555 {
556         PictVector v;
557
558         if (t) {
559                 v.vector[0] = IntToxFixed(x);
560                 v.vector[1] = IntToxFixed(y);
561                 v.vector[2] = xFixed1;
562                 PictureTransformPoint(t, &v);
563                 *x_ret = xFixedToFloat(v.vector[0]);
564                 *y_ret = xFixedToFloat(v.vector[1]);
565         } else {
566                 *x_ret = (float)x;
567                 *y_ret = (float)y;
568         }
569 }
570
/*
 * Emit one vertex with source (sx,sy) and mask (mx,my) texture coordinates
 * as floats, followed by the destination position packed as (dy << 16) | dx.
 */
#define CV_OUTm(sx,sy,mx,my,dx,dy)					\
	do {								\
		BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ATTR_2F_X(8), 4); \
		OUT_RINGf ((sx));					\
		OUT_RINGf ((sy));					\
		OUT_RINGf ((mx));					\
		OUT_RINGf ((my));					\
		BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ATTR_2I(0), 1); \
		OUT_RING  (((dy)<<16)|(dx));				\
	} while(0)

/* As CV_OUTm, but with source texture coordinates only. */
#define CV_OUT(sx,sy,dx,dy)						\
	do {								\
		BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ATTR_2F_X(8), 2); \
		OUT_RINGf ((sx));					\
		OUT_RINGf ((sy));					\
		BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ATTR_2I(0), 1); \
		OUT_RING  (((dy)<<16)|(dx));				\
	} while(0)
584
585 void
586 NV30EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
587                                   int maskX, int maskY,
588                                   int dstX , int dstY,
589                                   int width, int height)
590 {
591         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
592         NVPtr pNv = NVPTR(pScrn);
593         float sX0, sX1, sY0, sY1;
594         float mX0, mX1, mY0, mY1;
595         NV30EXA_STATE;
596
597 #if 0
598         ErrorF("Composite [%dx%d] (%d,%d)IN(%d,%d)OP(%d,%d)\n",width,height,srcX,srcY,maskX,maskY,dstX,dstY);
599 #endif
600         NV30EXATransformCoord(state->unit[0].transform, srcX, srcY,
601                               state->unit[0].width,
602                               state->unit[0].height, &sX0, &sY0);
603         NV30EXATransformCoord(state->unit[0].transform,
604                               srcX + width, srcY + height,
605                               state->unit[0].width,
606                               state->unit[0].height, &sX1, &sY1);
607
608         if (state->have_mask) {
609                 NV30EXATransformCoord(state->unit[1].transform, maskX, maskY,
610                                       state->unit[1].width,
611                                       state->unit[1].height, &mX0, &mY0);
612                 NV30EXATransformCoord(state->unit[1].transform,
613                                       maskX + width, maskY + height,
614                                       state->unit[1].width,
615                                       state->unit[1].height, &mX1, &mY1);
616                 CV_OUTm(sX0 , sY0 , mX0, mY0, dstX        ,          dstY);
617                 CV_OUTm(sX1 , sY0 , mX1, mY0, dstX + width,          dstY);
618                 CV_OUTm(sX1 , sY1 , mX1, mY1, dstX + width, dstY + height);
619                 CV_OUTm(sX0 , sY1 , mX0, mY1, dstX        , dstY + height);
620         } else {
621                 CV_OUT(sX0 , sY0 , dstX        ,          dstY);
622                 CV_OUT(sX1 , sY0 , dstX + width,          dstY);
623                 CV_OUT(sX1 , sY1 , dstX + width, dstY + height);
624                 CV_OUT(sX0 , sY1 , dstX        , dstY + height);
625         }
626 }
627
628 void
629 NV30EXADoneComposite(PixmapPtr pdPix)
630 {
631         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
632         NVPtr pNv = NVPTR(pScrn);
633
634         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
635         OUT_RING  (0);
636
637         FIRE_RING();
638 }
639
640 Bool
641 NVAccelInitNV30TCL(ScrnInfoPtr pScrn)
642 {
643         NVPtr pNv = NVPTR(pScrn);
644         static int have_object = FALSE;
645         uint32_t class = 0, chipset;
646         int i;
647
648         NV30EXAHackupA8Shaders(pScrn);
649
650 #undef  NV30_TCL_PRIMITIVE_3D
651 #define NV30_TCL_PRIMITIVE_3D                 0x0397
652 #define NV30_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK 0x00000003
653 #define NV35_TCL_PRIMITIVE_3D                 0x0497
654 #define NV35_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK 0x000001e0
655 #define NV34_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK 0x00000010
656
657         chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
658         if ((chipset & 0xf0) != NV_ARCH_30)
659                 return TRUE;
660         chipset &= 0xf;
661
662         if (NV30_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK & (1<<chipset))
663                 class = NV30_TCL_PRIMITIVE_3D;
664         else if (NV35_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK & (1<<chipset))
665                 class = NV35_TCL_PRIMITIVE_3D;
666         else if (NV34_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK & (1<<chipset))
667                 class = NV34_TCL_PRIMITIVE_3D;
668         else {
669                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
670                            "NV30EXA: Unknown chipset NV3%1x\n", chipset);
671                 return FALSE;
672         }
673
674         if (!have_object) {
675                 if (!NVDmaCreateContextObject(pNv, Nv3D, class))
676                         return FALSE;
677                 have_object = TRUE;
678         }
679
680         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DMA_IN_MEMORY0, 3);
681         OUT_RING  (NvDmaFB);
682         OUT_RING  (NvDmaFB);
683         OUT_RING  (NvDmaFB);
684         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DMA_IN_MEMORY7, 1);
685         OUT_RING  (NvDmaFB);
686         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DMA_IN_MEMORY3, 2);
687         OUT_RING  (NvDmaFB);
688         OUT_RING  (NvDmaFB);
689         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DMA_IN_MEMORY8, 1);
690         OUT_RING  (NvDmaFB);
691
692         for(i = 0x2c8; i <= 0x2fc; i += 4)
693         {
694                 BEGIN_RING(Nv3D, i, 1);
695                 OUT_RING  (0x0);
696         }
697
698         BEGIN_RING(Nv3D, 0x220, 1);
699         OUT_RING  (1);
700
701         BEGIN_RING(Nv3D, 0x03b0, 1);
702         OUT_RING  (0x00100000);
703         BEGIN_RING(Nv3D, 0x1454, 1);
704         OUT_RING  (0);
705         BEGIN_RING(Nv3D, 0x1d80, 1);
706         OUT_RING  (3);
707         
708         /* NEW */
709         BEGIN_RING(Nv3D, 0x1e98, 1);
710         OUT_RING  (0);
711         BEGIN_RING(Nv3D, 0x17e0, 3);
712         OUT_RING  (0);
713         OUT_RING  (0);
714         OUT_RING  (0x3f800000);
715         BEGIN_RING(Nv3D, 0x1f80, 16);
716         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
717         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
718         OUT_RING  (0x0000ffff);
719         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
720         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
721
722         BEGIN_RING(Nv3D, 0x120, 3);
723         OUT_RING  (0);
724         OUT_RING  (1);
725         OUT_RING  (2);
726
727         BEGIN_RING(NvImageBlit, 0x120, 3);
728         OUT_RING  (0);
729         OUT_RING  (1);
730         OUT_RING  (2);
731
732         BEGIN_RING(Nv3D, 0x1d88, 1);
733         OUT_RING  (0x00001200);
734
735         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_RC_ENABLE, 1);
736         OUT_RING  (0);
737
738         /* Attempt to setup a known state.. Probably missing a heap of
739          * stuff here..
740          */
741         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_STENCIL_FRONT_ENABLE, 1);
742         OUT_RING  (0);
743         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_STENCIL_BACK_ENABLE, 1);
744         OUT_RING  (0);
745         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_ALPHA_FUNC_ENABLE, 1);
746         OUT_RING  (0);
747         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DEPTH_WRITE_ENABLE, 2);
748         OUT_RING  (0); /* wr disable */
749         OUT_RING  (0); /* test disable */
750         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_COLOR_MASK, 1);
751         OUT_RING  (0x01010101); /* TR,TR,TR,TR */
752         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_CULL_FACE_ENABLE, 1);
753         OUT_RING  (0);
754         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 5);
755         OUT_RING  (0);                          /* Blend enable */
756         OUT_RING  (0);                          /* Blend src */
757         OUT_RING  (0);                          /* Blend dst */
758         OUT_RING  (0x00000000);                 /* Blend colour */
759         OUT_RING  (0x8006);                     /* FUNC_ADD */
760         OUT_RING  (0);
761         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_COLOR_LOGIC_OP_ENABLE, 2);
762         OUT_RING  (0);
763         OUT_RING  (0x1503 /*GL_COPY*/);
764         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DITHER_ENABLE, 1);
765         OUT_RING  (1);
766         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_SHADE_MODEL, 1);
767         OUT_RING  (0x1d01 /*GL_SMOOTH*/);
768         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_POLYGON_OFFSET_FACTOR,2);
769         OUT_RINGf (0.0);
770         OUT_RINGf (0.0);
771         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_POLYGON_MODE_FRONT, 2);
772         OUT_RING  (0x1b02 /*GL_FILL*/);
773         OUT_RING  (0x1b02 /*GL_FILL*/);
774         /* - Disable texture units
775          * - Set fragprog to MOVR result.color, fragment.color */
776         for (i=0;i<16;i++) {
777                 BEGIN_RING(Nv3D,
778                                 NV34_TCL_PRIMITIVE_3D_TX_ENABLE(i), 1);
779                 OUT_RING  (0);
780         }
781         /* Polygon stipple */
782         BEGIN_RING(Nv3D,
783                         NV34_TCL_PRIMITIVE_3D_POLYGON_STIPPLE_PATTERN(0), 0x20);
784         for (i=0;i<0x20;i++)
785                 OUT_RING  (0xFFFFFFFF);
786
787         /* Ok.  If you start X with the nvidia driver, kill it, and then
788          * start X with nouveau you will get black rendering instead of
789          * what you'd expect.  This fixes the problem, and it seems that
790          * it's not needed between nouveau restarts - which suggests that
791          * the 3D context (wherever it's stored?) survives somehow.
792          */
793         //BEGIN_RING(Nv3D, 0x1d60,1);
794         //OUT_RING  (0x03008000);
795
796         int w=4096;
797         int h=4096;
798         int pitch=4096*4;
799         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 5);
800         OUT_RING  (w<<16);
801         OUT_RING  (h<<16);
802         OUT_RING  (0x148); /* format */
803         OUT_RING  (pitch << 16 | pitch);
804         OUT_RING  (0x0);
805         BEGIN_RING(Nv3D, 0x0a00, 2);
806         OUT_RING  ((w<<16) | 0);
807         OUT_RING  ((h<<16) | 0);
808         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(0), 2);
809         OUT_RING  ((w-1)<<16);
810         OUT_RING  ((h-1)<<16);
811         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_SCISSOR_HORIZ, 2);
812         OUT_RING  (w<<16);
813         OUT_RING  (h<<16);
814         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 2);
815         OUT_RING  (w<<16);
816         OUT_RING  (h<<16);
817
818         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_SCALE0_X, 8);
819         OUT_RINGf (0.0);
820         OUT_RINGf (0.0);
821         OUT_RINGf (0.0);
822         OUT_RINGf (0.0);
823         OUT_RINGf (1.0);
824         OUT_RINGf (1.0);
825         OUT_RINGf (1.0);
826         OUT_RINGf (0.0);
827
828         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_MODELVIEW_MATRIX(0), 16);
829         OUT_RINGf (1.0);
830         OUT_RINGf (0.0);
831         OUT_RINGf (0.0);
832         OUT_RINGf (0.0);
833         OUT_RINGf (0.0);
834         OUT_RINGf (1.0);
835         OUT_RINGf (0.0);
836         OUT_RINGf (0.0);
837         OUT_RINGf (0.0);
838         OUT_RINGf (0.0);
839         OUT_RINGf (1.0);
840         OUT_RINGf (0.0);
841         OUT_RINGf (0.0);
842         OUT_RINGf (0.0);
843         OUT_RINGf (0.0);
844         OUT_RINGf (1.0);
845
846         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_PROJECTION_MATRIX(0), 16);
847         OUT_RINGf (1.0);
848         OUT_RINGf (0.0);
849         OUT_RINGf (0.0);
850         OUT_RINGf (0.0);
851         OUT_RINGf (0.0);
852         OUT_RINGf (1.0);
853         OUT_RINGf (0.0);
854         OUT_RINGf (0.0);
855         OUT_RINGf (0.0);
856         OUT_RINGf (0.0);
857         OUT_RINGf (1.0);
858         OUT_RINGf (0.0);
859         OUT_RINGf (0.0);
860         OUT_RINGf (0.0);
861         OUT_RINGf (0.0);
862         OUT_RINGf (1.0);
863
864         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_SCISSOR_HORIZ, 2);
865         OUT_RING  (4096<<16);
866         OUT_RING  (4096<<16);
867
868         return TRUE;
869 }
870