s/#define ABC ;/#define ABC do { ; } while(0);/
[nouveau] / src / nv30_exa.c
1 #ifdef HAVE_CONFIG_H
2 #include "config.h"
3 #endif
4
5 #ifdef ENABLE_NV30EXA
6
7 #include "nv_include.h"
8 #include "nv_shaders.h"
9
/* One row of the destination-surface format table (see NV30SurfaceFormat). */
struct nv_pict_surface_format {
        int      pict_fmt;      /* PICT_* value matched against a picture's format */
        uint32_t card_fmt;      /* format word emitted when the surface is set up */
};
typedef struct nv_pict_surface_format nv_pict_surface_format_t;
14
/* One row of the texture (source/mask) format table (see NV30TextureFormat). */
struct nv_pict_texture_format {
        int      pict_fmt;      /* PICT_* value matched against a picture's format */
        uint32_t card_fmt;      /* texture format code, shifted into the TX format word */
        uint32_t card_swz;      /* swizzle bits OR'ed into the same word as the pitch */
};
typedef struct nv_pict_texture_format nv_pict_texture_format_t;
20
21 typedef struct nv_pict_op {
22         Bool     src_alpha;
23         Bool     dst_alpha;
24         uint32_t src_card_op;
25         uint32_t dst_card_op;
26 } nv_pict_op_t;
27
28 typedef struct nv30_exa_state {
29         Bool have_mask;
30
31         struct {
32                 PictTransformPtr transform;
33                 float width;
34                 float height;
35         } unit[2];
36 } nv30_exa_state_t;
37 static nv30_exa_state_t exa_state;
38 #define NV30EXA_STATE nv30_exa_state_t *state = &exa_state
39
40 static nv_pict_surface_format_t
41 NV30SurfaceFormat[] = {
42         { PICT_a8r8g8b8 , 0x148 },
43         { PICT_x8r8g8b8 , 0x145 },
44         { PICT_r5g6b5   , 0x143 },
45 //      { PICT_a8       , 0x149 },
46 };
47
48 static nv_pict_surface_format_t *
49 NV30_GetPictSurfaceFormat(int format)
50 {
51         int i;
52
53         for(i=0;i<sizeof(NV30SurfaceFormat)/sizeof(NV30SurfaceFormat[0]);i++)
54         {
55                 if (NV30SurfaceFormat[i].pict_fmt == format)
56                         return &NV30SurfaceFormat[i];
57         }
58
59         return NULL;
60 }
61
/*
 * Fragment program identifiers; they index nv40_fp_map / nv40_fp_map_a8.
 * Values run 0..4, with NV30EXA_FPID_MAX (5) as the count.
 * NOTE(review): the trailing identifier declares a variable of this
 * anonymous enum type rather than naming a type; kept as in the original.
 */
enum {
        NV30EXA_FPID_PASS_COL0,
        NV30EXA_FPID_PASS_TEX0,
        NV30EXA_FPID_COMPOSITE_MASK,
        NV30EXA_FPID_COMPOSITE_MASK_SA_CA,
        NV30EXA_FPID_COMPOSITE_MASK_CA,
        NV30EXA_FPID_MAX
} NV30EXA_FPID;
70
71 static nv_shader_t *nv40_fp_map[NV30EXA_FPID_MAX] = {
72         &nv30_fp_pass_col0,
73         &nv30_fp_pass_tex0,
74         &nv30_fp_composite_mask,
75         &nv30_fp_composite_mask_sa_ca,
76         &nv30_fp_composite_mask_ca
77 };
78
79 static nv_shader_t *nv40_fp_map_a8[NV30EXA_FPID_MAX];
80
81 static void
82 NV30EXAHackupA8Shaders(ScrnInfoPtr pScrn)
83 {
84         int s;
85
86         for (s = 0; s < NV30EXA_FPID_MAX; s++) {
87                 nv_shader_t *def, *a8;
88
89                 def = nv40_fp_map[s];
90                 a8 = xcalloc(1, sizeof(nv_shader_t));
91                 a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs;
92                 a8->size = def->size + 4;
93                 memcpy(a8->data, def->data, def->size * sizeof(uint32_t));
94                 nv40_fp_map_a8[s] = a8;
95
96                 a8->data[a8->size - 8 + 0] &= ~0x00000081;
97                 a8->data[a8->size - 4 + 0]  = 0x01401e81;
98                 a8->data[a8->size - 4 + 1]  = 0x1c9dfe00;
99                 a8->data[a8->size - 4 + 2]  = 0x0001c800;
100                 a8->data[a8->size - 4 + 3]  = 0x0001c800;
101         }
102 }
103
/* should be in nouveau_reg.h at some point.. */

/* Bit positions of the four S0 selectors (two bits apart, X highest). */
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT      14
#define NV30TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT      12
#define NV30TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT      10
#define NV30TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT       8

/* Bit positions of the four S1 selectors. */
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT       6
#define NV30TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT       4
#define NV30TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT       2
#define NV30TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT       0

/* S0 selector values (shared by all four S0 fields). */
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_ZERO        0
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_ONE         1
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_S1          2

/* S1 selector values (shared by all four S1 fields). */
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_X           3
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_Y           2
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_Z           1
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_W           0
120
/*
 * Expands to one nv_pict_texture_format_t initializer.
 *   pict      suffix pasted onto PICT_ to name the Render format
 *   fmt       card texture format code
 *   s0x..s0w  S0 selector name (ZERO, ONE or S1), one per component
 *   s1x..s1w  S1 selector name (X, Y, Z or W), one per component
 * Selector names are always resolved under the ..._S0_X_ / ..._S1_X_
 * prefixes; only the shift differs from component to component.
 */
#define _(pict,fmt,s0x,s0y,s0z,s0w,s1x,s1y,s1z,s1w)                           \
  {                                                                           \
  PICT_##pict,                                                                \
  (fmt),                                                                      \
  (NV30TCL_TX_SWIZZLE_UNIT_S0_X_##s0x << NV30TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT) |\
  (NV30TCL_TX_SWIZZLE_UNIT_S0_X_##s0y << NV30TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT) |\
  (NV30TCL_TX_SWIZZLE_UNIT_S0_X_##s0z << NV30TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT) |\
  (NV30TCL_TX_SWIZZLE_UNIT_S0_X_##s0w << NV30TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT) |\
  (NV30TCL_TX_SWIZZLE_UNIT_S1_X_##s1x << NV30TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT) |\
  (NV30TCL_TX_SWIZZLE_UNIT_S1_X_##s1y << NV30TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT) |\
  (NV30TCL_TX_SWIZZLE_UNIT_S1_X_##s1z << NV30TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT) |\
  (NV30TCL_TX_SWIZZLE_UNIT_S1_X_##s1w << NV30TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT)  \
  }
134
135 static nv_pict_texture_format_t
136 NV30TextureFormat[] = {
137         _(a8r8g8b8, 0x12,   S1,   S1,   S1,   S1, X, Y, Z, W),
138         _(a8b8g8r8, 0x12,   S1,   S1,   S1,   S1, Z, Y, X, W),
139         _(x8r8g8b8, 0x12,   S1,   S1,   S1,  ONE, X, Y, Z, W),
140         _(x8b8g8r8, 0x12,   S1,   S1,   S1,  ONE, Z, Y, X, W),
141         _(a1r5g5b5, 0x10,   S1,   S1,   S1,   S1, X, Y, Z, W),
142         _(x1r5g5b5, 0x10,   S1,   S1,   S1,  ONE, X, Y, Z, W),
143         _(x4r4g4b4, 0x1d,   S1,   S1,   S1,  ONE, X, Y, Z, W),
144         _(a4r4g4b4, 0x1d,   S1,   S1,   S1,   S1, X, Y, Z, W),
145         _(      a8, 0x1b, ZERO, ZERO, ZERO,   S1, W, W, W, W),
146 };
147
148
149 static nv_pict_texture_format_t *
150 NV30_GetPictTextureFormat(int format)
151 {
152         int i;
153
154         for(i=0;i<sizeof(NV30TextureFormat)/sizeof(NV30TextureFormat[0]);i++)
155         {
156                 if (NV30TextureFormat[i].pict_fmt == format)
157                         return &NV30TextureFormat[i];
158         }
159
160         return NULL;
161 }
162
/* Blend factor codes written to the blend-function methods. */
#define NV34_TCL_PRIMITIVE_3D_BF_ZERO                   0x0000
#define NV34_TCL_PRIMITIVE_3D_BF_ONE                    0x0001
#define NV34_TCL_PRIMITIVE_3D_BF_SRC_COLOR              0x0300
#define NV34_TCL_PRIMITIVE_3D_BF_ONE_MINUS_SRC_COLOR    0x0301
#define NV34_TCL_PRIMITIVE_3D_BF_SRC_ALPHA              0x0302
#define NV34_TCL_PRIMITIVE_3D_BF_ONE_MINUS_SRC_ALPHA    0x0303
#define NV34_TCL_PRIMITIVE_3D_BF_DST_ALPHA              0x0304
#define NV34_TCL_PRIMITIVE_3D_BF_ONE_MINUS_DST_ALPHA    0x0305
#define NV34_TCL_PRIMITIVE_3D_BF_DST_COLOR              0x0306
#define NV34_TCL_PRIMITIVE_3D_BF_ONE_MINUS_DST_COLOR    0x0307
#define NV34_TCL_PRIMITIVE_3D_BF_ALPHA_SATURATE         0x0308

/* Shorthand: BF(ONE) names NV34_TCL_PRIMITIVE_3D_BF_ONE, and so on. */
#define BF(bf) NV34_TCL_PRIMITIVE_3D_BF_##bf
175
176 static nv_pict_op_t 
177 NV30PictOp[] = {
178 /* Clear       */ { 0, 0, BF(               ZERO), BF(               ZERO) },
179 /* Src         */ { 0, 0, BF(                ONE), BF(               ZERO) },
180 /* Dst         */ { 0, 0, BF(               ZERO), BF(                ONE) },
181 /* Over        */ { 1, 0, BF(                ONE), BF(ONE_MINUS_SRC_ALPHA) },
182 /* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(                ONE) },
183 /* In          */ { 0, 1, BF(          DST_ALPHA), BF(               ZERO) },
184 /* InReverse   */ { 1, 0, BF(               ZERO), BF(          SRC_ALPHA) },
185 /* Out         */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(               ZERO) },
186 /* OutReverse  */ { 1, 0, BF(               ZERO), BF(ONE_MINUS_SRC_ALPHA) },
187 /* Atop        */ { 1, 1, BF(          DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
188 /* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(          SRC_ALPHA) },
189 /* Xor         */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
190 /* Add         */ { 0, 0, BF(                ONE), BF(                ONE) }
191 };
192
193 static nv_pict_op_t *
194 NV30_GetPictOpRec(int op)
195 {
196         if (op >= PictOpSaturate)
197                 return NULL;
198 #if 0
199         switch(op)
200         {
201                 case 0:ErrorF("Op Clear\n");break;
202                 case 1:ErrorF("Op Src\n");break;
203                 case 2:ErrorF("Op Dst\n");break;
204                 case 3:ErrorF("Op Over\n");break;
205                 case 4:ErrorF("Op OverReverse\n");break;
206                 case 5:ErrorF("Op In\n");break;
207                 case 6:ErrorF("Op InReverse\n");break;
208                 case 7:ErrorF("Op Out\n");break;
209                 case 8:ErrorF("Op OutReverse\n");break;
210                 case 9:ErrorF("Op Atop\n");break;
211                 case 10:ErrorF("Op AtopReverse\n");break;
212                 case 11:ErrorF("Op Xor\n");break;
213                 case 12:ErrorF("Op Add\n");break;
214         }
215 #endif
216         return &NV30PictOp[op];
217 }
218
/*
 * FALLBACK(fmt, ...) makes the enclosing function return FALSE.  When
 * FALLBACK_DEBUG is 1 it first logs the reason through ErrorF, prefixed
 * with the function name and line; otherwise the arguments are discarded
 * without being evaluated.
 */
#define FALLBACK_DEBUG 0

#if FALLBACK_DEBUG == 1
#define FALLBACK_TRACE(fmt,args...)                                     \
        ErrorF("FALLBACK %s:%d> " fmt, __func__, __LINE__, ##args)
#else
#define FALLBACK_TRACE(fmt,args...) do { } while(0)
#endif

#define FALLBACK(fmt,args...) do {      \
        FALLBACK_TRACE(fmt, ##args);    \
        return FALSE;                   \
} while(0)
230
231 static void
232 NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
233 {
234         NVPtr pNv = NVPTR(pScrn);
235         static NVAllocRec *fp_mem = NULL;
236         static int next_hw_id_offset = 0;
237
238         if (!fp_mem) {
239                 fp_mem = NVAllocateMemory(pNv, NOUVEAU_MEM_FB, 0x1000);
240                 if (!fp_mem) {
241                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
242                                         "Couldn't alloc fragprog buffer!\n");
243                         return;
244                 }
245         }
246
247         if (!shader->hw_id) {
248                 memcpy(fp_mem->map + next_hw_id_offset, shader->data,
249                                 shader->size *
250                                 sizeof(uint32_t));
251
252                 shader->hw_id  = fp_mem->offset;
253                 shader->hw_id += next_hw_id_offset;
254
255                 next_hw_id_offset += (shader->size * sizeof(uint32_t));
256                 next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
257         }
258
259         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1);
260         OUT_RING  (shader->hw_id|1);
261
262         BEGIN_RING(Nv3D, 0x23c, 1);
263         OUT_RING  (3);
264         BEGIN_RING(Nv3D, 0x1d60, 1);
265         OUT_RING  (0); /* USES_KIL (1<<7) == 0 */
266         BEGIN_RING(Nv3D, 0x1450, 1);
267         OUT_RING  (shader->card_priv.NV30FP.num_regs << 16| 4);
268         BEGIN_RING(Nv3D, 0x1d7c, 1);
269         OUT_RING  (0xffff0000);
270
271         //BEGIN_RING(Nv3D, 0x0b00, 1);
272         //OUT_RING(0x00000004);
273
274 }
275
276 static void
277 NV30_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
278                 PictFormatShort dest_format, Bool component_alpha)
279 {
280         NVPtr pNv = NVPTR(pScrn);
281         uint32_t sblend, dblend;
282
283         sblend = blend->src_card_op;
284         dblend = blend->dst_card_op;
285
286         if (blend->dst_alpha) {
287                 if (!PICT_FORMAT_A(dest_format)) {
288                         if (sblend == BF(DST_ALPHA)) {
289                                 sblend = BF(ONE);
290                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
291                                 sblend = BF(ZERO);
292                         }
293                 } else if (dest_format == PICT_a8) {
294                         if (sblend == BF(DST_ALPHA)) {
295                                 sblend = BF(DST_COLOR);
296                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
297                                 sblend = BF(ONE_MINUS_DST_COLOR);
298                         }
299                 }
300         }
301
302         if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
303                 if (dblend == BF(SRC_ALPHA)) {
304                         dblend = BF(SRC_COLOR);
305                 } else if (dblend == BF(ONE_MINUS_SRC_ALPHA)) {
306                         dblend = BF(ONE_MINUS_SRC_COLOR);
307                 }
308         }
309
310         if (sblend == BF(ONE) && dblend == BF(ZERO)) {
311                 BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
312                 OUT_RING  (0);
313         } else {
314                 BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 5);
315                 OUT_RING  (1);
316                 OUT_RING  ((sblend << 16) | sblend);
317                 OUT_RING  ((dblend << 16) | dblend);
318                 OUT_RING  (0x00000000);                 /* Blend colour */
319                 OUT_RING  ((0x8006 << 16) | 0x8006);    /* FUNC_ADD, FUNC_ADD */
320         }
321 }
322
323 static Bool
324 NV30EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
325 {
326         NVPtr pNv = NVPTR(pScrn);
327         nv_pict_texture_format_t *fmt;
328         uint32_t card_filter, card_repeat;
329         NV30EXA_STATE;
330
331         fmt = NV30_GetPictTextureFormat(pPict->format);
332         if (!fmt)
333                 return FALSE;
334
335         card_repeat = 3; /* repeatNone */
336
337         if (pPict->filter == PictFilterBilinear)
338                 card_filter = 2;
339         else
340                 card_filter = 1;
341
342         BEGIN_RING(Nv3D,
343                         NV34_TCL_PRIMITIVE_3D_TX_OFFSET(unit), 8);
344         OUT_RING  (NVAccelGetPixmapOffset(pPix));
345
346         OUT_RING  ((2 << 4) /* 2D */ |
347                         (fmt->card_fmt << 8) |
348                         (1 << 16) /* 1 mipmap level */ |
349 /*                      (log2i(pPix->drawable.width)  << 20) |
350                         (log2i(pPix->drawable.height) << 24) |*/
351                         9);
352
353         OUT_RING  ((card_repeat <<  0) /* S */ |
354                         (card_repeat <<  8) /* T */ |
355                         (card_repeat << 16) /* R */);
356         OUT_RING  (0x40000000); /* enable */
357         OUT_RING  ((((uint32_t)exaGetPixmapPitch(pPix))<<16) | fmt->card_swz);
358
359         OUT_RING  ((card_filter << 16) /* min */ |
360                         (card_filter << 24) /* mag */ |
361                         0x3fd6 /* engine lock */);
362         OUT_RING  ((pPix->drawable.width << 16) | pPix->drawable.height);
363         OUT_RING  (0); /* border ARGB */
364
365         state->unit[unit].width         = (float)pPix->drawable.width;
366         state->unit[unit].height        = (float)pPix->drawable.height;
367         state->unit[unit].transform     = pPict->transform;
368
369         return TRUE;
370 }
371
372 static Bool
373 NV30_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict)
374 {
375         NVPtr pNv = NVPTR(pScrn);
376         nv_pict_surface_format_t *fmt;
377
378         fmt = NV30_GetPictSurfaceFormat(pPict->format);
379         if (!fmt) {
380                 ErrorF("AIII no format\n");
381                 return FALSE;
382         }
383
384         uint32_t pitch = (uint32_t)exaGetPixmapPitch(pPix);
385
386         int w = pPict->pDrawable->width;
387         int h = pPict->pDrawable->height;
388         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 5);
389         OUT_RING  (w<<16);
390         OUT_RING  (h<<16);
391         OUT_RING  (fmt->card_fmt); /* format */
392         OUT_RING  (pitch << 16 | pitch);
393         OUT_RING  (NVAccelGetPixmapOffset(pPix));
394         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(0), 2);
395         OUT_RING  ((w-1)<<16);
396         OUT_RING  ((h-1)<<16);
397         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 2);
398         OUT_RING  (w<<16);
399         OUT_RING  (h<<16);
400
401         return TRUE;
402 }
403
404 static Bool
405 NV30EXACheckCompositeTexture(PicturePtr pPict)
406 {
407         nv_pict_texture_format_t *fmt;
408         int w = pPict->pDrawable->width;
409         int h = pPict->pDrawable->height;
410
411         if ((w > 4096) || (h>4096))
412                 FALLBACK("picture too large, %dx%d\n", w, h);
413
414         fmt = NV30_GetPictTextureFormat(pPict->format);
415         if (!fmt)
416                 FALLBACK("picture format 0x%08x not supported\n",
417                                 pPict->format);
418
419         if (pPict->filter != PictFilterNearest &&
420                         pPict->filter != PictFilterBilinear)
421                 FALLBACK("filter 0x%x not supported\n", pPict->filter);
422
423         if (pPict->repeat && pPict->repeatType != RepeatNone)
424                 FALLBACK("repeat 0x%x not supported\n", pPict->repeatType);
425
426         return TRUE;
427 }
428
429 Bool
430 NV30EXACheckComposite(int op, PicturePtr psPict,
431                 PicturePtr pmPict,
432                 PicturePtr pdPict)
433 {
434         nv_pict_surface_format_t *fmt;
435         nv_pict_op_t *opr;
436
437         opr = NV30_GetPictOpRec(op);
438         if (!opr)
439                 FALLBACK("unsupported blend op 0x%x\n", op);
440
441         fmt = NV30_GetPictSurfaceFormat(pdPict->format);
442         if (!fmt)
443                 FALLBACK("dst picture format 0x%08x not supported\n",
444                                 pdPict->format);
445
446         if (!NV30EXACheckCompositeTexture(psPict))
447                 FALLBACK("src picture\n");
448         if (pmPict) {
449                 if (pmPict->componentAlpha &&
450                                 PICT_FORMAT_RGB(pmPict->format) &&
451                                 opr->src_alpha && opr->src_card_op != BF(ZERO))
452                         FALLBACK("mask CA + SA\n");
453                 if (!NV30EXACheckCompositeTexture(pmPict))
454                         FALLBACK("mask picture\n");
455         }
456
457         return TRUE;
458 }
459
460 void NV30_SetVtx(ScrnInfoPtr pScrn, int multitex)
461 {
462         NVPtr pNv = NVPTR(pScrn);
463
464         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ARRAY_FORMAT(0), 16);
465         OUT_RING  (0x22);
466         OUT_RING  (0x2);
467         OUT_RING  (0x2);
468         OUT_RING  (0x2);
469         OUT_RING  (0x2);
470         OUT_RING  (0x2);
471         OUT_RING  (0x2);
472         OUT_RING  (0x2);
473
474         OUT_RING  (0x22);
475         if (multitex)
476                 OUT_RING  (0x22);
477         else
478                 OUT_RING  (0x02);
479         OUT_RING  (0x2);
480         OUT_RING  (0x2);
481         OUT_RING  (0x2);
482         OUT_RING  (0x2);
483         OUT_RING  (0x2);
484         OUT_RING  (0x2);
485 }
486
487 Bool
488 NV30EXAPrepareComposite(int op, PicturePtr psPict,
489                 PicturePtr pmPict,
490                 PicturePtr pdPict,
491                 PixmapPtr  psPix,
492                 PixmapPtr  pmPix,
493                 PixmapPtr  pdPix)
494 {
495         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
496         NVPtr pNv = NVPTR(pScrn);
497         nv_pict_op_t *blend;
498         int fpid = NV30EXA_FPID_PASS_COL0;
499         NV30EXA_STATE;
500
501         blend = NV30_GetPictOpRec(op);
502
503         NV30_SetupBlend(pScrn, blend, pdPict->format,
504                         (pmPict && pmPict->componentAlpha &&
505                          PICT_FORMAT_RGB(pmPict->format)));
506
507         NV30_SetupSurface(pScrn, pdPix, pdPict);
508         NV30EXATexture(pScrn, psPix, psPict, 0);
509
510 #if 0
511 #define printformat(f) ErrorF("(%xh %dbpp A%dR%dG%dB%d)",f,(f>>24),(f&0xf000)>>12,(f&0xf00)>>8,(f&0xf0)>>4,f&0xf)
512         ErrorF("Preparecomposite src(%dx%d)",psPict->pDrawable->width,psPict->pDrawable->height);
513         printformat((psPict->format));
514         ErrorF(" dst(%dx%d)",pdPict->pDrawable->width,pdPict->pDrawable->height);
515         printformat((pdPict->format));
516         if (pmPict)
517         {
518                 ErrorF(" mask(%dx%d)",pmPict->pDrawable->width,pmPict->pDrawable->height);
519                 printformat((pmPict->format));
520         }
521         ErrorF("\n");
522 #endif
523
524         NV30_SetVtx(pScrn,pmPict?1:0);
525         if (pmPict) {
526                 NV30EXATexture(pScrn, pmPix, pmPict, 1);
527
528                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
529                         if (blend->src_alpha)
530                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_SA_CA;
531                         else
532                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_CA;
533                 } else {
534                         fpid = NV30EXA_FPID_COMPOSITE_MASK;
535                 }
536
537                 state->have_mask = TRUE;
538         } else {
539                 fpid = NV30EXA_FPID_PASS_TEX0;
540
541                 state->have_mask = FALSE;
542         }
543
544         if (pdPict->format == PICT_a8)
545                 NV30_LoadFragProg(pScrn, nv40_fp_map_a8[fpid]);
546         else
547                 NV30_LoadFragProg(pScrn, nv40_fp_map[fpid]);
548
549         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
550         OUT_RING  (8); /* GL_QUADS */
551
552         return TRUE;
553 }
554
555 #define xFixedToFloat(v) \
556         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
557
558 static void
559 NV30EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
560                                           float *x_ret, float *y_ret)
561 {
562         PictVector v;
563
564         if (t) {
565                 v.vector[0] = IntToxFixed(x);
566                 v.vector[1] = IntToxFixed(y);
567                 v.vector[2] = xFixed1;
568                 PictureTransformPoint(t, &v);
569                 *x_ret = xFixedToFloat(v.vector[0]);
570                 *y_ret = xFixedToFloat(v.vector[1]);
571         } else {
572                 *x_ret = (float)x;
573                 *y_ret = (float)y;
574         }
575 }
576
/*
 * Emit one vertex with source and mask texture coordinates: four floats
 * starting at 2F attribute 8, then the packed destination position
 * (y in the high 16 bits, x in the low) to 2I attribute 0.
 */
#define CV_OUTm(src_x,src_y,msk_x,msk_y,dst_x,dst_y) do {                      \
        BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ATTR_2F_X(8), 4);        \
        OUT_RINGf ((src_x));                                                   \
        OUT_RINGf ((src_y));                                                   \
        OUT_RINGf ((msk_x));                                                   \
        OUT_RINGf ((msk_y));                                                   \
        BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ATTR_2I(0), 1);          \
        OUT_RING  (((dst_y)<<16)|(dst_x));                                     \
} while(0)

/* Same as CV_OUTm but with source texture coordinates only. */
#define CV_OUT(src_x,src_y,dst_x,dst_y) do {                                   \
        BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ATTR_2F_X(8), 2);        \
        OUT_RINGf ((src_x));                                                   \
        OUT_RINGf ((src_y));                                                   \
        BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_ATTR_2I(0), 1);          \
        OUT_RING  (((dst_y)<<16)|(dst_x));                                     \
} while(0)
590
591 void
592 NV30EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
593                                   int maskX, int maskY,
594                                   int dstX , int dstY,
595                                   int width, int height)
596 {
597         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
598         NVPtr pNv = NVPTR(pScrn);
599         float sX0, sX1, sY0, sY1;
600         float mX0, mX1, mY0, mY1;
601         NV30EXA_STATE;
602
603 #if 0
604         ErrorF("Composite [%dx%d] (%d,%d)IN(%d,%d)OP(%d,%d)\n",width,height,srcX,srcY,maskX,maskY,dstX,dstY);
605 #endif
606         NV30EXATransformCoord(state->unit[0].transform, srcX, srcY,
607                               state->unit[0].width,
608                               state->unit[0].height, &sX0, &sY0);
609         NV30EXATransformCoord(state->unit[0].transform,
610                               srcX + width, srcY + height,
611                               state->unit[0].width,
612                               state->unit[0].height, &sX1, &sY1);
613
614         if (state->have_mask) {
615                 NV30EXATransformCoord(state->unit[1].transform, maskX, maskY,
616                                       state->unit[1].width,
617                                       state->unit[1].height, &mX0, &mY0);
618                 NV30EXATransformCoord(state->unit[1].transform,
619                                       maskX + width, maskY + height,
620                                       state->unit[1].width,
621                                       state->unit[1].height, &mX1, &mY1);
622                 CV_OUTm(sX0 , sY0 , mX0, mY0, dstX        ,          dstY);
623                 CV_OUTm(sX1 , sY0 , mX1, mY0, dstX + width,          dstY);
624                 CV_OUTm(sX1 , sY1 , mX1, mY1, dstX + width, dstY + height);
625                 CV_OUTm(sX0 , sY1 , mX0, mY1, dstX        , dstY + height);
626         } else {
627                 CV_OUT(sX0 , sY0 , dstX        ,          dstY);
628                 CV_OUT(sX1 , sY0 , dstX + width,          dstY);
629                 CV_OUT(sX1 , sY1 , dstX + width, dstY + height);
630                 CV_OUT(sX0 , sY1 , dstX        , dstY + height);
631         }
632
633         FIRE_RING();
634 }
635
636 void
637 NV30EXADoneComposite(PixmapPtr pdPix)
638 {
639         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
640         NVPtr pNv = NVPTR(pScrn);
641
642         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
643         OUT_RING  (0);
644 }
645
646 Bool
647 NVAccelInitNV30TCL(ScrnInfoPtr pScrn)
648 {
649         NVPtr pNv = NVPTR(pScrn);
650         static int have_object = FALSE;
651         uint32_t class = 0, chipset;
652         int i;
653
654         NV30EXAHackupA8Shaders(pScrn);
655
656 #undef  NV30_TCL_PRIMITIVE_3D
657 #define NV30_TCL_PRIMITIVE_3D                 0x0397
658 #define NV30_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK 0x00000003
659 #define NV35_TCL_PRIMITIVE_3D                 0x0497
660 #define NV35_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK 0x000001e0
661 #define NV34_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK 0x00000010
662
663         chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
664         if ((chipset & 0xf0) != NV_ARCH_30)
665                 return TRUE;
666         chipset &= 0xf;
667
668         if (NV30_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK & (1<<chipset))
669                 class = NV30_TCL_PRIMITIVE_3D;
670         else if (NV35_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK & (1<<chipset))
671                 class = NV35_TCL_PRIMITIVE_3D;
672         else if (NV34_TCL_PRIMITIVE_3D_CHIPSET_3X_MASK & (1<<chipset))
673                 class = NV34_TCL_PRIMITIVE_3D;
674         else {
675                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
676                            "NV30EXA: Unknown chipset NV3%1x\n", chipset);
677                 return FALSE;
678         }
679
680         if (!have_object) {
681                 if (!NVDmaCreateContextObject(pNv, Nv3D, class))
682                         return FALSE;
683                 have_object = TRUE;
684         }
685
686         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DMA_IN_MEMORY0, 3);
687         OUT_RING  (NvDmaFB);
688         OUT_RING  (NvDmaFB);
689         OUT_RING  (NvDmaFB);
690         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DMA_IN_MEMORY7, 1);
691         OUT_RING  (NvDmaFB);
692         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DMA_IN_MEMORY3, 2);
693         OUT_RING  (NvDmaFB);
694         OUT_RING  (NvDmaFB);
695         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DMA_IN_MEMORY8, 1);
696         OUT_RING  (NvDmaFB);
697
698         for(i = 0x2c8; i <= 0x2fc; i += 4)
699         {
700                 BEGIN_RING(Nv3D, i, 1);
701                 OUT_RING  (0x0);
702         }
703
704         BEGIN_RING(Nv3D, 0x220, 1);
705         OUT_RING  (1);
706
707         BEGIN_RING(Nv3D, 0x03b0, 1);
708         OUT_RING  (0x00100000);
709         BEGIN_RING(Nv3D, 0x1454, 1);
710         OUT_RING  (0);
711         BEGIN_RING(Nv3D, 0x1d80, 1);
712         OUT_RING  (3);
713         
714         /* NEW */
715         BEGIN_RING(Nv3D, 0x1e98, 1);
716         OUT_RING  (0);
717         BEGIN_RING(Nv3D, 0x17e0, 3);
718         OUT_RING  (0);
719         OUT_RING  (0);
720         OUT_RING  (0x3f800000);
721         BEGIN_RING(Nv3D, 0x1f80, 16);
722         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
723         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
724         OUT_RING  (0x0000ffff);
725         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
726         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
727
728         BEGIN_RING(Nv3D, 0x120, 3);
729         OUT_RING  (0);
730         OUT_RING  (1);
731         OUT_RING  (2);
732
733         BEGIN_RING(Nv3D, 0x1d88, 1);
734         OUT_RING  (0x00001200);
735
736         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_RC_ENABLE, 1);
737         OUT_RING  (0);
738
739         /* Attempt to setup a known state.. Probably missing a heap of
740          * stuff here..
741          */
742         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_STENCIL_FRONT_ENABLE, 1);
743         OUT_RING  (0);
744         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_STENCIL_BACK_ENABLE, 1);
745         OUT_RING  (0);
746         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_ALPHA_FUNC_ENABLE, 1);
747         OUT_RING  (0);
748         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DEPTH_WRITE_ENABLE, 2);
749         OUT_RING  (0); /* wr disable */
750         OUT_RING  (0); /* test disable */
751         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_COLOR_MASK, 1);
752         OUT_RING  (0x01010101); /* TR,TR,TR,TR */
753         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_CULL_FACE_ENABLE, 1);
754         OUT_RING  (0);
755         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
756         OUT_RING  (0);
757         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_COLOR_LOGIC_OP_ENABLE, 2);
758         OUT_RING  (0);
759         OUT_RING  (0x1503 /*GL_COPY*/);
760         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_DITHER_ENABLE, 1);
761         OUT_RING  (1);
762         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_SHADE_MODEL, 1);
763         OUT_RING  (0x1d01 /*GL_SMOOTH*/);
764         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_POLYGON_OFFSET_FACTOR,2);
765         OUT_RINGf (0.0);
766         OUT_RINGf (0.0);
767         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_POLYGON_MODE_FRONT, 2);
768         OUT_RING  (0x1b02 /*GL_FILL*/);
769         OUT_RING  (0x1b02 /*GL_FILL*/);
770         /* - Disable texture units
771          * - Set fragprog to MOVR result.color, fragment.color */
772         for (i=0;i<16;i++) {
773                 BEGIN_RING(Nv3D,
774                                 NV34_TCL_PRIMITIVE_3D_TX_ENABLE(i), 1);
775                 OUT_RING  (0);
776         }
777         /* Polygon stipple */
778         BEGIN_RING(Nv3D,
779                         NV34_TCL_PRIMITIVE_3D_POLYGON_STIPPLE_PATTERN(0), 0x20);
780         for (i=0;i<0x20;i++)
781                 OUT_RING  (0xFFFFFFFF);
782
783         /* Ok.  If you start X with the nvidia driver, kill it, and then
784          * start X with nouveau you will get black rendering instead of
785          * what you'd expect.  This fixes the problem, and it seems that
786          * it's not needed between nouveau restarts - which suggests that
787          * the 3D context (wherever it's stored?) survives somehow.
788          */
789         //BEGIN_RING(Nv3D, 0x1d60,1);
790         //OUT_RING  (0x03008000);
791
792         int w=4096;
793         int h=4096;
794         int pitch=4096*4;
795         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 5);
796         OUT_RING  (w<<16);
797         OUT_RING  (h<<16);
798         OUT_RING  (0x148); /* format */
799         OUT_RING  (pitch << 16 | pitch);
800         OUT_RING  (0x0);
801         BEGIN_RING(Nv3D, 0x0a00, 2);
802         OUT_RING  ((w<<16) | 0);
803         OUT_RING  ((h<<16) | 0);
804         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(0), 2);
805         OUT_RING  ((w-1)<<16);
806         OUT_RING  ((h-1)<<16);
807         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_SCISSOR_HORIZ, 2);
808         OUT_RING  (w<<16);
809         OUT_RING  (h<<16);
810         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 2);
811         OUT_RING  (w<<16);
812         OUT_RING  (h<<16);
813
814         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_VIEWPORT_SCALE0_X, 8);
815         OUT_RINGf (0.0);
816         OUT_RINGf (0.0);
817         OUT_RINGf (0.0);
818         OUT_RINGf (0.0);
819         OUT_RINGf (1.0);
820         OUT_RINGf (1.0);
821         OUT_RINGf (1.0);
822         OUT_RINGf (0.0);
823
824         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_MODELVIEW_MATRIX(0), 16);
825         OUT_RINGf (1.0);
826         OUT_RINGf (0.0);
827         OUT_RINGf (0.0);
828         OUT_RINGf (0.0);
829         OUT_RINGf (0.0);
830         OUT_RINGf (1.0);
831         OUT_RINGf (0.0);
832         OUT_RINGf (0.0);
833         OUT_RINGf (0.0);
834         OUT_RINGf (0.0);
835         OUT_RINGf (1.0);
836         OUT_RINGf (0.0);
837         OUT_RINGf (0.0);
838         OUT_RINGf (0.0);
839         OUT_RINGf (0.0);
840         OUT_RINGf (1.0);
841
842         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_PROJECTION_MATRIX(0), 16);
843         OUT_RINGf (1.0);
844         OUT_RINGf (0.0);
845         OUT_RINGf (0.0);
846         OUT_RINGf (0.0);
847         OUT_RINGf (0.0);
848         OUT_RINGf (1.0);
849         OUT_RINGf (0.0);
850         OUT_RINGf (0.0);
851         OUT_RINGf (0.0);
852         OUT_RINGf (0.0);
853         OUT_RINGf (1.0);
854         OUT_RINGf (0.0);
855         OUT_RINGf (0.0);
856         OUT_RINGf (0.0);
857         OUT_RINGf (0.0);
858         OUT_RINGf (1.0);
859
860         BEGIN_RING(Nv3D, NV34_TCL_PRIMITIVE_3D_SCISSOR_HORIZ, 2);
861         OUT_RING  (4096<<16);
862         OUT_RING  (4096<<16);
863
864         return TRUE;
865 }
866
867
868
869 #endif /* ENABLE_NV30EXA */