NV30EXA: Chipset should be masked by 0xFFF0; switch to NVArch while I'm at it.
[nouveau] / src / nv30_exa.c
1 /*
2  * Copyright 2007 Ben Skeggs
3  * Copyright 2007 Stephane Marchesin
4  * Copyright 2007 Jeremy Kolb
5  * Copyright 2007 Patrice Mandin
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
22  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  */
25
26 #include "nv_include.h"
27 #include "nv_shaders.h"
28
29 typedef struct nv_pict_surface_format {
30         int      pict_fmt;
31         uint32_t card_fmt;
32 } nv_pict_surface_format_t;
33
34 typedef struct nv_pict_texture_format {
35         int      pict_fmt;
36         uint32_t card_fmt;
37         uint32_t card_swz;
38 } nv_pict_texture_format_t;
39
40 typedef struct nv_pict_op {
41         Bool     src_alpha;
42         Bool     dst_alpha;
43         uint32_t src_card_op;
44         uint32_t dst_card_op;
45 } nv_pict_op_t;
46
47 typedef struct nv30_exa_state {
48         Bool have_mask;
49
50         struct {
51                 PictTransformPtr transform;
52                 float width;
53                 float height;
54         } unit[2];
55 } nv30_exa_state_t;
56 static nv30_exa_state_t exa_state;
57 #define NV30EXA_STATE nv30_exa_state_t *state = &exa_state
58
59 static nv_pict_surface_format_t
60 NV30SurfaceFormat[] = {
61         { PICT_a8r8g8b8 , 0x148 },
62         { PICT_a8b8g8r8 , 0x150 },
63         { PICT_x8r8g8b8 , 0x145 },
64         { PICT_x8b8g8r8 , 0x14f },
65         { PICT_r5g6b5   , 0x143 },
66         { PICT_a8       , 0x149 },
67 };
68
69 static nv_pict_surface_format_t *
70 NV30_GetPictSurfaceFormat(int format)
71 {
72         int i;
73
74         for(i=0;i<sizeof(NV30SurfaceFormat)/sizeof(NV30SurfaceFormat[0]);i++)
75         {
76                 if (NV30SurfaceFormat[i].pict_fmt == format)
77                         return &NV30SurfaceFormat[i];
78         }
79
80         return NULL;
81 }
82
83 enum {
84         NV30EXA_FPID_PASS_COL0 = 0,
85         NV30EXA_FPID_PASS_TEX0 = 1,
86         NV30EXA_FPID_COMPOSITE_MASK = 2,
87         NV30EXA_FPID_COMPOSITE_MASK_SA_CA = 3,
88         NV30EXA_FPID_COMPOSITE_MASK_CA = 4,
89         NV30EXA_FPID_MAX = 5
90 } NV30EXA_FPID;
91
92 static nv_shader_t *nv40_fp_map[NV30EXA_FPID_MAX] = {
93         &nv30_fp_pass_col0,
94         &nv30_fp_pass_tex0,
95         &nv30_fp_composite_mask,
96         &nv30_fp_composite_mask_sa_ca,
97         &nv30_fp_composite_mask_ca
98 };
99
100 static nv_shader_t *nv40_fp_map_a8[NV30EXA_FPID_MAX];
101
102 static void
103 NV30EXAHackupA8Shaders(ScrnInfoPtr pScrn)
104 {
105         int s;
106
107         for (s = 0; s < NV30EXA_FPID_MAX; s++) {
108                 nv_shader_t *def, *a8;
109
110                 def = nv40_fp_map[s];
111                 a8 = xcalloc(1, sizeof(nv_shader_t));
112                 a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs;
113                 a8->size = def->size + 4;
114                 memcpy(a8->data, def->data, def->size * sizeof(uint32_t));
115                 nv40_fp_map_a8[s] = a8;
116
117                 a8->data[a8->size - 8 + 0] &= ~0x00000081;
118                 a8->data[a8->size - 4 + 0]  = 0x01401e81;
119                 a8->data[a8->size - 4 + 1]  = 0x1c9dfe00;
120                 a8->data[a8->size - 4 + 2]  = 0x0001c800;
121                 a8->data[a8->size - 4 + 3]  = 0x0001c800;
122         }
123 }
124
125 /* should be in nouveau_reg.h at some point.. */
126 #define NV34TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT      14
127 #define NV34TCL_TX_SWIZZLE_UNIT_S0_X_ZERO        0
128 #define NV34TCL_TX_SWIZZLE_UNIT_S0_X_ONE         1
129 #define NV34TCL_TX_SWIZZLE_UNIT_S0_X_S1          2
130 #define NV34TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT      12
131 #define NV34TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT      10
132 #define NV34TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT       8
133 #define NV34TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT       6
134 #define NV34TCL_TX_SWIZZLE_UNIT_S1_X_X           3
135 #define NV34TCL_TX_SWIZZLE_UNIT_S1_X_Y           2
136 #define NV34TCL_TX_SWIZZLE_UNIT_S1_X_Z           1
137 #define NV34TCL_TX_SWIZZLE_UNIT_S1_X_W           0
138 #define NV34TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT       4
139 #define NV34TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT       2
140 #define NV34TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT       0
141
142 #define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                       \
143   {                                                                           \
144   PICT_##r,                                                                   \
145   (tf),                                                                       \
146   (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##ts0x << NV34TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT)|\
147   (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##ts0y << NV34TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT)|\
148   (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##ts0z << NV34TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT)|\
149   (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##ts0w << NV34TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT)|\
150   (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##ts1x << NV34TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT)|\
151   (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##ts1y << NV34TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT)|\
152   (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##ts1z << NV34TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT)|\
153   (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##ts1w << NV34TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT)\
154   }
155
156 static nv_pict_texture_format_t
157 NV30TextureFormat[] = {
158         _(a8r8g8b8, 0x12,   S1,   S1,   S1,   S1, X, Y, Z, W),
159         _(a8b8g8r8, 0x12,   S1,   S1,   S1,   S1, Z, Y, X, W),
160         _(x8r8g8b8, 0x12,   S1,   S1,   S1,  ONE, X, Y, Z, W),
161         _(x8b8g8r8, 0x12,   S1,   S1,   S1,  ONE, Z, Y, X, W),
162         _(a1r5g5b5, 0x10,   S1,   S1,   S1,   S1, X, Y, Z, W),
163         _(x1r5g5b5, 0x10,   S1,   S1,   S1,  ONE, X, Y, Z, W),
164         _(x4r4g4b4, 0x1d,   S1,   S1,   S1,  ONE, X, Y, Z, W),
165         _(a4r4g4b4, 0x1d,   S1,   S1,   S1,   S1, X, Y, Z, W),
166         _(      a8, 0x1b, ZERO, ZERO, ZERO,   S1, X, X, X, X),
167 };
168
169
170 static nv_pict_texture_format_t *
171 NV30_GetPictTextureFormat(int format)
172 {
173         int i;
174
175         for(i=0;i<sizeof(NV30TextureFormat)/sizeof(NV30TextureFormat[0]);i++)
176         {
177                 if (NV30TextureFormat[i].pict_fmt == format)
178                         return &NV30TextureFormat[i];
179         }
180
181         return NULL;
182 }
183
184 #define NV34TCL_BF_ZERO                                     0x0000
185 #define NV34TCL_BF_ONE                                      0x0001
186 #define NV34TCL_BF_SRC_COLOR                                0x0300
187 #define NV34TCL_BF_ONE_MINUS_SRC_COLOR                      0x0301
188 #define NV34TCL_BF_SRC_ALPHA                                0x0302
189 #define NV34TCL_BF_ONE_MINUS_SRC_ALPHA                      0x0303
190 #define NV34TCL_BF_DST_ALPHA                                0x0304
191 #define NV34TCL_BF_ONE_MINUS_DST_ALPHA                      0x0305
192 #define NV34TCL_BF_DST_COLOR                                0x0306
193 #define NV34TCL_BF_ONE_MINUS_DST_COLOR                      0x0307
194 #define NV34TCL_BF_ALPHA_SATURATE                           0x0308
195 #define BF(bf) NV34TCL_BF_##bf
196
197 static nv_pict_op_t 
198 NV30PictOp[] = {
199 /* Clear       */ { 0, 0, BF(               ZERO), BF(               ZERO) },
200 /* Src         */ { 0, 0, BF(                ONE), BF(               ZERO) },
201 /* Dst         */ { 0, 0, BF(               ZERO), BF(                ONE) },
202 /* Over        */ { 1, 0, BF(                ONE), BF(ONE_MINUS_SRC_ALPHA) },
203 /* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(                ONE) },
204 /* In          */ { 0, 1, BF(          DST_ALPHA), BF(               ZERO) },
205 /* InReverse   */ { 1, 0, BF(               ZERO), BF(          SRC_ALPHA) },
206 /* Out         */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(               ZERO) },
207 /* OutReverse  */ { 1, 0, BF(               ZERO), BF(ONE_MINUS_SRC_ALPHA) },
208 /* Atop        */ { 1, 1, BF(          DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
209 /* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(          SRC_ALPHA) },
210 /* Xor         */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
211 /* Add         */ { 0, 0, BF(                ONE), BF(                ONE) }
212 };
213
214 static nv_pict_op_t *
215 NV30_GetPictOpRec(int op)
216 {
217         if (op >= PictOpSaturate)
218                 return NULL;
219 #if 0
220         switch(op)
221         {
222                 case 0:ErrorF("Op Clear\n");break;
223                 case 1:ErrorF("Op Src\n");break;
224                 case 2:ErrorF("Op Dst\n");break;
225                 case 3:ErrorF("Op Over\n");break;
226                 case 4:ErrorF("Op OverReverse\n");break;
227                 case 5:ErrorF("Op In\n");break;
228                 case 6:ErrorF("Op InReverse\n");break;
229                 case 7:ErrorF("Op Out\n");break;
230                 case 8:ErrorF("Op OutReverse\n");break;
231                 case 9:ErrorF("Op Atop\n");break;
232                 case 10:ErrorF("Op AtopReverse\n");break;
233                 case 11:ErrorF("Op Xor\n");break;
234                 case 12:ErrorF("Op Add\n");break;
235         }
236 #endif
237         return &NV30PictOp[op];
238 }
239
/* FALLBACK(fmt, ...) makes the enclosing function return FALSE.  When
 * FALLBACK_DEBUG is set to 1 it first logs the reason together with the
 * function name and line number; otherwise the arguments are discarded. */
#define FALLBACK_DEBUG 0
#if FALLBACK_DEBUG != 1
#define FALLBACK(fmt,args...) do { \
        return FALSE;              \
} while(0)
#else
#define FALLBACK(fmt,args...) do {                                      \
        ErrorF("FALLBACK %s:%d> " fmt, __func__, __LINE__, ##args);     \
        return FALSE;                                                   \
} while(0)
#endif
251
252 static void
253 NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
254 {
255         NVPtr pNv = NVPTR(pScrn);
256         static struct nouveau_bo *fp_mem = NULL;
257         static int next_hw_id_offset = 0;
258
259         if (!fp_mem) {
260                 if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_PIN,
261                                    0, 0x1000, &fp_mem)) {
262                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
263                                         "Couldn't alloc fragprog buffer!\n");
264                         return;
265                 }
266
267                 if (nouveau_bo_map(fp_mem, NOUVEAU_BO_RDWR)) {
268                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
269                                    "Couldn't map fragprog buffer!\n");
270                 }
271         }
272
273         if (!shader->hw_id) {
274                 uint32_t *map = fp_mem->map + next_hw_id_offset;
275                 int i;
276
277                 for (i = 0; i < shader->size; i++) {
278                         uint32_t data = shader->data[i];
279 #if (X_BYTE_ORDER != X_LITTLE_ENDIAN)
280                         data = ((data >> 16) | ((data & 0xffff) << 16));
281 #endif
282                         map[i] = data;
283                 }
284
285                 shader->hw_id += next_hw_id_offset;
286                 next_hw_id_offset += (shader->size * sizeof(uint32_t));
287                 next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
288         }
289
290         BEGIN_RING(Nv3D, NV34TCL_FP_ACTIVE_PROGRAM, 1);
291         OUT_RELOC (fp_mem, shader->hw_id, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
292                    NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
293                    1 /*NV30TCL_FP_ADDRESS_DMA0*/, 2 /*NV30TCL_FP_ADDRESS_DMA1*/);
294
295         BEGIN_RING(Nv3D, 0x1d60, 1);
296         OUT_RING  (0); /* USES_KIL (1<<7) == 0 */
297         BEGIN_RING(Nv3D, 0x1450, 1);
298         OUT_RING  (shader->card_priv.NV30FP.num_regs << 16| 4);
299         BEGIN_RING(Nv3D, 0x1d7c, 1);
300         OUT_RING  (0xffff0000);
301
302 }
303
304 static void
305 NV30_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
306                 PictFormatShort dest_format, Bool component_alpha)
307 {
308         NVPtr pNv = NVPTR(pScrn);
309         uint32_t sblend, dblend;
310
311         sblend = blend->src_card_op;
312         dblend = blend->dst_card_op;
313
314         if (blend->dst_alpha) {
315                 if (!PICT_FORMAT_A(dest_format)) {
316                         if (sblend == BF(DST_ALPHA)) {
317                                 sblend = BF(ONE);
318                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
319                                 sblend = BF(ZERO);
320                         }
321                 } else if (dest_format == PICT_a8) {
322                         if (sblend == BF(DST_ALPHA)) {
323                                 sblend = BF(DST_COLOR);
324                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
325                                 sblend = BF(ONE_MINUS_DST_COLOR);
326                         }
327                 }
328         }
329
330         if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
331                 if (dblend == BF(SRC_ALPHA)) {
332                         dblend = BF(SRC_COLOR);
333                 } else if (dblend == BF(ONE_MINUS_SRC_ALPHA)) {
334                         dblend = BF(ONE_MINUS_SRC_COLOR);
335                 }
336         }
337
338         if (sblend == BF(ONE) && dblend == BF(ZERO)) {
339                 BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 1);
340                 OUT_RING  (0);
341         } else {
342                 BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 3);
343                 OUT_RING  (1);
344                 OUT_RING  ((sblend << 16) | sblend);
345                 OUT_RING  ((dblend << 16) | dblend);
346         }
347 }
348
349 static Bool
350 NV30EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
351 {
352         NVPtr pNv = NVPTR(pScrn);
353         nv_pict_texture_format_t *fmt;
354         uint32_t card_filter, card_repeat;
355         NV30EXA_STATE;
356
357         fmt = NV30_GetPictTextureFormat(pPict->format);
358         if (!fmt)
359                 return FALSE;
360
361         card_repeat = 3; /* repeatNone */
362
363         if (pPict->filter == PictFilterBilinear)
364                 card_filter = 2;
365         else
366                 card_filter = 1;
367
368         BEGIN_RING(Nv3D, NV34TCL_TX_OFFSET(unit), 8);
369         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
370
371         OUT_RING  ((2 << 4) /* 2D */ |
372                         (fmt->card_fmt << 8) |
373                         (1 << 16) /* 1 mipmap level */ |
374                         (log2i(pPix->drawable.width)  << 20) |
375                         (log2i(pPix->drawable.height) << 24) |
376                         9);
377
378         OUT_RING  ((card_repeat <<  0) /* S */ |
379                         (card_repeat <<  8) /* T */ |
380                         (card_repeat << 16) /* R */);
381         OUT_RING  (0x40000000); /* enable */
382         OUT_RING  ((((uint32_t)exaGetPixmapPitch(pPix))<<16) | fmt->card_swz);
383
384         OUT_RING  ((card_filter << 16) /* min */ |
385                         (card_filter << 24) /* mag */ |
386                         0x2000 /* engine lock */);
387         OUT_RING  ((pPix->drawable.width << 16) | pPix->drawable.height);
388         OUT_RING  (0); /* border ARGB */
389
390         state->unit[unit].width         = (float)pPix->drawable.width;
391         state->unit[unit].height        = (float)pPix->drawable.height;
392         state->unit[unit].transform     = pPict->transform;
393
394         return TRUE;
395 }
396
397 static Bool
398 NV30_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict)
399 {
400         NVPtr pNv = NVPTR(pScrn);
401         nv_pict_surface_format_t *fmt;
402
403         fmt = NV30_GetPictSurfaceFormat(pPict->format);
404         if (!fmt) {
405                 ErrorF("AIII no format\n");
406                 return FALSE;
407         }
408
409         uint32_t pitch = (uint32_t)exaGetPixmapPitch(pPix);
410
411         int x = pPict->pDrawable->x;
412         int y = pPict->pDrawable->y;
413         int w = pPict->pDrawable->width;
414         int h = pPict->pDrawable->height;
415         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_HORIZ, 5);
416         OUT_RING  ((w<<16)|x);
417         OUT_RING  ((h<<16)|y);
418         OUT_RING  (fmt->card_fmt); /* format */
419         OUT_RING  (pitch << 16 | pitch);
420         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
421         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
422         OUT_RING  ((w-1+x)<<16);
423         OUT_RING  ((h-1+y)<<16);
424         if (pNv->NVArch == 0x30 || pNv->NVArch == 0x31) {
425                 BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
426                 OUT_RING((y<<16)|x);
427         }
428
429         return TRUE;
430 }
431
432 static Bool
433 NV30EXACheckCompositeTexture(PicturePtr pPict)
434 {
435         nv_pict_texture_format_t *fmt;
436         int w = pPict->pDrawable->width;
437         int h = pPict->pDrawable->height;
438
439         if ((w > 4096) || (h>4096))
440                 FALLBACK("picture too large, %dx%d\n", w, h);
441
442         fmt = NV30_GetPictTextureFormat(pPict->format);
443         if (!fmt)
444                 FALLBACK("picture format 0x%08x not supported\n",
445                                 pPict->format);
446
447         if (pPict->filter != PictFilterNearest &&
448                         pPict->filter != PictFilterBilinear)
449                 FALLBACK("filter 0x%x not supported\n", pPict->filter);
450
451         if (!(w==1 && h==1) && pPict->repeat && pPict->repeatType != RepeatNone)
452                 FALLBACK("repeat 0x%x not supported (surface %dx%d)\n", pPict->repeatType,w,h);
453
454         return TRUE;
455 }
456
457 Bool
458 NV30EXACheckComposite(int op, PicturePtr psPict,
459                 PicturePtr pmPict,
460                 PicturePtr pdPict)
461 {
462         nv_pict_surface_format_t *fmt;
463         nv_pict_op_t *opr;
464
465         opr = NV30_GetPictOpRec(op);
466         if (!opr)
467                 FALLBACK("unsupported blend op 0x%x\n", op);
468
469         fmt = NV30_GetPictSurfaceFormat(pdPict->format);
470         if (!fmt)
471                 FALLBACK("dst picture format 0x%08x not supported\n",
472                                 pdPict->format);
473
474         if (!NV30EXACheckCompositeTexture(psPict))
475                 FALLBACK("src picture\n");
476         if (pmPict) {
477                 if (pmPict->componentAlpha &&
478                                 PICT_FORMAT_RGB(pmPict->format) &&
479                                 opr->src_alpha && opr->src_card_op != BF(ZERO))
480                         FALLBACK("mask CA + SA\n");
481                 if (!NV30EXACheckCompositeTexture(pmPict))
482                         FALLBACK("mask picture\n");
483         }
484
485         return TRUE;
486 }
487
488 Bool
489 NV30EXAPrepareComposite(int op, PicturePtr psPict,
490                 PicturePtr pmPict,
491                 PicturePtr pdPict,
492                 PixmapPtr  psPix,
493                 PixmapPtr  pmPix,
494                 PixmapPtr  pdPix)
495 {
496         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
497         NVPtr pNv = NVPTR(pScrn);
498         nv_pict_op_t *blend;
499         int fpid = NV30EXA_FPID_PASS_COL0;
500         NV30EXA_STATE;
501
502         blend = NV30_GetPictOpRec(op);
503
504         NV30_SetupBlend(pScrn, blend, pdPict->format,
505                         (pmPict && pmPict->componentAlpha &&
506                          PICT_FORMAT_RGB(pmPict->format)));
507
508         NV30_SetupSurface(pScrn, pdPix, pdPict);
509         NV30EXATexture(pScrn, psPix, psPict, 0);
510
511 #if 0
512 #define printformat(f) ErrorF("(%xh %s %dbpp A%dR%dG%dB%d)",f,(f>>16)&0xf==2?"ARGB":"ABGR",(f>>24),(f&0xf000)>>12,(f&0xf00)>>8,(f&0xf0)>>4,f&0xf)
513         ErrorF("Preparecomposite src(%dx%d)",psPict->pDrawable->width,psPict->pDrawable->height);
514         printformat((psPict->format));
515         ErrorF(" dst(%dx%d)",pdPict->pDrawable->width,pdPict->pDrawable->height);
516         printformat((pdPict->format));
517         if (pmPict)
518         {
519                 ErrorF(" mask(%dx%d)",pmPict->pDrawable->width,pmPict->pDrawable->height);
520                 printformat((pmPict->format));
521         }
522         ErrorF("\n");
523 #endif
524
525         if (pmPict) {
526                 NV30EXATexture(pScrn, pmPix, pmPict, 1);
527
528                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
529                         if (blend->src_alpha)
530                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_SA_CA;
531                         else
532                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_CA;
533                 } else {
534                         fpid = NV30EXA_FPID_COMPOSITE_MASK;
535                 }
536
537                 state->have_mask = TRUE;
538         } else {
539                 fpid = NV30EXA_FPID_PASS_TEX0;
540
541                 state->have_mask = FALSE;
542         }
543
544         if (pdPict->format == PICT_a8)
545                 NV30_LoadFragProg(pScrn, nv40_fp_map_a8[fpid]);
546         else
547                 NV30_LoadFragProg(pScrn, nv40_fp_map[fpid]);
548
549         BEGIN_RING(Nv3D, 0x23c, 1);
550         OUT_RING  (pmPict?3:1);
551
552         BEGIN_RING(Nv3D, NV34TCL_VERTEX_BEGIN_END, 1);
553         OUT_RING  (8); /* GL_QUADS */
554
555         return TRUE;
556 }
557
558 #define xFixedToFloat(v) \
559         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
560
561 static void
562 NV30EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
563                                           float *x_ret, float *y_ret)
564 {
565         PictVector v;
566
567         if (t) {
568                 v.vector[0] = IntToxFixed(x);
569                 v.vector[1] = IntToxFixed(y);
570                 v.vector[2] = xFixed1;
571                 PictureTransformPoint(t, &v);
572                 *x_ret = xFixedToFloat(v.vector[0]);
573                 *y_ret = xFixedToFloat(v.vector[1]);
574         } else {
575                 *x_ret = (float)x;
576                 *y_ret = (float)y;
577         }
578 }
579
580 #define CV_OUTm(sx,sy,mx,my,dx,dy) do {                                        \
581         BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2F_X(8), 4);        \
582         OUT_RINGf ((sx)); OUT_RINGf ((sy));                          \
583         OUT_RINGf ((mx)); OUT_RINGf ((my));                          \
584         BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2I(0), 1);          \
585         OUT_RING  (((dy)<<16)|(dx));                                           \
586 } while(0)
587 #define CV_OUT(sx,sy,dx,dy) do {                                               \
588         BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2F_X(8), 2);        \
589         OUT_RINGf ((sx)); OUT_RINGf ((sy));                          \
590         BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2I(0), 1);          \
591         OUT_RING  (((dy)<<16)|(dx));                                           \
592 } while(0)
593
594 void
595 NV30EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
596                                   int maskX, int maskY,
597                                   int dstX , int dstY,
598                                   int width, int height)
599 {
600         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
601         NVPtr pNv = NVPTR(pScrn);
602         float sX0, sX1, sX2, sY0, sY1, sY2, sX3, sY3;
603         float mX0, mX1, mX2, mY0, mY1, mY2, mX3, mY3;
604         NV30EXA_STATE;
605
606 #if 0
607         ErrorF("Composite [%dx%d] (%d,%d)IN(%d,%d)OP(%d,%d)\n",width,height,srcX,srcY,maskX,maskY,dstX,dstY);
608 #endif
609         NV30EXATransformCoord(state->unit[0].transform, srcX, srcY,
610                               state->unit[0].width,
611                               state->unit[0].height, &sX0, &sY0);
612         NV30EXATransformCoord(state->unit[0].transform,
613                               srcX + width, srcY,
614                               state->unit[0].width,
615                               state->unit[0].height, &sX1, &sY1);
616         NV30EXATransformCoord(state->unit[0].transform,
617                               srcX + width, srcY + height,
618                               state->unit[0].width,
619                               state->unit[0].height, &sX2, &sY2);
620         NV30EXATransformCoord(state->unit[0].transform,
621                               srcX, srcY + height,
622                               state->unit[0].width,
623                               state->unit[0].height, &sX3, &sY3);
624
625         if (state->have_mask) {
626                 NV30EXATransformCoord(state->unit[1].transform, maskX, maskY,
627                                       state->unit[1].width,
628                                       state->unit[1].height, &mX0, &mY0);
629                 NV30EXATransformCoord(state->unit[1].transform,
630                                       maskX + width, maskY,
631                                       state->unit[1].width,
632                                       state->unit[1].height, &mX1, &mY1);
633                 NV30EXATransformCoord(state->unit[1].transform,
634                                       maskX + width, maskY + height,
635                                       state->unit[1].width,
636                                       state->unit[1].height, &mX2, &mY2);
637                 NV30EXATransformCoord(state->unit[1].transform,
638                                       maskX, maskY + height,
639                                       state->unit[1].width,
640                                       state->unit[1].height, &mX3, &mY3);
641
642                 CV_OUTm(sX0 , sY0 , mX0, mY0, dstX        ,          dstY);
643                 CV_OUTm(sX1 , sY1 , mX1, mY1, dstX + width,          dstY);
644                 CV_OUTm(sX2 , sY2 , mX2, mY2, dstX + width, dstY + height);
645                 CV_OUTm(sX3 , sY3 , mX3, mY3, dstX        , dstY + height);
646         } else {
647                 CV_OUT(sX0 , sY0 , dstX        ,          dstY);
648                 CV_OUT(sX1 , sY1 , dstX + width,          dstY);
649                 CV_OUT(sX2 , sY2 , dstX + width, dstY + height);
650                 CV_OUT(sX3 , sY3 , dstX        , dstY + height);
651         }
652 }
653
654 void
655 NV30EXADoneComposite(PixmapPtr pdPix)
656 {
657         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
658         NVPtr pNv = NVPTR(pScrn);
659
660         BEGIN_RING(Nv3D, NV34TCL_VERTEX_BEGIN_END, 1);
661         OUT_RING  (0);
662
663         FIRE_RING();
664 }
665
666 Bool
667 NVAccelInitNV30TCL(ScrnInfoPtr pScrn)
668 {
669         NVPtr pNv = NVPTR(pScrn);
670         uint32_t class = 0, chipset;
671         int i;
672
673         NV30EXAHackupA8Shaders(pScrn);
674
675 #define NV30TCL_CHIPSET_3X_MASK 0x00000003
676 #define NV35TCL_CHIPSET_3X_MASK 0x000001e0
677 #define NV34TCL_CHIPSET_3X_MASK 0x00000010
678
679         chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
680         if ((chipset & 0xf0) != NV_ARCH_30)
681                 return TRUE;
682         chipset &= 0xf;
683
684         if (NV30TCL_CHIPSET_3X_MASK & (1<<chipset))
685                 class = NV30TCL;
686         else if (NV35TCL_CHIPSET_3X_MASK & (1<<chipset))
687                 class = NV35TCL;
688         else if (NV34TCL_CHIPSET_3X_MASK & (1<<chipset))
689                 class = NV34TCL;
690         else {
691                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
692                            "NV30EXA: Unknown chipset NV3%1x\n", chipset);
693                 return FALSE;
694         }
695
696
697         if (!pNv->Nv3D) {
698                 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
699                         return FALSE;
700         }
701
702         BEGIN_RING(Nv3D, NV34TCL_DMA_TEXTURE0, 3);
703         OUT_RING  (pNv->chan->vram->handle);
704         OUT_RING  (pNv->chan->gart->handle);
705         OUT_RING  (pNv->chan->vram->handle);
706         BEGIN_RING(Nv3D, NV34TCL_DMA_IN_MEMORY7, 1);
707         OUT_RING  (pNv->chan->vram->handle);
708         BEGIN_RING(Nv3D, NV34TCL_DMA_COLOR0, 2);
709         OUT_RING  (pNv->chan->vram->handle);
710         OUT_RING  (pNv->chan->vram->handle);
711         BEGIN_RING(Nv3D, NV34TCL_DMA_IN_MEMORY8, 1);
712         OUT_RING  (pNv->chan->vram->handle);
713
714         for (i=1; i<8; i++) {
715                 BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 2);
716                 OUT_RING  (0);
717                 OUT_RING  (0);
718         }
719
720         BEGIN_RING(Nv3D, 0x220, 1);
721         OUT_RING  (1);
722
723         BEGIN_RING(Nv3D, 0x03b0, 1);
724         OUT_RING  (0x00100000);
725         BEGIN_RING(Nv3D, 0x1454, 1);
726         OUT_RING  (0);
727         BEGIN_RING(Nv3D, 0x1d80, 1);
728         OUT_RING  (3);
729         BEGIN_RING(Nv3D, 0x1450, 1);
730         OUT_RING  (0x00030004);
731         
732         /* NEW */
733         BEGIN_RING(Nv3D, 0x1e98, 1);
734         OUT_RING  (0);
735         BEGIN_RING(Nv3D, 0x17e0, 3);
736         OUT_RING  (0);
737         OUT_RING  (0);
738         OUT_RING  (0x3f800000);
739         BEGIN_RING(Nv3D, 0x1f80, 16);
740         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
741         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
742         OUT_RING  (0x0000ffff);
743         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
744         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
745
746         BEGIN_RING(Nv3D, 0x120, 3);
747         OUT_RING  (0);
748         OUT_RING  (1);
749         OUT_RING  (2);
750
751         BEGIN_RING(NvImageBlit, 0x120, 3);
752         OUT_RING  (0);
753         OUT_RING  (1);
754         OUT_RING  (2);
755
756         BEGIN_RING(Nv3D, 0x1d88, 1);
757         OUT_RING  (0x00001200);
758
759         BEGIN_RING(Nv3D, NV34TCL_RC_ENABLE, 1);
760         OUT_RING  (0);
761
762         /* Attempt to setup a known state.. Probably missing a heap of
763          * stuff here..
764          */
765         BEGIN_RING(Nv3D, NV34TCL_STENCIL_FRONT_ENABLE, 1);
766         OUT_RING  (0);
767         BEGIN_RING(Nv3D, NV34TCL_STENCIL_BACK_ENABLE, 1);
768         OUT_RING  (0);
769         BEGIN_RING(Nv3D, NV34TCL_ALPHA_FUNC_ENABLE, 1);
770         OUT_RING  (0);
771         BEGIN_RING(Nv3D, NV34TCL_DEPTH_WRITE_ENABLE, 2);
772         OUT_RING  (0); /* wr disable */
773         OUT_RING  (0); /* test disable */
774         BEGIN_RING(Nv3D, NV34TCL_COLOR_MASK, 1);
775         OUT_RING  (0x01010101); /* TR,TR,TR,TR */
776         BEGIN_RING(Nv3D, NV34TCL_CULL_FACE_ENABLE, 1);
777         OUT_RING  (0);
778         BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 5);
779         OUT_RING  (0);                          /* Blend enable */
780         OUT_RING  (0);                          /* Blend src */
781         OUT_RING  (0);                          /* Blend dst */
782         OUT_RING  (0x00000000);                 /* Blend colour */
783         OUT_RING  (0x8006);                     /* FUNC_ADD */
784         BEGIN_RING(Nv3D, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
785         OUT_RING  (0);
786         OUT_RING  (0x1503 /*GL_COPY*/);
787         BEGIN_RING(Nv3D, NV34TCL_DITHER_ENABLE, 1);
788         OUT_RING  (1);
789         BEGIN_RING(Nv3D, NV34TCL_SHADE_MODEL, 1);
790         OUT_RING  (0x1d01 /*GL_SMOOTH*/);
791         BEGIN_RING(Nv3D, NV34TCL_POLYGON_OFFSET_FACTOR,2);
792         OUT_RINGf (0.0);
793         OUT_RINGf (0.0);
794         BEGIN_RING(Nv3D, NV34TCL_POLYGON_MODE_FRONT, 2);
795         OUT_RING  (0x1b02 /*GL_FILL*/);
796         OUT_RING  (0x1b02 /*GL_FILL*/);
797         /* - Disable texture units
798          * - Set fragprog to MOVR result.color, fragment.color */
799         for (i=0;i<4;i++) {
800                 BEGIN_RING(Nv3D, NV34TCL_TX_ENABLE(i), 1);
801                 OUT_RING  (0);
802         }
803         /* Polygon stipple */
804         BEGIN_RING(Nv3D, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 0x20);
805         for (i=0;i<0x20;i++)
806                 OUT_RING  (0xFFFFFFFF);
807
808         BEGIN_RING(Nv3D, NV34TCL_DEPTH_RANGE_NEAR, 2);
809         OUT_RINGf  (0.0);
810         OUT_RINGf  (1.0);
811
812         /* Ok.  If you start X with the nvidia driver, kill it, and then
813          * start X with nouveau, you get black rendering instead of what
814          * you'd expect.  The write below (now commented out) fixes that; it
815          * seems unneeded between nouveau restarts, which suggests that the
816          * 3D context (wherever it's stored?) survives somehow.
817          */
818         //BEGIN_RING(Nv3D, 0x1d60,1);
819         //OUT_RING  (0x03008000);
820
821         int w=4096;
822         int h=4096;
823         int pitch=4096*4;
824         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_HORIZ, 5);
825         OUT_RING  (w<<16);
826         OUT_RING  (h<<16);
827         OUT_RING  (0x148); /* format */
828         OUT_RING  (pitch << 16 | pitch);
829         OUT_RING  (0x0);
830         BEGIN_RING(Nv3D, 0x0a00, 2);
831         OUT_RING  ((w<<16) | 0);
832         OUT_RING  ((h<<16) | 0);
833         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
834         OUT_RING  ((w-1)<<16);
835         OUT_RING  ((h-1)<<16);
836         BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
837         OUT_RING  (w<<16);
838         OUT_RING  (h<<16);
839         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_HORIZ, 2);
840         OUT_RING  (w<<16);
841         OUT_RING  (h<<16);
842
843         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_SCALE0_X, 8);
844         OUT_RINGf (0.0);
845         OUT_RINGf (0.0);
846         OUT_RINGf (0.0);
847         OUT_RINGf (0.0);
848         OUT_RINGf (1.0);
849         OUT_RINGf (1.0);
850         OUT_RINGf (1.0);
851         OUT_RINGf (0.0);
852
853         BEGIN_RING(Nv3D, NV34TCL_MODELVIEW_MATRIX(0), 16);
854         OUT_RINGf (1.0);
855         OUT_RINGf (0.0);
856         OUT_RINGf (0.0);
857         OUT_RINGf (0.0);
858         OUT_RINGf (0.0);
859         OUT_RINGf (1.0);
860         OUT_RINGf (0.0);
861         OUT_RINGf (0.0);
862         OUT_RINGf (0.0);
863         OUT_RINGf (0.0);
864         OUT_RINGf (1.0);
865         OUT_RINGf (0.0);
866         OUT_RINGf (0.0);
867         OUT_RINGf (0.0);
868         OUT_RINGf (0.0);
869         OUT_RINGf (1.0);
870
871         BEGIN_RING(Nv3D, NV34TCL_PROJECTION_MATRIX(0), 16);
872         OUT_RINGf (1.0);
873         OUT_RINGf (0.0);
874         OUT_RINGf (0.0);
875         OUT_RINGf (0.0);
876         OUT_RINGf (0.0);
877         OUT_RINGf (1.0);
878         OUT_RINGf (0.0);
879         OUT_RINGf (0.0);
880         OUT_RINGf (0.0);
881         OUT_RINGf (0.0);
882         OUT_RINGf (1.0);
883         OUT_RINGf (0.0);
884         OUT_RINGf (0.0);
885         OUT_RINGf (0.0);
886         OUT_RINGf (0.0);
887         OUT_RINGf (1.0);
888
889         BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
890         OUT_RING  (4096<<16);
891         OUT_RING  (4096<<16);
892
893         return TRUE;
894 }
895