nv30: more header name changes...
[nouveau] / src / nv30_exa.c
1 /*
2  * Copyright 2007 Ben Skeggs
3  * Copyright 2007 Stephane Marchesin
4  * Copyright 2007 Jeremy Kolb
5  * Copyright 2007 Patrice Mandin
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
22  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  */
25
26 #include "nv_include.h"
27 #include "nv30_shaders.h"
28
/*
 * One row of the render-target format table: pairs a Render picture format
 * with the value this driver writes as the NV34TCL_RT_FORMAT word.
 */
struct nv_pict_surface_format {
        int      pict_fmt;      /* PICT_* picture format identifier */
        uint32_t card_fmt;      /* hardware render-target format code */
};
typedef struct nv_pict_surface_format nv_pict_surface_format_t;
33
/*
 * One row of the texture format table: pairs a Render picture format with
 * the texture format code and the component swizzle bits programmed for it.
 */
struct nv_pict_texture_format {
        int      pict_fmt;      /* PICT_* picture format identifier */
        uint32_t card_fmt;      /* shifted into the TX_FORMAT word */
        uint32_t card_swz;      /* OR'ed with the pitch in the swizzle word */
};
typedef struct nv_pict_texture_format nv_pict_texture_format_t;
39
40 typedef struct nv_pict_op {
41         Bool     src_alpha;
42         Bool     dst_alpha;
43         uint32_t src_card_op;
44         uint32_t dst_card_op;
45 } nv_pict_op_t;
46
47 typedef struct nv30_exa_state {
48         Bool have_mask;
49
50         struct {
51                 PictTransformPtr transform;
52                 float width;
53                 float height;
54         } unit[2];
55 } nv30_exa_state_t;
56 static nv30_exa_state_t exa_state;
57 #define NV30EXA_STATE nv30_exa_state_t *state = &exa_state
58
59 static nv_pict_surface_format_t
60 NV30SurfaceFormat[] = {
61         { PICT_a8r8g8b8 , 0x148 },
62         { PICT_a8b8g8r8 , 0x150 },
63         { PICT_x8r8g8b8 , 0x145 },
64         { PICT_x8b8g8r8 , 0x14f },
65         { PICT_r5g6b5   , 0x143 },
66         { PICT_a8       , 0x149 },
67 };
68
69 static nv_pict_surface_format_t *
70 NV30_GetPictSurfaceFormat(int format)
71 {
72         int i;
73
74         for(i=0;i<sizeof(NV30SurfaceFormat)/sizeof(NV30SurfaceFormat[0]);i++)
75         {
76                 if (NV30SurfaceFormat[i].pict_fmt == format)
77                         return &NV30SurfaceFormat[i];
78         }
79
80         return NULL;
81 }
82
/*
 * Indices into the fragment program tables below.
 *
 * NV30EXA_FPID is an enum tag.  The previous spelling,
 * "enum { ... } NV30EXA_FPID;", instead defined an unused global variable
 * of that name with external linkage.
 */
enum NV30EXA_FPID {
        NV30EXA_FPID_PASS_COL0 = 0,
        NV30EXA_FPID_PASS_TEX0 = 1,
        NV30EXA_FPID_COMPOSITE_MASK = 2,
        NV30EXA_FPID_COMPOSITE_MASK_SA_CA = 3,
        NV30EXA_FPID_COMPOSITE_MASK_CA = 4,
        NV30EXA_FPID_MAX = 5
};
91
92 static nv_shader_t *nv40_fp_map[NV30EXA_FPID_MAX] = {
93         &nv30_fp_pass_col0,
94         &nv30_fp_pass_tex0,
95         &nv30_fp_composite_mask,
96         &nv30_fp_composite_mask_sa_ca,
97         &nv30_fp_composite_mask_ca
98 };
99
100 static nv_shader_t *nv40_fp_map_a8[NV30EXA_FPID_MAX];
101
102 static void
103 NV30EXAHackupA8Shaders(ScrnInfoPtr pScrn)
104 {
105         int s;
106
107         for (s = 0; s < NV30EXA_FPID_MAX; s++) {
108                 nv_shader_t *def, *a8;
109
110                 def = nv40_fp_map[s];
111                 a8 = xcalloc(1, sizeof(nv_shader_t));
112                 a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs;
113                 a8->size = def->size + 4;
114                 memcpy(a8->data, def->data, def->size * sizeof(uint32_t));
115                 nv40_fp_map_a8[s] = a8;
116
117                 a8->data[a8->size - 8 + 0] &= ~0x00000081;
118                 a8->data[a8->size - 4 + 0]  = 0x01401e81;
119                 a8->data[a8->size - 4 + 1]  = 0x1c9dfe00;
120                 a8->data[a8->size - 4 + 2]  = 0x0001c800;
121                 a8->data[a8->size - 4 + 3]  = 0x0001c800;
122         }
123 }
124
/* should be in nouveau_reg.h at some point.. */

/* Bit positions of the four S0 selector fields. */
#define NV34TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT      14
#define NV34TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT      12
#define NV34TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT      10
#define NV34TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT       8

/* Values an S0 selector field can take. */
#define NV34TCL_TX_SWIZZLE_UNIT_S0_X_ZERO        0
#define NV34TCL_TX_SWIZZLE_UNIT_S0_X_ONE         1
#define NV34TCL_TX_SWIZZLE_UNIT_S0_X_S1          2

/* Bit positions of the four S1 selector fields. */
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT       6
#define NV34TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT       4
#define NV34TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT       2
#define NV34TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT       0

/* Values an S1 selector field can take. */
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_W           0
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_Z           1
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_Y           2
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_X           3
141
142 #define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                       \
143   {                                                                           \
144   PICT_##r,                                                                   \
145   (tf),                                                                       \
146   (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##ts0x << NV34TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT)|\
147   (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##ts0y << NV34TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT)|\
148   (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##ts0z << NV34TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT)|\
149   (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##ts0w << NV34TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT)|\
150   (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##ts1x << NV34TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT)|\
151   (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##ts1y << NV34TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT)|\
152   (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##ts1z << NV34TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT)|\
153   (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##ts1w << NV34TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT)\
154   }
155
156 static nv_pict_texture_format_t
157 NV30TextureFormat[] = {
158         _(a8r8g8b8, 0x12,   S1,   S1,   S1,   S1, X, Y, Z, W),
159         _(a8b8g8r8, 0x12,   S1,   S1,   S1,   S1, Z, Y, X, W),
160         _(x8r8g8b8, 0x12,   S1,   S1,   S1,  ONE, X, Y, Z, W),
161         _(x8b8g8r8, 0x12,   S1,   S1,   S1,  ONE, Z, Y, X, W),
162         _(a1r5g5b5, 0x10,   S1,   S1,   S1,   S1, X, Y, Z, W),
163         _(x1r5g5b5, 0x10,   S1,   S1,   S1,  ONE, X, Y, Z, W),
164         _(x4r4g4b4, 0x1d,   S1,   S1,   S1,  ONE, X, Y, Z, W),
165         _(a4r4g4b4, 0x1d,   S1,   S1,   S1,   S1, X, Y, Z, W),
166         _(      a8, 0x1b, ZERO, ZERO, ZERO,   S1, X, X, X, X),
167 };
168
169
170 static nv_pict_texture_format_t *
171 NV30_GetPictTextureFormat(int format)
172 {
173         int i;
174
175         for(i=0;i<sizeof(NV30TextureFormat)/sizeof(NV30TextureFormat[0]);i++)
176         {
177                 if (NV30TextureFormat[i].pict_fmt == format)
178                         return &NV30TextureFormat[i];
179         }
180
181         return NULL;
182 }
183
/* Hardware blend factor codes; BF(x) is shorthand for NV34TCL_BF_x. */
#define BF(bf) NV34TCL_BF_##bf

/* Constant factors. */
#define NV34TCL_BF_ZERO                                     0x0000
#define NV34TCL_BF_ONE                                      0x0001

/* Factors taken from the source. */
#define NV34TCL_BF_SRC_COLOR                                0x0300
#define NV34TCL_BF_ONE_MINUS_SRC_COLOR                      0x0301
#define NV34TCL_BF_SRC_ALPHA                                0x0302
#define NV34TCL_BF_ONE_MINUS_SRC_ALPHA                      0x0303
#define NV34TCL_BF_ALPHA_SATURATE                           0x0308

/* Factors taken from the destination. */
#define NV34TCL_BF_DST_ALPHA                                0x0304
#define NV34TCL_BF_ONE_MINUS_DST_ALPHA                      0x0305
#define NV34TCL_BF_DST_COLOR                                0x0306
#define NV34TCL_BF_ONE_MINUS_DST_COLOR                      0x0307
196
197 static nv_pict_op_t 
198 NV30PictOp[] = {
199 /* Clear       */ { 0, 0, BF(               ZERO), BF(               ZERO) },
200 /* Src         */ { 0, 0, BF(                ONE), BF(               ZERO) },
201 /* Dst         */ { 0, 0, BF(               ZERO), BF(                ONE) },
202 /* Over        */ { 1, 0, BF(                ONE), BF(ONE_MINUS_SRC_ALPHA) },
203 /* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(                ONE) },
204 /* In          */ { 0, 1, BF(          DST_ALPHA), BF(               ZERO) },
205 /* InReverse   */ { 1, 0, BF(               ZERO), BF(          SRC_ALPHA) },
206 /* Out         */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(               ZERO) },
207 /* OutReverse  */ { 1, 0, BF(               ZERO), BF(ONE_MINUS_SRC_ALPHA) },
208 /* Atop        */ { 1, 1, BF(          DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
209 /* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(          SRC_ALPHA) },
210 /* Xor         */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
211 /* Add         */ { 0, 0, BF(                ONE), BF(                ONE) }
212 };
213
214 static nv_pict_op_t *
215 NV30_GetPictOpRec(int op)
216 {
217         if (op >= PictOpSaturate)
218                 return NULL;
219 #if 0
220         switch(op)
221         {
222                 case 0:ErrorF("Op Clear\n");break;
223                 case 1:ErrorF("Op Src\n");break;
224                 case 2:ErrorF("Op Dst\n");break;
225                 case 3:ErrorF("Op Over\n");break;
226                 case 4:ErrorF("Op OverReverse\n");break;
227                 case 5:ErrorF("Op In\n");break;
228                 case 6:ErrorF("Op InReverse\n");break;
229                 case 7:ErrorF("Op Out\n");break;
230                 case 8:ErrorF("Op OutReverse\n");break;
231                 case 9:ErrorF("Op Atop\n");break;
232                 case 10:ErrorF("Op AtopReverse\n");break;
233                 case 11:ErrorF("Op Xor\n");break;
234                 case 12:ErrorF("Op Add\n");break;
235         }
236 #endif
237         return &NV30PictOp[op];
238 }
239
240 static void
241 NV30_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
242                 PictFormatShort dest_format, Bool component_alpha)
243 {
244         NVPtr pNv = NVPTR(pScrn);
245         uint32_t sblend, dblend;
246
247         sblend = blend->src_card_op;
248         dblend = blend->dst_card_op;
249
250         if (blend->dst_alpha) {
251                 if (!PICT_FORMAT_A(dest_format)) {
252                         if (sblend == BF(DST_ALPHA)) {
253                                 sblend = BF(ONE);
254                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
255                                 sblend = BF(ZERO);
256                         }
257                 } else if (dest_format == PICT_a8) {
258                         if (sblend == BF(DST_ALPHA)) {
259                                 sblend = BF(DST_COLOR);
260                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
261                                 sblend = BF(ONE_MINUS_DST_COLOR);
262                         }
263                 }
264         }
265
266         if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
267                 if (dblend == BF(SRC_ALPHA)) {
268                         dblend = BF(SRC_COLOR);
269                 } else if (dblend == BF(ONE_MINUS_SRC_ALPHA)) {
270                         dblend = BF(ONE_MINUS_SRC_COLOR);
271                 }
272         }
273
274         if (sblend == BF(ONE) && dblend == BF(ZERO)) {
275                 BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 1);
276                 OUT_RING  (0);
277         } else {
278                 BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 3);
279                 OUT_RING  (1);
280                 OUT_RING  ((sblend << 16) | sblend);
281                 OUT_RING  ((dblend << 16) | dblend);
282         }
283 }
284
285 static Bool
286 NV30EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
287 {
288         NVPtr pNv = NVPTR(pScrn);
289         nv_pict_texture_format_t *fmt;
290         uint32_t card_filter, card_repeat;
291         NV30EXA_STATE;
292
293         fmt = NV30_GetPictTextureFormat(pPict->format);
294         if (!fmt)
295                 return FALSE;
296
297         card_repeat = 3; /* repeatNone */
298
299         if (pPict->filter == PictFilterBilinear)
300                 card_filter = 2;
301         else
302                 card_filter = 1;
303
304         BEGIN_RING(Nv3D, NV34TCL_TX_OFFSET(unit), 8);
305         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
306
307         OUT_RING  (NV34TCL_TX_FORMAT_DIMS_2D |
308                         (fmt->card_fmt << NV34TCL_TX_FORMAT_FORMAT_SHIFT) |
309                         (1 << NV34TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT) |
310                         (log2i(pPix->drawable.width)  << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT) |
311                         (log2i(pPix->drawable.height) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT) |
312                         8 |
313                         NV34TCL_TX_FORMAT_DMA0);
314
315         OUT_RING  ((card_repeat << NV34TCL_TX_WRAP_S_SHIFT) |
316                         (card_repeat << NV34TCL_TX_WRAP_T_SHIFT) |
317                         (card_repeat << NV34TCL_TX_WRAP_R_SHIFT));
318         OUT_RING  (NV34TCL_TX_ENABLE_ENABLE);
319         OUT_RING  ((((uint32_t)exaGetPixmapPitch(pPix)) << NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT ) | 
320                         fmt->card_swz);
321
322         OUT_RING  ((card_filter << NV34TCL_TX_FILTER_MINIFY_SHIFT) /* min */ |
323                         (card_filter << NV34TCL_TX_FILTER_MAGNIFY_SHIFT) /* mag */ |
324                         0x2000 /* engine lock */);
325         OUT_RING  ((pPix->drawable.width << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pPix->drawable.height);
326         OUT_RING  (0); /* border ARGB */
327
328         state->unit[unit].width         = (float)pPix->drawable.width;
329         state->unit[unit].height        = (float)pPix->drawable.height;
330         state->unit[unit].transform     = pPict->transform;
331
332         return TRUE;
333 }
334
335 static Bool
336 NV30_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict)
337 {
338         NVPtr pNv = NVPTR(pScrn);
339         nv_pict_surface_format_t *fmt;
340
341         fmt = NV30_GetPictSurfaceFormat(pPict->format);
342         if (!fmt) {
343                 ErrorF("AIII no format\n");
344                 return FALSE;
345         }
346
347         uint32_t pitch = (uint32_t)exaGetPixmapPitch(pPix);
348
349         BEGIN_RING(Nv3D, NV34TCL_RT_FORMAT, 3);
350         OUT_RING  (fmt->card_fmt); /* format */
351         OUT_RING  (pitch << 16 | pitch);
352         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
353
354         return TRUE;
355 }
356
357 static Bool
358 NV30EXACheckCompositeTexture(PicturePtr pPict)
359 {
360         nv_pict_texture_format_t *fmt;
361         int w = pPict->pDrawable->width;
362         int h = pPict->pDrawable->height;
363
364         if ((w > 4096) || (h>4096))
365                 NOUVEAU_FALLBACK("picture too large, %dx%d\n", w, h);
366
367         fmt = NV30_GetPictTextureFormat(pPict->format);
368         if (!fmt)
369                 NOUVEAU_FALLBACK("picture format 0x%08x not supported\n",
370                                 pPict->format);
371
372         if (pPict->filter != PictFilterNearest &&
373                         pPict->filter != PictFilterBilinear)
374                 NOUVEAU_FALLBACK("filter 0x%x not supported\n", pPict->filter);
375
376         if (!(w==1 && h==1) && pPict->repeat && pPict->repeatType != RepeatNone)
377                 NOUVEAU_FALLBACK("repeat 0x%x not supported (surface %dx%d)\n",
378                                  pPict->repeatType,w,h);
379
380         return TRUE;
381 }
382
383 Bool
384 NV30EXACheckComposite(int op, PicturePtr psPict,
385                 PicturePtr pmPict,
386                 PicturePtr pdPict)
387 {
388         nv_pict_surface_format_t *fmt;
389         nv_pict_op_t *opr;
390
391         opr = NV30_GetPictOpRec(op);
392         if (!opr)
393                 NOUVEAU_FALLBACK("unsupported blend op 0x%x\n", op);
394
395         fmt = NV30_GetPictSurfaceFormat(pdPict->format);
396         if (!fmt)
397                 NOUVEAU_FALLBACK("dst picture format 0x%08x not supported\n",
398                                 pdPict->format);
399
400         if (!NV30EXACheckCompositeTexture(psPict))
401                 NOUVEAU_FALLBACK("src picture\n");
402         if (pmPict) {
403                 if (pmPict->componentAlpha &&
404                                 PICT_FORMAT_RGB(pmPict->format) &&
405                                 opr->src_alpha && opr->src_card_op != BF(ZERO))
406                         NOUVEAU_FALLBACK("mask CA + SA\n");
407                 if (!NV30EXACheckCompositeTexture(pmPict))
408                         NOUVEAU_FALLBACK("mask picture\n");
409         }
410
411         return TRUE;
412 }
413
414 Bool
415 NV30EXAPrepareComposite(int op, PicturePtr psPict,
416                 PicturePtr pmPict,
417                 PicturePtr pdPict,
418                 PixmapPtr  psPix,
419                 PixmapPtr  pmPix,
420                 PixmapPtr  pdPix)
421 {
422         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
423         NVPtr pNv = NVPTR(pScrn);
424         nv_pict_op_t *blend;
425         int fpid = NV30EXA_FPID_PASS_COL0;
426         NV30EXA_STATE;
427
428         blend = NV30_GetPictOpRec(op);
429
430         NV30_SetupBlend(pScrn, blend, pdPict->format,
431                         (pmPict && pmPict->componentAlpha &&
432                          PICT_FORMAT_RGB(pmPict->format)));
433
434         NV30_SetupSurface(pScrn, pdPix, pdPict);
435         NV30EXATexture(pScrn, psPix, psPict, 0);
436
437 #if 0
438 #define printformat(f) ErrorF("(%xh %s %dbpp A%dR%dG%dB%d)",f,(f>>16)&0xf==2?"ARGB":"ABGR",(f>>24),(f&0xf000)>>12,(f&0xf00)>>8,(f&0xf0)>>4,f&0xf)
439         ErrorF("Preparecomposite src(%dx%d)",psPict->pDrawable->width,psPict->pDrawable->height);
440         printformat((psPict->format));
441         ErrorF(" dst(%dx%d)",pdPict->pDrawable->width,pdPict->pDrawable->height);
442         printformat((pdPict->format));
443         if (pmPict)
444         {
445                 ErrorF(" mask(%dx%d)",pmPict->pDrawable->width,pmPict->pDrawable->height);
446                 printformat((pmPict->format));
447         }
448         ErrorF("\n");
449 #endif
450
451         if (pmPict) {
452                 NV30EXATexture(pScrn, pmPix, pmPict, 1);
453
454                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
455                         if (blend->src_alpha)
456                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_SA_CA;
457                         else
458                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_CA;
459                 } else {
460                         fpid = NV30EXA_FPID_COMPOSITE_MASK;
461                 }
462
463                 state->have_mask = TRUE;
464         } else {
465                 fpid = NV30EXA_FPID_PASS_TEX0;
466
467                 state->have_mask = FALSE;
468         }
469
470         if (pdPict->format == PICT_a8)
471                 NV30_LoadFragProg(pScrn, nv40_fp_map_a8[fpid]);
472         else
473                 NV30_LoadFragProg(pScrn, nv40_fp_map[fpid]);
474
475         BEGIN_RING(Nv3D, 0x23c, 1);
476         OUT_RING  (pmPict?3:1);
477
478         BEGIN_RING(Nv3D, NV34TCL_VERTEX_BEGIN_END, 1);
479         OUT_RING  (NV34TCL_VERTEX_BEGIN_END_TRIANGLES);
480
481         return TRUE;
482 }
483
484 #define xFixedToFloat(v) \
485         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
486
487 static void
488 NV30EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
489                                           float *x_ret, float *y_ret)
490 {
491         PictVector v;
492
493         if (t) {
494                 v.vector[0] = IntToxFixed(x);
495                 v.vector[1] = IntToxFixed(y);
496                 v.vector[2] = xFixed1;
497                 PictureTransformPoint(t, &v);
498                 *x_ret = xFixedToFloat(v.vector[0]);
499                 *y_ret = xFixedToFloat(v.vector[1]);
500         } else {
501                 *x_ret = (float)x;
502                 *y_ret = (float)y;
503         }
504 }
505
/*
 * Pack a destination coordinate pair into one word: y in the upper 16 bits,
 * x in the lower 16 bits.
 *
 * Each value is converted to unsigned and masked to 16 bits before being
 * combined.  The previous form, ((dy)<<16)|(dx), left-shifted negative y
 * values (undefined behaviour in C; the composite code routinely passes
 * dstY - height, which is negative near the top of the drawable), and a
 * negative x would have overwritten the y half with sign bits.
 */
#define CV_PACK_XY(dx,dy)                                                      \
        ((((uint32_t)(dy) & 0xffffu) << 16) | ((uint32_t)(dx) & 0xffffu))

/* Emit one vertex with source and mask texture coordinates. */
#define CV_OUTm(sx,sy,mx,my,dx,dy) do {                                        \
        BEGIN_RING(Nv3D, NV34TCL_VTX_ATTR_2F_X(8), 4);                         \
        OUT_RINGf ((sx)); OUT_RINGf ((sy));                                    \
        OUT_RINGf ((mx)); OUT_RINGf ((my));                                    \
        BEGIN_RING(Nv3D, NV34TCL_VTX_ATTR_2I(0), 1);                           \
        OUT_RING  (CV_PACK_XY((dx), (dy)));                                    \
} while(0)
/* Emit one vertex with source texture coordinates only. */
#define CV_OUT(sx,sy,dx,dy) do {                                               \
        BEGIN_RING(Nv3D, NV34TCL_VTX_ATTR_2F_X(8), 2);                         \
        OUT_RINGf ((sx)); OUT_RINGf ((sy));                                    \
        BEGIN_RING(Nv3D, NV34TCL_VTX_ATTR_2I(0), 1);                           \
        OUT_RING  (CV_PACK_XY((dx), (dy)));                                    \
} while(0)
519
520 void
521 NV30EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
522                                   int maskX, int maskY,
523                                   int dstX , int dstY,
524                                   int width, int height)
525 {
526         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
527         NVPtr pNv = NVPTR(pScrn);
528         float sX0, sX1, sX2, sY0, sY1, sY2;
529         float mX0, mX1, mX2, mY0, mY1, mY2;
530         NV30EXA_STATE;
531
532         /* We're drawing a triangle, we need to scissor it to a quad. */
533         /* The scissors are here for a good reason, we don't get the full image, but just a part. */
534         /* Handling the cliprects is done for us already. */
535         BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
536         OUT_RING  ((width << 16) | dstX);
537         OUT_RING  ((height << 16) | dstY);
538
539 #if 0
540         ErrorF("Composite [%dx%d] (%d,%d)IN(%d,%d)OP(%d,%d)\n",width,height,srcX,srcY,maskX,maskY,dstX,dstY);
541 #endif
542         NV30EXATransformCoord(state->unit[0].transform, 
543                                 srcX, srcY - height,
544                                 state->unit[0].width,
545                                 state->unit[0].height, &sX0, &sY0);
546         NV30EXATransformCoord(state->unit[0].transform,
547                                 srcX, srcY + height,
548                                 state->unit[0].width,
549                                 state->unit[0].height, &sX1, &sY1);
550         NV30EXATransformCoord(state->unit[0].transform,
551                                 srcX + 2*width, srcY + height,
552                                 state->unit[0].width,
553                                 state->unit[0].height, &sX2, &sY2);
554
555         if (state->have_mask) {
556                 NV30EXATransformCoord(state->unit[1].transform, 
557                                         maskX, maskY - height,
558                                         state->unit[1].width,
559                                         state->unit[1].height, &mX0, &mY0);
560                 NV30EXATransformCoord(state->unit[1].transform,
561                                         maskX, maskY + height,
562                                         state->unit[1].width,
563                                         state->unit[1].height, &mX1, &mY1);
564                 NV30EXATransformCoord(state->unit[1].transform,
565                                         maskX + 2*width, maskY + height,
566                                         state->unit[1].width,
567                                         state->unit[1].height, &mX2, &mY2);
568
569                 CV_OUTm(sX0 , sY0 , mX0, mY0, dstX                      ,       dstY - height);
570                 CV_OUTm(sX1 , sY1 , mX1, mY1, dstX                      ,       dstY + height);
571                 CV_OUTm(sX2 , sY2 , mX2, mY2, dstX + 2*width    ,       dstY + height);
572         } else {
573                 CV_OUT(sX0 , sY0 , dstX                 ,       dstY - height);
574                 CV_OUT(sX1 , sY1 , dstX                 ,       dstY + height);
575                 CV_OUT(sX2 , sY2 , dstX + 2*width       ,       dstY + height);
576         }
577 }
578
/*
 * Finish a composite sequence: writes 0 to NV34TCL_VERTEX_BEGIN_END (the
 * method NV30EXAPrepareComposite set to TRIANGLES) and then fires the ring.
 */
void
NV30EXADoneComposite(PixmapPtr pdPix)
{
        ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
        NVPtr pNv = NVPTR(pScrn);

        BEGIN_RING(Nv3D, NV34TCL_VERTEX_BEGIN_END, 1);
        OUT_RING  (0);

        FIRE_RING();
}
590
591 Bool
592 NVAccelInitNV30TCL(ScrnInfoPtr pScrn)
593 {
594         NVPtr pNv = NVPTR(pScrn);
595         uint32_t class = 0, chipset;
596         int i;
597
598         NV30EXAHackupA8Shaders(pScrn);
599
600 #define NV30TCL_CHIPSET_3X_MASK 0x00000003
601 #define NV35TCL_CHIPSET_3X_MASK 0x000001e0
602 #define NV34TCL_CHIPSET_3X_MASK 0x00000010
603
604         chipset = (nvReadMC(pNv, NV_PMC_BOOT_0) >> 20) & 0xff;
605         if ((chipset & 0xf0) != NV_ARCH_30)
606                 return TRUE;
607         chipset &= 0xf;
608
609         if (NV30TCL_CHIPSET_3X_MASK & (1<<chipset))
610                 class = NV30TCL;
611         else if (NV35TCL_CHIPSET_3X_MASK & (1<<chipset))
612                 class = NV35TCL;
613         else if (NV34TCL_CHIPSET_3X_MASK & (1<<chipset))
614                 class = NV34TCL;
615         else {
616                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
617                            "NV30EXA: Unknown chipset NV3%1x\n", chipset);
618                 return FALSE;
619         }
620
621
622         if (!pNv->Nv3D) {
623                 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
624                         return FALSE;
625         }
626
627         BEGIN_RING(Nv3D, NV34TCL_DMA_TEXTURE0, 3);
628         OUT_RING  (pNv->chan->vram->handle);
629         OUT_RING  (pNv->chan->gart->handle);
630         OUT_RING  (pNv->chan->vram->handle);
631         BEGIN_RING(Nv3D, NV34TCL_DMA_IN_MEMORY7, 1);
632         OUT_RING  (pNv->chan->vram->handle);
633         BEGIN_RING(Nv3D, NV34TCL_DMA_COLOR0, 2);
634         OUT_RING  (pNv->chan->vram->handle);
635         OUT_RING  (pNv->chan->vram->handle);
636         BEGIN_RING(Nv3D, NV34TCL_DMA_IN_MEMORY8, 1);
637         OUT_RING  (pNv->chan->vram->handle);
638
639         for (i=1; i<8; i++) {
640                 BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 2);
641                 OUT_RING  (0);
642                 OUT_RING  (0);
643         }
644
645         BEGIN_RING(Nv3D, 0x220, 1);
646         OUT_RING  (1);
647
648         BEGIN_RING(Nv3D, 0x03b0, 1);
649         OUT_RING  (0x00100000);
650         BEGIN_RING(Nv3D, 0x1454, 1);
651         OUT_RING  (0);
652         BEGIN_RING(Nv3D, 0x1d80, 1);
653         OUT_RING  (3);
654         BEGIN_RING(Nv3D, 0x1450, 1);
655         OUT_RING  (0x00030004);
656
657         /* NEW */
658         BEGIN_RING(Nv3D, 0x1e98, 1);
659         OUT_RING  (0);
660         BEGIN_RING(Nv3D, 0x17e0, 3);
661         OUT_RING  (0);
662         OUT_RING  (0);
663         OUT_RING  (0x3f800000);
664         BEGIN_RING(Nv3D, 0x1f80, 16);
665         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
666         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
667         OUT_RING  (0x0000ffff);
668         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
669         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
670
671         BEGIN_RING(Nv3D, 0x120, 3);
672         OUT_RING  (0);
673         OUT_RING  (1);
674         OUT_RING  (2);
675
676         BEGIN_RING(NvImageBlit, 0x120, 3);
677         OUT_RING  (0);
678         OUT_RING  (1);
679         OUT_RING  (2);
680
681         BEGIN_RING(Nv3D, 0x1d88, 1);
682         OUT_RING  (0x00001200);
683
684         BEGIN_RING(Nv3D, NV34TCL_RC_ENABLE, 1);
685         OUT_RING  (0);
686
687         /* Attempt to setup a known state.. Probably missing a heap of
688          * stuff here..
689          */
690         BEGIN_RING(Nv3D, NV34TCL_STENCIL_FRONT_ENABLE, 1);
691         OUT_RING  (0);
692         BEGIN_RING(Nv3D, NV34TCL_STENCIL_BACK_ENABLE, 1);
693         OUT_RING  (0);
694         BEGIN_RING(Nv3D, NV34TCL_ALPHA_FUNC_ENABLE, 1);
695         OUT_RING  (0);
696         BEGIN_RING(Nv3D, NV34TCL_DEPTH_WRITE_ENABLE, 2);
697         OUT_RING  (0); /* wr disable */
698         OUT_RING  (0); /* test disable */
699         BEGIN_RING(Nv3D, NV34TCL_COLOR_MASK, 1);
700         OUT_RING  (0x01010101); /* TR,TR,TR,TR */
701         BEGIN_RING(Nv3D, NV34TCL_CULL_FACE_ENABLE, 1);
702         OUT_RING  (0);
703         BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 5);
704         OUT_RING  (0);                          /* Blend enable */
705         OUT_RING  (0);                          /* Blend src */
706         OUT_RING  (0);                          /* Blend dst */
707         OUT_RING  (0x00000000);                 /* Blend colour */
708         OUT_RING  (0x8006);                     /* FUNC_ADD */
709         BEGIN_RING(Nv3D, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
710         OUT_RING  (0);
711         OUT_RING  (0x1503 /*GL_COPY*/);
712         BEGIN_RING(Nv3D, NV34TCL_DITHER_ENABLE, 1);
713         OUT_RING  (1);
714         BEGIN_RING(Nv3D, NV34TCL_SHADE_MODEL, 1);
715         OUT_RING  (0x1d01 /*GL_SMOOTH*/);
716         BEGIN_RING(Nv3D, NV34TCL_POLYGON_OFFSET_FACTOR,2);
717         OUT_RINGf (0.0);
718         OUT_RINGf (0.0);
719         BEGIN_RING(Nv3D, NV34TCL_POLYGON_MODE_FRONT, 2);
720         OUT_RING  (0x1b02 /*GL_FILL*/);
721         OUT_RING  (0x1b02 /*GL_FILL*/);
722         /* - Disable texture units
723          * - Set fragprog to MOVR result.color, fragment.color */
724         for (i=0;i<4;i++) {
725                 BEGIN_RING(Nv3D, NV34TCL_TX_ENABLE(i), 1);
726                 OUT_RING  (0);
727         }
728         /* Polygon stipple */
729         BEGIN_RING(Nv3D, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 0x20);
730         for (i=0;i<0x20;i++)
731                 OUT_RING  (0xFFFFFFFF);
732
733         BEGIN_RING(Nv3D, NV34TCL_DEPTH_RANGE_NEAR, 2);
734         OUT_RINGf  (0.0);
735         OUT_RINGf  (1.0);
736
737         /* Ok.  If you start X with the nvidia driver, kill it, and then
738          * start X with nouveau you will get black rendering instead of
739          * what you'd expect.  This fixes the problem, and it seems that
740          * it's not needed between nouveau restarts - which suggests that
741          * the 3D context (wherever it's stored?) survives somehow.
742          */
743         //BEGIN_RING(Nv3D, 0x1d60,1);
744         //OUT_RING  (0x03008000);
745
746         int w=4096;
747         int h=4096;
748         int pitch=4096*4;
749         BEGIN_RING(Nv3D, NV34TCL_RT_HORIZ, 5);
750         OUT_RING  (w<<16);
751         OUT_RING  (h<<16);
752         OUT_RING  (0x148); /* format */
753         OUT_RING  (pitch << 16 | pitch);
754         OUT_RING  (0x0);
755         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
756         OUT_RING  (0);
757         BEGIN_RING(Nv3D, 0x0a00, 2);
758         OUT_RING  ((w<<16) | 0);
759         OUT_RING  ((h<<16) | 0);
760         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
761         OUT_RING  ((w-1)<<16);
762         OUT_RING  ((h-1)<<16);
763         BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
764         OUT_RING  (w<<16);
765         OUT_RING  (h<<16);
766         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_HORIZ, 2);
767         OUT_RING  (w<<16);
768         OUT_RING  (h<<16);
769
770         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_TRANSLATE_X, 8);
771         OUT_RINGf (0.0);
772         OUT_RINGf (0.0);
773         OUT_RINGf (0.0);
774         OUT_RINGf (0.0);
775         OUT_RINGf (1.0);
776         OUT_RINGf (1.0);
777         OUT_RINGf (1.0);
778         OUT_RINGf (0.0);
779
780         BEGIN_RING(Nv3D, NV34TCL_MODELVIEW_MATRIX(0), 16);
781         OUT_RINGf (1.0);
782         OUT_RINGf (0.0);
783         OUT_RINGf (0.0);
784         OUT_RINGf (0.0);
785         OUT_RINGf (0.0);
786         OUT_RINGf (1.0);
787         OUT_RINGf (0.0);
788         OUT_RINGf (0.0);
789         OUT_RINGf (0.0);
790         OUT_RINGf (0.0);
791         OUT_RINGf (1.0);
792         OUT_RINGf (0.0);
793         OUT_RINGf (0.0);
794         OUT_RINGf (0.0);
795         OUT_RINGf (0.0);
796         OUT_RINGf (1.0);
797
798         BEGIN_RING(Nv3D, NV34TCL_PROJECTION_MATRIX(0), 16);
799         OUT_RINGf (1.0);
800         OUT_RINGf (0.0);
801         OUT_RINGf (0.0);
802         OUT_RINGf (0.0);
803         OUT_RINGf (0.0);
804         OUT_RINGf (1.0);
805         OUT_RINGf (0.0);
806         OUT_RINGf (0.0);
807         OUT_RINGf (0.0);
808         OUT_RINGf (0.0);
809         OUT_RINGf (1.0);
810         OUT_RINGf (0.0);
811         OUT_RINGf (0.0);
812         OUT_RINGf (0.0);
813         OUT_RINGf (0.0);
814         OUT_RINGf (1.0);
815
816         BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
817         OUT_RING  (4096<<16);
818         OUT_RING  (4096<<16);
819
820         return TRUE;
821 }
822