Fix my nv10 cursor.
[nouveau] / src / nv30_exa.c
1 /*
2  * Copyright 2007 Ben Skeggs
3  * Copyright 2007 Stephane Marchesin
4  * Copyright 2007 Jeremy Kolb
5  * Copyright 2007 Patrice Mandin
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
22  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  */
25
26 #include "nv_include.h"
27 #include "nv30_shaders.h"
28
29 typedef struct nv_pict_surface_format {
30         int      pict_fmt;
31         uint32_t card_fmt;
32 } nv_pict_surface_format_t;
33
34 typedef struct nv_pict_texture_format {
35         int      pict_fmt;
36         uint32_t card_fmt;
37         uint32_t card_swz;
38 } nv_pict_texture_format_t;
39
40 typedef struct nv_pict_op {
41         Bool     src_alpha;
42         Bool     dst_alpha;
43         uint32_t src_card_op;
44         uint32_t dst_card_op;
45 } nv_pict_op_t;
46
47 typedef struct nv30_exa_state {
48         Bool have_mask;
49
50         struct {
51                 PictTransformPtr transform;
52                 float width;
53                 float height;
54         } unit[2];
55 } nv30_exa_state_t;
56 static nv30_exa_state_t exa_state;
57 #define NV30EXA_STATE nv30_exa_state_t *state = &exa_state
58
59 static nv_pict_surface_format_t
60 NV30SurfaceFormat[] = {
61         { PICT_a8r8g8b8 , 0x148 },
62         { PICT_a8b8g8r8 , 0x150 },
63         { PICT_x8r8g8b8 , 0x145 },
64         { PICT_x8b8g8r8 , 0x14f },
65         { PICT_r5g6b5   , 0x143 },
66         { PICT_a8       , 0x149 },
67 };
68
69 static nv_pict_surface_format_t *
70 NV30_GetPictSurfaceFormat(int format)
71 {
72         int i;
73
74         for(i=0;i<sizeof(NV30SurfaceFormat)/sizeof(NV30SurfaceFormat[0]);i++)
75         {
76                 if (NV30SurfaceFormat[i].pict_fmt == format)
77                         return &NV30SurfaceFormat[i];
78         }
79
80         return NULL;
81 }
82
/*
 * Fragment program identifiers; each value is an index into the fragment
 * program tables, and NV30EXA_FPID_MAX is the number of programs.
 *
 * NV30EXA_FPID is the enum tag.  The previous spelling
 * (`enum { ... } NV30EXA_FPID;`) instead defined an unused global variable
 * with external linkage.
 */
enum NV30EXA_FPID {
	NV30EXA_FPID_PASS_COL0 = 0,
	NV30EXA_FPID_PASS_TEX0 = 1,
	NV30EXA_FPID_COMPOSITE_MASK = 2,
	NV30EXA_FPID_COMPOSITE_MASK_SA_CA = 3,
	NV30EXA_FPID_COMPOSITE_MASK_CA = 4,
	NV30EXA_FPID_MAX = 5
};
91
92 static nv_shader_t *nv40_fp_map[NV30EXA_FPID_MAX] = {
93         &nv30_fp_pass_col0,
94         &nv30_fp_pass_tex0,
95         &nv30_fp_composite_mask,
96         &nv30_fp_composite_mask_sa_ca,
97         &nv30_fp_composite_mask_ca
98 };
99
100 static nv_shader_t *nv40_fp_map_a8[NV30EXA_FPID_MAX];
101
102 static void
103 NV30EXAHackupA8Shaders(ScrnInfoPtr pScrn)
104 {
105         int s;
106
107         for (s = 0; s < NV30EXA_FPID_MAX; s++) {
108                 nv_shader_t *def, *a8;
109
110                 def = nv40_fp_map[s];
111                 a8 = xcalloc(1, sizeof(nv_shader_t));
112                 a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs;
113                 a8->size = def->size + 4;
114                 memcpy(a8->data, def->data, def->size * sizeof(uint32_t));
115                 nv40_fp_map_a8[s] = a8;
116
117                 a8->data[a8->size - 8 + 0] &= ~0x00000081;
118                 a8->data[a8->size - 4 + 0]  = 0x01401e81;
119                 a8->data[a8->size - 4 + 1]  = 0x1c9dfe00;
120                 a8->data[a8->size - 4 + 2]  = 0x0001c800;
121                 a8->data[a8->size - 4 + 3]  = 0x0001c800;
122         }
123 }
124
/* should be in nouveau_reg.h at some point.. */

/* Bit offsets of the eight selector fields inside the swizzle word. */
#define NV34TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT	14
#define NV34TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT	12
#define NV34TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT	10
#define NV34TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT	 8
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT	 6
#define NV34TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT	 4
#define NV34TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT	 2
#define NV34TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT	 0

/* Values for an S0 selector field. */
#define NV34TCL_TX_SWIZZLE_UNIT_S0_X_ZERO	 0
#define NV34TCL_TX_SWIZZLE_UNIT_S0_X_ONE	 1
#define NV34TCL_TX_SWIZZLE_UNIT_S0_X_S1		 2

/* Values for an S1 selector field. */
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_W		 0
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_Z		 1
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_Y		 2
#define NV34TCL_TX_SWIZZLE_UNIT_S1_X_X		 3

/*
 * Builds one texture format table initialiser: the PICT_ format name, the
 * card format, then the four S0 selectors (ZERO/ONE/S1) and the four S1
 * selectors (X/Y/Z/W), packed into a single swizzle word.
 */
#define _(pict, cardfmt, a0, b0, c0, d0, a1, b1, c1, d1)			\
  {										\
  PICT_##pict,									\
  (cardfmt),									\
  (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##a0 << NV34TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT) |	\
  (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##b0 << NV34TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT) |	\
  (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##c0 << NV34TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT) |	\
  (NV34TCL_TX_SWIZZLE_UNIT_S0_X_##d0 << NV34TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT) |	\
  (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##a1 << NV34TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT) |	\
  (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##b1 << NV34TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT) |	\
  (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##c1 << NV34TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT) |	\
  (NV34TCL_TX_SWIZZLE_UNIT_S1_X_##d1 << NV34TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT)	\
  }
155
156 static nv_pict_texture_format_t
157 NV30TextureFormat[] = {
158         _(a8r8g8b8, 0x12,   S1,   S1,   S1,   S1, X, Y, Z, W),
159         _(a8b8g8r8, 0x12,   S1,   S1,   S1,   S1, Z, Y, X, W),
160         _(x8r8g8b8, 0x12,   S1,   S1,   S1,  ONE, X, Y, Z, W),
161         _(x8b8g8r8, 0x12,   S1,   S1,   S1,  ONE, Z, Y, X, W),
162         _(a1r5g5b5, 0x10,   S1,   S1,   S1,   S1, X, Y, Z, W),
163         _(x1r5g5b5, 0x10,   S1,   S1,   S1,  ONE, X, Y, Z, W),
164         _(x4r4g4b4, 0x1d,   S1,   S1,   S1,  ONE, X, Y, Z, W),
165         _(a4r4g4b4, 0x1d,   S1,   S1,   S1,   S1, X, Y, Z, W),
166         _(      a8, 0x1b, ZERO, ZERO, ZERO,   S1, X, X, X, X),
167 };
168
169
170 static nv_pict_texture_format_t *
171 NV30_GetPictTextureFormat(int format)
172 {
173         int i;
174
175         for(i=0;i<sizeof(NV30TextureFormat)/sizeof(NV30TextureFormat[0]);i++)
176         {
177                 if (NV30TextureFormat[i].pict_fmt == format)
178                         return &NV30TextureFormat[i];
179         }
180
181         return NULL;
182 }
183
184 #define NV34TCL_BF_ZERO                                     0x0000
185 #define NV34TCL_BF_ONE                                      0x0001
186 #define NV34TCL_BF_SRC_COLOR                                0x0300
187 #define NV34TCL_BF_ONE_MINUS_SRC_COLOR                      0x0301
188 #define NV34TCL_BF_SRC_ALPHA                                0x0302
189 #define NV34TCL_BF_ONE_MINUS_SRC_ALPHA                      0x0303
190 #define NV34TCL_BF_DST_ALPHA                                0x0304
191 #define NV34TCL_BF_ONE_MINUS_DST_ALPHA                      0x0305
192 #define NV34TCL_BF_DST_COLOR                                0x0306
193 #define NV34TCL_BF_ONE_MINUS_DST_COLOR                      0x0307
194 #define NV34TCL_BF_ALPHA_SATURATE                           0x0308
195 #define BF(bf) NV34TCL_BF_##bf
196
197 static nv_pict_op_t 
198 NV30PictOp[] = {
199 /* Clear       */ { 0, 0, BF(               ZERO), BF(               ZERO) },
200 /* Src         */ { 0, 0, BF(                ONE), BF(               ZERO) },
201 /* Dst         */ { 0, 0, BF(               ZERO), BF(                ONE) },
202 /* Over        */ { 1, 0, BF(                ONE), BF(ONE_MINUS_SRC_ALPHA) },
203 /* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(                ONE) },
204 /* In          */ { 0, 1, BF(          DST_ALPHA), BF(               ZERO) },
205 /* InReverse   */ { 1, 0, BF(               ZERO), BF(          SRC_ALPHA) },
206 /* Out         */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(               ZERO) },
207 /* OutReverse  */ { 1, 0, BF(               ZERO), BF(ONE_MINUS_SRC_ALPHA) },
208 /* Atop        */ { 1, 1, BF(          DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
209 /* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(          SRC_ALPHA) },
210 /* Xor         */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
211 /* Add         */ { 0, 0, BF(                ONE), BF(                ONE) }
212 };
213
214 static nv_pict_op_t *
215 NV30_GetPictOpRec(int op)
216 {
217         if (op >= PictOpSaturate)
218                 return NULL;
219 #if 0
220         switch(op)
221         {
222                 case 0:ErrorF("Op Clear\n");break;
223                 case 1:ErrorF("Op Src\n");break;
224                 case 2:ErrorF("Op Dst\n");break;
225                 case 3:ErrorF("Op Over\n");break;
226                 case 4:ErrorF("Op OverReverse\n");break;
227                 case 5:ErrorF("Op In\n");break;
228                 case 6:ErrorF("Op InReverse\n");break;
229                 case 7:ErrorF("Op Out\n");break;
230                 case 8:ErrorF("Op OutReverse\n");break;
231                 case 9:ErrorF("Op Atop\n");break;
232                 case 10:ErrorF("Op AtopReverse\n");break;
233                 case 11:ErrorF("Op Xor\n");break;
234                 case 12:ErrorF("Op Add\n");break;
235         }
236 #endif
237         return &NV30PictOp[op];
238 }
239
/* Set to 1 to log the reason for every fallback through ErrorF(). */
#define FALLBACK_DEBUG 0

/*
 * FALLBACK(fmt, ...) makes the enclosing function return FALSE.  When
 * FALLBACK_DEBUG is 1 it first prints the message, prefixed with the
 * function name and line number.
 */
#if FALLBACK_DEBUG == 1
#define FALLBACK(fmt, ...)						\
	do {								\
		ErrorF("FALLBACK %s:%d> " fmt, __func__, __LINE__,	\
		       ##__VA_ARGS__);					\
		return FALSE;						\
	} while (0)
#else
#define FALLBACK(fmt, ...)						\
	do {								\
		return FALSE;						\
	} while (0)
#endif
251
252 static void
253 NV30_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
254                 PictFormatShort dest_format, Bool component_alpha)
255 {
256         NVPtr pNv = NVPTR(pScrn);
257         uint32_t sblend, dblend;
258
259         sblend = blend->src_card_op;
260         dblend = blend->dst_card_op;
261
262         if (blend->dst_alpha) {
263                 if (!PICT_FORMAT_A(dest_format)) {
264                         if (sblend == BF(DST_ALPHA)) {
265                                 sblend = BF(ONE);
266                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
267                                 sblend = BF(ZERO);
268                         }
269                 } else if (dest_format == PICT_a8) {
270                         if (sblend == BF(DST_ALPHA)) {
271                                 sblend = BF(DST_COLOR);
272                         } else if (sblend == BF(ONE_MINUS_DST_ALPHA)) {
273                                 sblend = BF(ONE_MINUS_DST_COLOR);
274                         }
275                 }
276         }
277
278         if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
279                 if (dblend == BF(SRC_ALPHA)) {
280                         dblend = BF(SRC_COLOR);
281                 } else if (dblend == BF(ONE_MINUS_SRC_ALPHA)) {
282                         dblend = BF(ONE_MINUS_SRC_COLOR);
283                 }
284         }
285
286         if (sblend == BF(ONE) && dblend == BF(ZERO)) {
287                 BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 1);
288                 OUT_RING  (0);
289         } else {
290                 BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 3);
291                 OUT_RING  (1);
292                 OUT_RING  ((sblend << 16) | sblend);
293                 OUT_RING  ((dblend << 16) | dblend);
294         }
295 }
296
297 static Bool
298 NV30EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
299 {
300         NVPtr pNv = NVPTR(pScrn);
301         nv_pict_texture_format_t *fmt;
302         uint32_t card_filter, card_repeat;
303         NV30EXA_STATE;
304
305         fmt = NV30_GetPictTextureFormat(pPict->format);
306         if (!fmt)
307                 return FALSE;
308
309         card_repeat = 3; /* repeatNone */
310
311         if (pPict->filter == PictFilterBilinear)
312                 card_filter = 2;
313         else
314                 card_filter = 1;
315
316         BEGIN_RING(Nv3D, NV34TCL_TX_OFFSET(unit), 8);
317         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
318
319         OUT_RING  (NV34TCL_TX_FORMAT_DIMS_2D |
320                         (fmt->card_fmt << NV34TCL_TX_FORMAT_FORMAT_SHIFT) |
321                         (1 << NV34TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT) |
322                         (log2i(pPix->drawable.width)  << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT) |
323                         (log2i(pPix->drawable.height) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT) |
324                         8 |
325                         NV34TCL_TX_FORMAT_DMA0);
326
327         OUT_RING  ((card_repeat << NV34TCL_TX_WRAP_S_SHIFT) |
328                         (card_repeat << NV34TCL_TX_WRAP_T_SHIFT) |
329                         (card_repeat << NV34TCL_TX_WRAP_R_SHIFT));
330         OUT_RING  (NV34TCL_TX_ENABLE_ENABLE);
331         OUT_RING  ((((uint32_t)exaGetPixmapPitch(pPix)) << NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT ) | 
332                         fmt->card_swz);
333
334         OUT_RING  ((card_filter << NV34TCL_TX_FILTER_MINIFY_SHIFT) /* min */ |
335                         (card_filter << NV34TCL_TX_FILTER_MAGNIFY_SHIFT) /* mag */ |
336                         0x2000 /* engine lock */);
337         OUT_RING  ((pPix->drawable.width << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | pPix->drawable.height);
338         OUT_RING  (0); /* border ARGB */
339
340         state->unit[unit].width         = (float)pPix->drawable.width;
341         state->unit[unit].height        = (float)pPix->drawable.height;
342         state->unit[unit].transform     = pPict->transform;
343
344         return TRUE;
345 }
346
347 static Bool
348 NV30_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict)
349 {
350         NVPtr pNv = NVPTR(pScrn);
351         nv_pict_surface_format_t *fmt;
352
353         fmt = NV30_GetPictSurfaceFormat(pPict->format);
354         if (!fmt) {
355                 ErrorF("AIII no format\n");
356                 return FALSE;
357         }
358
359         uint32_t pitch = (uint32_t)exaGetPixmapPitch(pPix);
360
361         BEGIN_RING(Nv3D, NV34TCL_RT_FORMAT, 3);
362         OUT_RING  (fmt->card_fmt); /* format */
363         OUT_RING  (pitch << 16 | pitch);
364         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
365
366         return TRUE;
367 }
368
369 static Bool
370 NV30EXACheckCompositeTexture(PicturePtr pPict)
371 {
372         nv_pict_texture_format_t *fmt;
373         int w = pPict->pDrawable->width;
374         int h = pPict->pDrawable->height;
375
376         if ((w > 4096) || (h>4096))
377                 FALLBACK("picture too large, %dx%d\n", w, h);
378
379         fmt = NV30_GetPictTextureFormat(pPict->format);
380         if (!fmt)
381                 FALLBACK("picture format 0x%08x not supported\n",
382                                 pPict->format);
383
384         if (pPict->filter != PictFilterNearest &&
385                         pPict->filter != PictFilterBilinear)
386                 FALLBACK("filter 0x%x not supported\n", pPict->filter);
387
388         if (!(w==1 && h==1) && pPict->repeat && pPict->repeatType != RepeatNone)
389                 FALLBACK("repeat 0x%x not supported (surface %dx%d)\n", pPict->repeatType,w,h);
390
391         return TRUE;
392 }
393
394 Bool
395 NV30EXACheckComposite(int op, PicturePtr psPict,
396                 PicturePtr pmPict,
397                 PicturePtr pdPict)
398 {
399         nv_pict_surface_format_t *fmt;
400         nv_pict_op_t *opr;
401
402         opr = NV30_GetPictOpRec(op);
403         if (!opr)
404                 FALLBACK("unsupported blend op 0x%x\n", op);
405
406         fmt = NV30_GetPictSurfaceFormat(pdPict->format);
407         if (!fmt)
408                 FALLBACK("dst picture format 0x%08x not supported\n",
409                                 pdPict->format);
410
411         if (!NV30EXACheckCompositeTexture(psPict))
412                 FALLBACK("src picture\n");
413         if (pmPict) {
414                 if (pmPict->componentAlpha &&
415                                 PICT_FORMAT_RGB(pmPict->format) &&
416                                 opr->src_alpha && opr->src_card_op != BF(ZERO))
417                         FALLBACK("mask CA + SA\n");
418                 if (!NV30EXACheckCompositeTexture(pmPict))
419                         FALLBACK("mask picture\n");
420         }
421
422         return TRUE;
423 }
424
425 Bool
426 NV30EXAPrepareComposite(int op, PicturePtr psPict,
427                 PicturePtr pmPict,
428                 PicturePtr pdPict,
429                 PixmapPtr  psPix,
430                 PixmapPtr  pmPix,
431                 PixmapPtr  pdPix)
432 {
433         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
434         NVPtr pNv = NVPTR(pScrn);
435         nv_pict_op_t *blend;
436         int fpid = NV30EXA_FPID_PASS_COL0;
437         NV30EXA_STATE;
438
439         blend = NV30_GetPictOpRec(op);
440
441         NV30_SetupBlend(pScrn, blend, pdPict->format,
442                         (pmPict && pmPict->componentAlpha &&
443                          PICT_FORMAT_RGB(pmPict->format)));
444
445         NV30_SetupSurface(pScrn, pdPix, pdPict);
446         NV30EXATexture(pScrn, psPix, psPict, 0);
447
448 #if 0
449 #define printformat(f) ErrorF("(%xh %s %dbpp A%dR%dG%dB%d)",f,(f>>16)&0xf==2?"ARGB":"ABGR",(f>>24),(f&0xf000)>>12,(f&0xf00)>>8,(f&0xf0)>>4,f&0xf)
450         ErrorF("Preparecomposite src(%dx%d)",psPict->pDrawable->width,psPict->pDrawable->height);
451         printformat((psPict->format));
452         ErrorF(" dst(%dx%d)",pdPict->pDrawable->width,pdPict->pDrawable->height);
453         printformat((pdPict->format));
454         if (pmPict)
455         {
456                 ErrorF(" mask(%dx%d)",pmPict->pDrawable->width,pmPict->pDrawable->height);
457                 printformat((pmPict->format));
458         }
459         ErrorF("\n");
460 #endif
461
462         if (pmPict) {
463                 NV30EXATexture(pScrn, pmPix, pmPict, 1);
464
465                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
466                         if (blend->src_alpha)
467                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_SA_CA;
468                         else
469                                 fpid = NV30EXA_FPID_COMPOSITE_MASK_CA;
470                 } else {
471                         fpid = NV30EXA_FPID_COMPOSITE_MASK;
472                 }
473
474                 state->have_mask = TRUE;
475         } else {
476                 fpid = NV30EXA_FPID_PASS_TEX0;
477
478                 state->have_mask = FALSE;
479         }
480
481         if (pdPict->format == PICT_a8)
482                 NV30_LoadFragProg(pScrn, nv40_fp_map_a8[fpid]);
483         else
484                 NV30_LoadFragProg(pScrn, nv40_fp_map[fpid]);
485
486         BEGIN_RING(Nv3D, 0x23c, 1);
487         OUT_RING  (pmPict?3:1);
488
489         BEGIN_RING(Nv3D, NV34TCL_VERTEX_BEGIN_END, 1);
490         OUT_RING  (NV34TCL_VERTEX_BEGIN_END_TRIANGLES);
491
492         return TRUE;
493 }
494
495 #define xFixedToFloat(v) \
496         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
497
498 static void
499 NV30EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
500                                           float *x_ret, float *y_ret)
501 {
502         PictVector v;
503
504         if (t) {
505                 v.vector[0] = IntToxFixed(x);
506                 v.vector[1] = IntToxFixed(y);
507                 v.vector[2] = xFixed1;
508                 PictureTransformPoint(t, &v);
509                 *x_ret = xFixedToFloat(v.vector[0]);
510                 *y_ret = xFixedToFloat(v.vector[1]);
511         } else {
512                 *x_ret = (float)x;
513                 *y_ret = (float)y;
514         }
515 }
516
517 #define CV_OUTm(sx,sy,mx,my,dx,dy) do {                                        \
518         BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2F_X(8), 4);        \
519         OUT_RINGf ((sx)); OUT_RINGf ((sy));                          \
520         OUT_RINGf ((mx)); OUT_RINGf ((my));                          \
521         BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2I(0), 1);          \
522         OUT_RING  (((dy)<<16)|(dx));                                           \
523 } while(0)
524 #define CV_OUT(sx,sy,dx,dy) do {                                               \
525         BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2F_X(8), 2);        \
526         OUT_RINGf ((sx)); OUT_RINGf ((sy));                          \
527         BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2I(0), 1);          \
528         OUT_RING  (((dy)<<16)|(dx));                                           \
529 } while(0)
530
531 void
532 NV30EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
533                                   int maskX, int maskY,
534                                   int dstX , int dstY,
535                                   int width, int height)
536 {
537         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
538         NVPtr pNv = NVPTR(pScrn);
539         float sX0, sX1, sX2, sY0, sY1, sY2;
540         float mX0, mX1, mX2, mY0, mY1, mY2;
541         NV30EXA_STATE;
542
543         /* We're drawing a triangle, we need to scissor it to a quad. */
544         /* The scissors are here for a good reason, we don't get the full image, but just a part. */
545         /* Handling the cliprects is done for us already. */
546         BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
547         OUT_RING  ((width << 16) | dstX);
548         OUT_RING  ((height << 16) | dstY);
549
550 #if 0
551         ErrorF("Composite [%dx%d] (%d,%d)IN(%d,%d)OP(%d,%d)\n",width,height,srcX,srcY,maskX,maskY,dstX,dstY);
552 #endif
553         NV30EXATransformCoord(state->unit[0].transform, 
554                                 srcX, srcY - height,
555                                 state->unit[0].width,
556                                 state->unit[0].height, &sX0, &sY0);
557         NV30EXATransformCoord(state->unit[0].transform,
558                                 srcX, srcY + height,
559                                 state->unit[0].width,
560                                 state->unit[0].height, &sX1, &sY1);
561         NV30EXATransformCoord(state->unit[0].transform,
562                                 srcX + 2*width, srcY + height,
563                                 state->unit[0].width,
564                                 state->unit[0].height, &sX2, &sY2);
565
566         if (state->have_mask) {
567                 NV30EXATransformCoord(state->unit[1].transform, 
568                                         maskX, maskY - height,
569                                         state->unit[1].width,
570                                         state->unit[1].height, &mX0, &mY0);
571                 NV30EXATransformCoord(state->unit[1].transform,
572                                         maskX, maskY + height,
573                                         state->unit[1].width,
574                                         state->unit[1].height, &mX1, &mY1);
575                 NV30EXATransformCoord(state->unit[1].transform,
576                                         maskX + 2*width, maskY + height,
577                                         state->unit[1].width,
578                                         state->unit[1].height, &mX2, &mY2);
579
580                 CV_OUTm(sX0 , sY0 , mX0, mY0, dstX                      ,       dstY - height);
581                 CV_OUTm(sX1 , sY1 , mX1, mY1, dstX                      ,       dstY + height);
582                 CV_OUTm(sX2 , sY2 , mX2, mY2, dstX + 2*width    ,       dstY + height);
583         } else {
584                 CV_OUT(sX0 , sY0 , dstX                 ,       dstY - height);
585                 CV_OUT(sX1 , sY1 , dstX                 ,       dstY + height);
586                 CV_OUT(sX2 , sY2 , dstX + 2*width       ,       dstY + height);
587         }
588 }
589
590 void
591 NV30EXADoneComposite(PixmapPtr pdPix)
592 {
593         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
594         NVPtr pNv = NVPTR(pScrn);
595
596         BEGIN_RING(Nv3D, NV34TCL_VERTEX_BEGIN_END, 1);
597         OUT_RING  (0);
598
599         FIRE_RING();
600 }
601
602 Bool
603 NVAccelInitNV30TCL(ScrnInfoPtr pScrn)
604 {
605         NVPtr pNv = NVPTR(pScrn);
606         uint32_t class = 0, chipset;
607         int i;
608
609         NV30EXAHackupA8Shaders(pScrn);
610
611 #define NV30TCL_CHIPSET_3X_MASK 0x00000003
612 #define NV35TCL_CHIPSET_3X_MASK 0x000001e0
613 #define NV34TCL_CHIPSET_3X_MASK 0x00000010
614
615         chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
616         if ((chipset & 0xf0) != NV_ARCH_30)
617                 return TRUE;
618         chipset &= 0xf;
619
620         if (NV30TCL_CHIPSET_3X_MASK & (1<<chipset))
621                 class = NV30TCL;
622         else if (NV35TCL_CHIPSET_3X_MASK & (1<<chipset))
623                 class = NV35TCL;
624         else if (NV34TCL_CHIPSET_3X_MASK & (1<<chipset))
625                 class = NV34TCL;
626         else {
627                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
628                            "NV30EXA: Unknown chipset NV3%1x\n", chipset);
629                 return FALSE;
630         }
631
632
633         if (!pNv->Nv3D) {
634                 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
635                         return FALSE;
636         }
637
638         BEGIN_RING(Nv3D, NV34TCL_DMA_TEXTURE0, 3);
639         OUT_RING  (pNv->chan->vram->handle);
640         OUT_RING  (pNv->chan->gart->handle);
641         OUT_RING  (pNv->chan->vram->handle);
642         BEGIN_RING(Nv3D, NV34TCL_DMA_IN_MEMORY7, 1);
643         OUT_RING  (pNv->chan->vram->handle);
644         BEGIN_RING(Nv3D, NV34TCL_DMA_COLOR0, 2);
645         OUT_RING  (pNv->chan->vram->handle);
646         OUT_RING  (pNv->chan->vram->handle);
647         BEGIN_RING(Nv3D, NV34TCL_DMA_IN_MEMORY8, 1);
648         OUT_RING  (pNv->chan->vram->handle);
649
650         for (i=1; i<8; i++) {
651                 BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 2);
652                 OUT_RING  (0);
653                 OUT_RING  (0);
654         }
655
656         BEGIN_RING(Nv3D, 0x220, 1);
657         OUT_RING  (1);
658
659         BEGIN_RING(Nv3D, 0x03b0, 1);
660         OUT_RING  (0x00100000);
661         BEGIN_RING(Nv3D, 0x1454, 1);
662         OUT_RING  (0);
663         BEGIN_RING(Nv3D, 0x1d80, 1);
664         OUT_RING  (3);
665         BEGIN_RING(Nv3D, 0x1450, 1);
666         OUT_RING  (0x00030004);
667
668         /* NEW */
669         BEGIN_RING(Nv3D, 0x1e98, 1);
670         OUT_RING  (0);
671         BEGIN_RING(Nv3D, 0x17e0, 3);
672         OUT_RING  (0);
673         OUT_RING  (0);
674         OUT_RING  (0x3f800000);
675         BEGIN_RING(Nv3D, 0x1f80, 16);
676         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
677         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
678         OUT_RING  (0x0000ffff);
679         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
680         OUT_RING  (0); OUT_RING  (0); OUT_RING  (0); 
681
682         BEGIN_RING(Nv3D, 0x120, 3);
683         OUT_RING  (0);
684         OUT_RING  (1);
685         OUT_RING  (2);
686
687         BEGIN_RING(NvImageBlit, 0x120, 3);
688         OUT_RING  (0);
689         OUT_RING  (1);
690         OUT_RING  (2);
691
692         BEGIN_RING(Nv3D, 0x1d88, 1);
693         OUT_RING  (0x00001200);
694
695         BEGIN_RING(Nv3D, NV34TCL_RC_ENABLE, 1);
696         OUT_RING  (0);
697
698         /* Attempt to setup a known state.. Probably missing a heap of
699          * stuff here..
700          */
701         BEGIN_RING(Nv3D, NV34TCL_STENCIL_FRONT_ENABLE, 1);
702         OUT_RING  (0);
703         BEGIN_RING(Nv3D, NV34TCL_STENCIL_BACK_ENABLE, 1);
704         OUT_RING  (0);
705         BEGIN_RING(Nv3D, NV34TCL_ALPHA_FUNC_ENABLE, 1);
706         OUT_RING  (0);
707         BEGIN_RING(Nv3D, NV34TCL_DEPTH_WRITE_ENABLE, 2);
708         OUT_RING  (0); /* wr disable */
709         OUT_RING  (0); /* test disable */
710         BEGIN_RING(Nv3D, NV34TCL_COLOR_MASK, 1);
711         OUT_RING  (0x01010101); /* TR,TR,TR,TR */
712         BEGIN_RING(Nv3D, NV34TCL_CULL_FACE_ENABLE, 1);
713         OUT_RING  (0);
714         BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 5);
715         OUT_RING  (0);                          /* Blend enable */
716         OUT_RING  (0);                          /* Blend src */
717         OUT_RING  (0);                          /* Blend dst */
718         OUT_RING  (0x00000000);                 /* Blend colour */
719         OUT_RING  (0x8006);                     /* FUNC_ADD */
720         BEGIN_RING(Nv3D, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2);
721         OUT_RING  (0);
722         OUT_RING  (0x1503 /*GL_COPY*/);
723         BEGIN_RING(Nv3D, NV34TCL_DITHER_ENABLE, 1);
724         OUT_RING  (1);
725         BEGIN_RING(Nv3D, NV34TCL_SHADE_MODEL, 1);
726         OUT_RING  (0x1d01 /*GL_SMOOTH*/);
727         BEGIN_RING(Nv3D, NV34TCL_POLYGON_OFFSET_FACTOR,2);
728         OUT_RINGf (0.0);
729         OUT_RINGf (0.0);
730         BEGIN_RING(Nv3D, NV34TCL_POLYGON_MODE_FRONT, 2);
731         OUT_RING  (0x1b02 /*GL_FILL*/);
732         OUT_RING  (0x1b02 /*GL_FILL*/);
733         /* - Disable texture units
734          * - Set fragprog to MOVR result.color, fragment.color */
735         for (i=0;i<4;i++) {
736                 BEGIN_RING(Nv3D, NV34TCL_TX_ENABLE(i), 1);
737                 OUT_RING  (0);
738         }
739         /* Polygon stipple */
740         BEGIN_RING(Nv3D, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 0x20);
741         for (i=0;i<0x20;i++)
742                 OUT_RING  (0xFFFFFFFF);
743
744         BEGIN_RING(Nv3D, NV34TCL_DEPTH_RANGE_NEAR, 2);
745         OUT_RINGf  (0.0);
746         OUT_RINGf  (1.0);
747
748         /* Ok.  If you start X with the nvidia driver, kill it, and then
749          * start X with nouveau you will get black rendering instead of
750          * what you'd expect.  This fixes the problem, and it seems that
751          * it's not needed between nouveau restarts - which suggests that
752          * the 3D context (wherever it's stored?) survives somehow.
753          */
754         //BEGIN_RING(Nv3D, 0x1d60,1);
755         //OUT_RING  (0x03008000);
756
757         int w=4096;
758         int h=4096;
759         int pitch=4096*4;
760         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_HORIZ, 5);
761         OUT_RING  (w<<16);
762         OUT_RING  (h<<16);
763         OUT_RING  (0x148); /* format */
764         OUT_RING  (pitch << 16 | pitch);
765         OUT_RING  (0x0);
766         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
767         OUT_RING  (0);
768         BEGIN_RING(Nv3D, 0x0a00, 2);
769         OUT_RING  ((w<<16) | 0);
770         OUT_RING  ((h<<16) | 0);
771         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
772         OUT_RING  ((w-1)<<16);
773         OUT_RING  ((h-1)<<16);
774         BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
775         OUT_RING  (w<<16);
776         OUT_RING  (h<<16);
777         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_HORIZ, 2);
778         OUT_RING  (w<<16);
779         OUT_RING  (h<<16);
780
781         BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_SCALE0_X, 8);
782         OUT_RINGf (0.0);
783         OUT_RINGf (0.0);
784         OUT_RINGf (0.0);
785         OUT_RINGf (0.0);
786         OUT_RINGf (1.0);
787         OUT_RINGf (1.0);
788         OUT_RINGf (1.0);
789         OUT_RINGf (0.0);
790
791         BEGIN_RING(Nv3D, NV34TCL_MODELVIEW_MATRIX(0), 16);
792         OUT_RINGf (1.0);
793         OUT_RINGf (0.0);
794         OUT_RINGf (0.0);
795         OUT_RINGf (0.0);
796         OUT_RINGf (0.0);
797         OUT_RINGf (1.0);
798         OUT_RINGf (0.0);
799         OUT_RINGf (0.0);
800         OUT_RINGf (0.0);
801         OUT_RINGf (0.0);
802         OUT_RINGf (1.0);
803         OUT_RINGf (0.0);
804         OUT_RINGf (0.0);
805         OUT_RINGf (0.0);
806         OUT_RINGf (0.0);
807         OUT_RINGf (1.0);
808
809         BEGIN_RING(Nv3D, NV34TCL_PROJECTION_MATRIX(0), 16);
810         OUT_RINGf (1.0);
811         OUT_RINGf (0.0);
812         OUT_RINGf (0.0);
813         OUT_RINGf (0.0);
814         OUT_RINGf (0.0);
815         OUT_RINGf (1.0);
816         OUT_RINGf (0.0);
817         OUT_RINGf (0.0);
818         OUT_RINGf (0.0);
819         OUT_RINGf (0.0);
820         OUT_RINGf (1.0);
821         OUT_RINGf (0.0);
822         OUT_RINGf (0.0);
823         OUT_RINGf (0.0);
824         OUT_RINGf (0.0);
825         OUT_RINGf (1.0);
826
827         BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
828         OUT_RING  (4096<<16);
829         OUT_RING  (4096<<16);
830
831         return TRUE;
832 }
833