Revert a G70 workaround, this may BREAK your setup.
[nouveau] / src / nv40_exa.c
1 #ifdef HAVE_CONFIG_H
2 #include "config.h"
3 #endif
4
5 #ifdef ENABLE_NV30EXA
6
7 #include "nv_include.h"
8 #include "nv_shaders.h"
9
/*
 * One row of the destination-format table: pairs a Render picture format
 * with the render-target colour format programmed into the 3D object.
 */
typedef struct nv_pict_surface_format {
        int      pict_fmt;      /* PICT_* code; -1 marks the end of a table */
        uint32_t card_fmt;      /* NV40TCL_RT_FORMAT_COLOR_* value */
} nv_pict_surface_format_t;
14
/*
 * One row of the source/mask-format table: pairs a Render picture format
 * with the texture format and the swizzle word used to sample it.
 */
typedef struct nv_pict_texture_format {
        int      pict_fmt;      /* PICT_* code; -1 marks the end of a table */
        uint32_t card_fmt;      /* NV40TCL_TEX_FORMAT_FORMAT_* value */
        uint32_t card_swz;      /* OR of NV40TCL_TEX_SWIZZLE_S0_* and _S1_* fields */
} nv_pict_texture_format_t;
20
21 typedef struct nv_pict_op {
22         Bool     src_alpha;
23         Bool     dst_alpha;
24         uint32_t src_card_op;
25         uint32_t dst_card_op;
26 } nv_pict_op_t;
27
28 typedef struct nv40_exa_state {
29         Bool have_mask;
30
31         struct {
32                 PictTransformPtr transform;
33                 float width;
34                 float height;
35         } unit[2];
36 } nv40_exa_state_t;
37 static nv40_exa_state_t exa_state;
38 #define NV40EXA_STATE nv40_exa_state_t *state = &exa_state
39
40 static nv_pict_surface_format_t
41 NV40SurfaceFormat[] = {
42         { PICT_a8r8g8b8 , NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 },
43         { PICT_x8r8g8b8 , NV40TCL_RT_FORMAT_COLOR_X8R8G8B8 },
44         { PICT_r5g6b5   , NV40TCL_RT_FORMAT_COLOR_R5G6B5   },
45         { PICT_a8       , NV40TCL_RT_FORMAT_COLOR_B8       },
46         { -1, ~0 }
47 };
48
49 static nv_pict_surface_format_t *
50 NV40_GetPictSurfaceFormat(int format)
51 {
52         int i = 0;
53
54         while (NV40SurfaceFormat[i].pict_fmt != -1) {
55                 if (NV40SurfaceFormat[i].pict_fmt == format)
56                         return &NV40SurfaceFormat[i];
57                 i++;
58         }
59
60         return NULL;
61 }
62
/*
 * Indices of the fragment programs in nv40_fp_map[] and nv40_fp_map_a8[].
 *
 * This is a typedef: without the keyword the declaration defined an
 * unused global variable called NV40EXA_FPID instead of naming the type.
 */
typedef enum {
        NV40EXA_FPID_PASS_COL0 = 0,
        NV40EXA_FPID_PASS_TEX0 = 1,
        NV40EXA_FPID_COMPOSITE_MASK = 2,
        NV40EXA_FPID_COMPOSITE_MASK_SA_CA = 3,
        NV40EXA_FPID_COMPOSITE_MASK_CA = 4,
        NV40EXA_FPID_MAX = 5            /* number of programs, not a program */
} NV40EXA_FPID;
71
72 static nv_shader_t *nv40_fp_map[NV40EXA_FPID_MAX] = {
73         &nv30_fp_pass_col0,
74         &nv30_fp_pass_tex0,
75         &nv30_fp_composite_mask,
76         &nv30_fp_composite_mask_sa_ca,
77         &nv30_fp_composite_mask_ca
78 };
79
80 static nv_shader_t *nv40_fp_map_a8[NV40EXA_FPID_MAX];
81
82 static void
83 NV40EXAHackupA8Shaders(ScrnInfoPtr pScrn)
84 {
85         int s;
86
87         for (s = 0; s < NV40EXA_FPID_MAX; s++) {
88                 nv_shader_t *def, *a8;
89
90                 def = nv40_fp_map[s];
91                 a8 = xcalloc(1, sizeof(nv_shader_t));
92                 a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs;
93                 a8->size = def->size + 4;
94                 memcpy(a8->data, def->data, def->size * sizeof(uint32_t));
95                 nv40_fp_map_a8[s] = a8;
96
97                 a8->data[a8->size - 8 + 0] &= ~0x00000081;
98                 a8->data[a8->size - 4 + 0]  = 0x01401e81;
99                 a8->data[a8->size - 4 + 1]  = 0x1c9dfe00;
100                 a8->data[a8->size - 4 + 2]  = 0x0001c800;
101                 a8->data[a8->size - 4 + 3]  = 0x0001c800;
102         }
103 }
104
105 #define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                        \
106   {                                                                            \
107   PICT_##r, NV40TCL_TEX_FORMAT_FORMAT_##tf,                                    \
108   NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y |          \
109   NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w |          \
110   NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y |          \
111   NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w,           \
112   }
113 static nv_pict_texture_format_t
114 NV40TextureFormat[] = {
115         _(a8r8g8b8, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W),
116         _(x8r8g8b8, A8R8G8B8,   S1,   S1,   S1,  ONE, X, Y, Z, W),
117         _(x8b8g8r8, A8R8G8B8,   S1,   S1,   S1,  ONE, Z, Y, X, W),
118         _(a1r5g5b5, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
119         _(x1r5g5b5, A1R5G5B5,   S1,   S1,   S1,  ONE, X, Y, Z, W),
120         _(  r5g6b5,   R5G6B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
121         _(      a8,       L8, ZERO, ZERO, ZERO,   S1, X, X, X, X),
122         { -1, ~0, ~0 }
123 };
124 #undef _
125
126 static nv_pict_texture_format_t *
127 NV40_GetPictTextureFormat(int format)
128 {
129         int i = 0;
130
131         while (NV40TextureFormat[i].pict_fmt != -1) {
132                 if (NV40TextureFormat[i].pict_fmt == format)
133                         return &NV40TextureFormat[i];
134                 i++;
135         }
136
137         return NULL;
138 }
139
140 #define SF(bf) (NV40TCL_BLEND_FUNC_SRC_RGB_##bf |                              \
141                 NV40TCL_BLEND_FUNC_SRC_ALPHA_##bf)
142 #define DF(bf) (NV40TCL_BLEND_FUNC_DST_RGB_##bf |                              \
143                 NV40TCL_BLEND_FUNC_DST_ALPHA_##bf)
144 static nv_pict_op_t 
145 NV40PictOp[] = {
146 /* Clear       */ { 0, 0, SF(               ZERO), DF(               ZERO) },
147 /* Src         */ { 0, 0, SF(                ONE), DF(               ZERO) },
148 /* Dst         */ { 0, 0, SF(               ZERO), DF(                ONE) },
149 /* Over        */ { 1, 0, SF(                ONE), DF(ONE_MINUS_SRC_ALPHA) },
150 /* OverReverse */ { 0, 1, SF(ONE_MINUS_DST_ALPHA), DF(                ONE) },
151 /* In          */ { 0, 1, SF(          DST_ALPHA), DF(               ZERO) },
152 /* InReverse   */ { 1, 0, SF(               ZERO), DF(          SRC_ALPHA) },
153 /* Out         */ { 0, 1, SF(ONE_MINUS_DST_ALPHA), DF(               ZERO) },
154 /* OutReverse  */ { 1, 0, SF(               ZERO), DF(ONE_MINUS_SRC_ALPHA) },
155 /* Atop        */ { 1, 1, SF(          DST_ALPHA), DF(ONE_MINUS_SRC_ALPHA) },
156 /* AtopReverse */ { 1, 1, SF(ONE_MINUS_DST_ALPHA), DF(          SRC_ALPHA) },
157 /* Xor         */ { 1, 1, SF(ONE_MINUS_DST_ALPHA), DF(ONE_MINUS_SRC_ALPHA) },
158 /* Add         */ { 0, 0, SF(                ONE), DF(                ONE) }
159 };
160
161 static nv_pict_op_t *
162 NV40_GetPictOpRec(int op)
163 {
164         if (op >= PictOpSaturate)
165                 return NULL;
166         return &NV40PictOp[op];
167 }
168
/*
 * FALLBACK(fmt, ...) makes the calling function return FALSE.
 * Set NV40EXA_TRACE_FALLBACKS to 1 to also log the reason together with
 * the function name and line number.
 */
#define NV40EXA_TRACE_FALLBACKS 0

#if NV40EXA_TRACE_FALLBACKS
#define FALLBACK(fmt,args...) do {                                      \
        ErrorF("FALLBACK %s:%d> " fmt, __func__, __LINE__, ##args);     \
        return FALSE;                                                   \
} while(0)
#else
#define FALLBACK(fmt,args...) do {                                      \
        return FALSE;                                                   \
} while(0)
#endif
179
180 static void
181 NV40_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
182 {
183         NVPtr pNv = NVPTR(pScrn);
184         static int next_hw_id = 0;
185         int i;
186
187         if (!shader->hw_id) {
188                 shader->hw_id = next_hw_id;
189
190                 BEGIN_RING(Nv3D, NV40TCL_VP_UPLOAD_FROM_ID, 1);
191                 OUT_RING  ((shader->hw_id));
192                 for (i=0; i<shader->size; i+=4) {
193                         BEGIN_RING(Nv3D, NV40TCL_VP_UPLOAD_INST(0), 4);
194                         OUT_RING  (shader->data[i + 0]);
195                         OUT_RING  (shader->data[i + 1]);
196                         OUT_RING  (shader->data[i + 2]);
197                         OUT_RING  (shader->data[i + 3]);
198                         next_hw_id++;
199                 }
200         }
201
202         BEGIN_RING(Nv3D, NV40TCL_VP_START_FROM_ID, 1);
203         OUT_RING  ((shader->hw_id));
204
205         BEGIN_RING(Nv3D, NV40TCL_VP_ATTRIB_EN, 2);
206         OUT_RING  (shader->card_priv.NV30VP.vp_in_reg);
207         OUT_RING  (shader->card_priv.NV30VP.vp_out_reg);
208 }
209
210 static void
211 NV40_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
212 {
213         NVPtr pNv = NVPTR(pScrn);
214         static NVAllocRec *fp_mem = NULL;
215         static int next_hw_id_offset = 0;
216
217         if (!fp_mem) {
218                 fp_mem = NVAllocateMemory(pNv, NOUVEAU_MEM_FB, 0x1000);
219                 if (!fp_mem) {
220                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
221                                    "Couldn't alloc fragprog buffer!\n");
222                         return;
223                 }
224         }
225
226         if (!shader->hw_id) {
227                 memcpy(fp_mem->map + next_hw_id_offset, shader->data,
228                                                         shader->size *
229                                                         sizeof(uint32_t));
230
231                 shader->hw_id  = fp_mem->offset;
232                 shader->hw_id += next_hw_id_offset;
233
234                 next_hw_id_offset += (shader->size * sizeof(uint32_t));
235                 next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
236         }
237
238         BEGIN_RING(Nv3D, NV40TCL_FP_ADDRESS, 1);
239         OUT_RING  (shader->hw_id | NV40TCL_FP_ADDRESS_DMA0);
240         BEGIN_RING(Nv3D, NV40TCL_FP_CONTROL, 1);
241         OUT_RING  (shader->card_priv.NV30FP.num_regs <<
242                    NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT);
243 }
244
245 static void
246 NV40_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
247                 PictFormatShort dest_format, Bool component_alpha)
248 {
249         NVPtr pNv = NVPTR(pScrn);
250         uint32_t sblend, dblend;
251
252         sblend = blend->src_card_op;
253         dblend = blend->dst_card_op;
254
255         if (blend->dst_alpha) {
256                 if (!PICT_FORMAT_A(dest_format)) {
257                         if (sblend == SF(DST_ALPHA)) {
258                                 sblend = SF(ONE);
259                         } else if (sblend == SF(ONE_MINUS_DST_ALPHA)) {
260                                 sblend = SF(ZERO);
261                         }
262                 } else if (dest_format == PICT_a8) {
263                         if (sblend == SF(DST_ALPHA)) {
264                                 sblend = SF(DST_COLOR);
265                         } else if (sblend == SF(ONE_MINUS_DST_ALPHA)) {
266                                 sblend = SF(ONE_MINUS_DST_COLOR);
267                         }
268                 }
269         }
270
271         if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
272                 if (dblend == DF(SRC_ALPHA)) {
273                         dblend = DF(SRC_COLOR);
274                 } else if (dblend == DF(ONE_MINUS_SRC_ALPHA)) {
275                         dblend = DF(ONE_MINUS_SRC_COLOR);
276                 }
277         }
278
279         if (sblend == SF(ONE) && dblend == DF(ZERO)) {
280                 BEGIN_RING(Nv3D, NV40TCL_BLEND_ENABLE, 1);
281                 OUT_RING  (0);
282         } else {
283                 BEGIN_RING(Nv3D, NV40TCL_BLEND_ENABLE, 5);
284                 OUT_RING  (1);
285                 OUT_RING  (sblend);
286                 OUT_RING  (dblend);
287                 OUT_RING  (0x00000000);
288                 OUT_RING  (NV40TCL_BLEND_EQUATION_ALPHA_FUNC_ADD |
289                            NV40TCL_BLEND_EQUATION_RGB_FUNC_ADD);
290         }
291 }
292
293 static Bool
294 NV40EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
295 {
296         NVPtr pNv = NVPTR(pScrn);
297         nv_pict_texture_format_t *fmt;
298         NV40EXA_STATE;
299
300         fmt = NV40_GetPictTextureFormat(pPict->format);
301         if (!fmt)
302                 return FALSE;
303
304         BEGIN_RING(Nv3D, NV40TCL_TEX_OFFSET(unit), 8);
305         OUT_RING  (NVAccelGetPixmapOffset(pPix));
306         OUT_RING  (fmt->card_fmt | NV40TCL_TEX_FORMAT_LINEAR |
307                    NV40TCL_TEX_FORMAT_DIMS_2D | NV40TCL_TEX_FORMAT_DMA0 |
308                    NV40TCL_TEX_FORMAT_NO_BORDER | (0x8000) |
309                    (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT));
310         if (pPict->repeat && pPict->repeatType == RepeatNormal) {
311                 OUT_RING  (NV40TCL_TEX_WRAP_S_REPEAT |
312                            NV40TCL_TEX_WRAP_T_REPEAT |
313                            NV40TCL_TEX_WRAP_R_REPEAT);
314         } else {
315                 OUT_RING  (NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE |
316                            NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE |
317                            NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE);
318         }
319         OUT_RING  (NV40TCL_TEX_ENABLE_ENABLE);
320         OUT_RING  (fmt->card_swz);
321         if (pPict->filter == PictFilterBilinear) {
322                 OUT_RING  (NV40TCL_TEX_FILTER_MIN_LINEAR |
323                            NV40TCL_TEX_FILTER_MAG_LINEAR |
324                            0x3fd6);
325         } else {
326                 OUT_RING  (NV40TCL_TEX_FILTER_MIN_NEAREST |
327                            NV40TCL_TEX_FILTER_MAG_NEAREST |
328                            0x3fd6);
329         }
330         OUT_RING  ((pPix->drawable.width << 16) | pPix->drawable.height);
331         OUT_RING  (0); /* border ARGB */
332         BEGIN_RING(Nv3D, NV40TCL_TEX_SIZE1(unit), 1);
333         OUT_RING  ((1 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) |
334                    (uint32_t)exaGetPixmapPitch(pPix));
335
336         state->unit[unit].width         = (float)pPix->drawable.width;
337         state->unit[unit].height        = (float)pPix->drawable.height;
338         state->unit[unit].transform     = pPict->transform;
339
340         return TRUE;
341 }
342
343 static Bool
344 NV40_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PictFormatShort format)
345 {
346         NVPtr pNv = NVPTR(pScrn);
347         nv_pict_surface_format_t *fmt;
348
349         fmt = NV40_GetPictSurfaceFormat(format);
350         if (!fmt) {
351                 ErrorF("AIII no format\n");
352                 return FALSE;
353         }
354
355         uint32_t pitch = (uint32_t)exaGetPixmapPitch(pPix);
356
357         BEGIN_RING(Nv3D, NV40TCL_RT_FORMAT, 3);
358         OUT_RING  (NV40TCL_RT_FORMAT_TYPE_LINEAR |
359                    NV40TCL_RT_FORMAT_ZETA_Z24S8 |
360                    fmt->card_fmt);
361         OUT_RING  (pitch);
362         OUT_RING  (NVAccelGetPixmapOffset(pPix));
363
364         return TRUE;
365 }
366
367 static Bool
368 NV40EXACheckCompositeTexture(PicturePtr pPict)
369 {
370         nv_pict_texture_format_t *fmt;
371         int w = pPict->pDrawable->width;
372         int h = pPict->pDrawable->height;
373
374         if ((w > 4096) || (h > 4096))
375                 FALLBACK("picture too large, %dx%d\n", w, h);
376
377         fmt = NV40_GetPictTextureFormat(pPict->format);
378         if (!fmt)
379                 FALLBACK("picture format 0x%08x not supported\n",
380                                 pPict->format);
381
382         if (pPict->filter != PictFilterNearest &&
383             pPict->filter != PictFilterBilinear)
384                 FALLBACK("filter 0x%x not supported\n", pPict->filter);
385
386         if (pPict->repeat &&
387             (pPict->repeat != RepeatNormal && pPict->repeatType != RepeatNone))
388                 FALLBACK("repeat 0x%x not supported\n", pPict->repeatType);
389
390         return TRUE;
391 }
392
393 Bool
394 NV40EXACheckComposite(int op, PicturePtr psPict,
395                               PicturePtr pmPict,
396                               PicturePtr pdPict)
397 {
398         nv_pict_surface_format_t *fmt;
399         nv_pict_op_t *opr;
400
401         opr = NV40_GetPictOpRec(op);
402         if (!opr)
403                 FALLBACK("unsupported blend op 0x%x\n", op);
404
405         fmt = NV40_GetPictSurfaceFormat(pdPict->format);
406         if (!fmt)
407                 FALLBACK("dst picture format 0x%08x not supported\n",
408                                 pdPict->format);
409
410         if (!NV40EXACheckCompositeTexture(psPict))
411                 FALLBACK("src picture\n");
412         if (pmPict) {
413                 if (pmPict->componentAlpha && 
414                     PICT_FORMAT_RGB(pmPict->format) &&
415                     opr->src_alpha && opr->src_card_op != SF(ZERO))
416                         FALLBACK("mask CA + SA\n");
417                 if (!NV40EXACheckCompositeTexture(pmPict))
418                         FALLBACK("mask picture\n");
419         }
420
421         return TRUE;
422 }
423
424 Bool
425 NV40EXAPrepareComposite(int op, PicturePtr psPict,
426                                 PicturePtr pmPict,
427                                 PicturePtr pdPict,
428                                 PixmapPtr  psPix,
429                                 PixmapPtr  pmPix,
430                                 PixmapPtr  pdPix)
431 {
432         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
433         NVPtr pNv = NVPTR(pScrn);
434         nv_pict_op_t *blend;
435         int fpid = NV40EXA_FPID_PASS_COL0;
436         NV40EXA_STATE;
437
438         blend = NV40_GetPictOpRec(op);
439
440         NV40_SetupBlend(pScrn, blend, pdPict->format,
441                         (pmPict && pmPict->componentAlpha &&
442                          PICT_FORMAT_RGB(pmPict->format)));
443
444         NV40_SetupSurface(pScrn, pdPix, pdPict->format);
445         NV40EXATexture(pScrn, psPix, psPict, 0);
446
447         NV40_LoadVtxProg(pScrn, &nv40_vp_exa_render);
448         if (pmPict) {
449                 NV40EXATexture(pScrn, pmPix, pmPict, 1);
450
451                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
452                         if (blend->src_alpha)
453                                 fpid = NV40EXA_FPID_COMPOSITE_MASK_SA_CA;
454                         else
455                                 fpid = NV40EXA_FPID_COMPOSITE_MASK_CA;
456                 } else {
457                         fpid = NV40EXA_FPID_COMPOSITE_MASK;
458                 }
459
460                 state->have_mask = TRUE;
461         } else {
462                 fpid = NV40EXA_FPID_PASS_TEX0;
463
464                 state->have_mask = FALSE;
465         }
466
467         if (pdPict->format == PICT_a8)
468                 NV40_LoadFragProg(pScrn, nv40_fp_map_a8[fpid]);
469         else
470                 NV40_LoadFragProg(pScrn, nv40_fp_map[fpid]);
471
472         /* Appears to be some kind of cache flush, needed here at least
473          * sometimes.. funky text rendering otherwise :)
474          */
475         BEGIN_RING(Nv3D, NV40TCL_TEX_CACHE_CTL, 1);
476         OUT_RING  (2);
477         BEGIN_RING(Nv3D, NV40TCL_TEX_CACHE_CTL, 1);
478         OUT_RING  (1);
479
480         BEGIN_RING(Nv3D, NV40TCL_BEGIN_END, 1);
481         OUT_RING  (NV40TCL_BEGIN_END_QUADS);
482
483         return TRUE;
484 }
485
486 #define xFixedToFloat(v) \
487         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
488
489 static void
490 NV40EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
491                                           float *x_ret, float *y_ret)
492 {
493         PictVector v;
494
495         if (t) {
496                 v.vector[0] = IntToxFixed(x);
497                 v.vector[1] = IntToxFixed(y);
498                 v.vector[2] = xFixed1;
499                 PictureTransformPoint(t, &v);
500                 *x_ret = xFixedToFloat(v.vector[0]) / sx;
501                 *y_ret = xFixedToFloat(v.vector[1]) / sy;
502         } else {
503                 *x_ret = (float)x / sx;
504                 *y_ret = (float)y / sy;
505         }
506 }
507
508 #define CV_OUTm(sx,sy,mx,my,dx,dy) do {                                        \
509         BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2F_X(8), 4);                         \
510         OUT_RINGf ((sx)); OUT_RINGf ((sy));                                    \
511         OUT_RINGf ((mx)); OUT_RINGf ((my));                                    \
512         BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2I(0), 1);                           \
513         OUT_RING  (((dy)<<16)|(dx));                                           \
514 } while(0)
515 #define CV_OUT(sx,sy,dx,dy) do {                                               \
516         BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2F_X(8), 2);                         \
517         OUT_RINGf ((sx)); OUT_RINGf ((sy));                                    \
518         BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2I(0), 1);                           \
519         OUT_RING  (((dy)<<16)|(dx));                                           \
520 } while(0)
521
522 void
523 NV40EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
524                                   int maskX, int maskY,
525                                   int dstX , int dstY,
526                                   int width, int height)
527 {
528         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
529         NVPtr pNv = NVPTR(pScrn);
530         float sX0, sX1, sY0, sY1;
531         float mX0, mX1, mY0, mY1;
532         NV40EXA_STATE;
533
534         NV40EXATransformCoord(state->unit[0].transform, srcX, srcY,
535                               state->unit[0].width,
536                               state->unit[0].height, &sX0, &sY0);
537         NV40EXATransformCoord(state->unit[0].transform,
538                               srcX + width, srcY + height,
539                               state->unit[0].width,
540                               state->unit[0].height, &sX1, &sY1);
541
542         if (state->have_mask) {
543                 NV40EXATransformCoord(state->unit[1].transform, maskX, maskY,
544                                       state->unit[1].width,
545                                       state->unit[1].height, &mX0, &mY0);
546                 NV40EXATransformCoord(state->unit[1].transform,
547                                       maskX + width, maskY + height,
548                                       state->unit[1].width,
549                                       state->unit[1].height, &mX1, &mY1);
550                 CV_OUTm(sX0 , sY0 , mX0, mY0, dstX        ,          dstY);
551                 CV_OUTm(sX1 , sY0 , mX1, mY0, dstX + width,          dstY);
552                 CV_OUTm(sX1 , sY1 , mX1, mY1, dstX + width, dstY + height);
553                 CV_OUTm(sX0 , sY1 , mX0, mY1, dstX        , dstY + height);
554         } else {
555                 CV_OUT(sX0 , sY0 , dstX        ,          dstY);
556                 CV_OUT(sX1 , sY0 , dstX + width,          dstY);
557                 CV_OUT(sX1 , sY1 , dstX + width, dstY + height);
558                 CV_OUT(sX0 , sY1 , dstX        , dstY + height);
559         }
560
561         FIRE_RING();
562 }
563
564 void
565 NV40EXADoneComposite(PixmapPtr pdPix)
566 {
567         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
568         NVPtr pNv = NVPTR(pScrn);
569
570         BEGIN_RING(Nv3D, NV40TCL_BEGIN_END, 1);
571         OUT_RING  (NV40TCL_BEGIN_END_STOP);
572 }
573
/*
 * Bitmasks indexed by the low nibble of the chipset id.  A set bit means
 * NVAccelInitNV40TCL() uses that 3D object class for the chipset:
 * NV40TCL for 0x0-0x3, 0x5, 0x7-0x9 and 0xb; NV44TCL for 0x4, 0x6, 0xa,
 * 0xc and 0xe.
 */
574 #define NV40TCL_CHIPSET_4X_MASK 0x00000baf
575 #define NV44TCL_CHIPSET_4X_MASK 0x00005450
/*
 * Create the 3D object (once) and emit its initial state.
 *
 * The PICT_a8 fragment program variants are built first, on every call.
 * The chipset id is then taken from bits 20-27 of MC register 0:
 *  - if it is not in the NV_ARCH_40 family, TRUE is returned without
 *    creating or programming anything;
 *  - if neither chipset mask covers its low nibble, an error is logged
 *    and FALSE is returned.
 * FALSE is also returned if the context object cannot be created.
 * Otherwise the DMA objects, a block of undocumented methods, the
 * viewport transform, default 3D state and render-target / scissor /
 * viewport sizes are emitted and TRUE is returned.
 */
576 Bool
577 NVAccelInitNV40TCL(ScrnInfoPtr pScrn)
578 {
579         NVPtr pNv = NVPTR(pScrn);
            /* the context object survives across calls, create it once */
580         static int have_object = FALSE;
581         uint32_t class = 0, chipset;
582         int i;
583
584         NV40EXAHackupA8Shaders(pScrn);
585
586         chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
587         if ((chipset & 0xf0) != NV_ARCH_40)
588                 return TRUE;
            /* only the low nibble selects the object class */
589         chipset &= 0xf;
590
591         if (NV40TCL_CHIPSET_4X_MASK & (1<<chipset))
592                 class = NV40TCL;
593         else if (NV44TCL_CHIPSET_4X_MASK & (1<<chipset))
594                 class = NV44TCL;
595         else {
596                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
597                            "NV40EXA: Unknown chipset NV4%1x\n", chipset);
598                 return FALSE;
599         }
600
601         if (!have_object) {
602                 if (!NVDmaCreateContextObject(pNv, Nv3D, class))
603                         return FALSE;
604                 have_object = TRUE;
605         }
606
            /* notifier, texture and both colour DMA objects; textures and
             * colour buffers all use the NvDmaFB object */
607         BEGIN_RING(Nv3D, NV40TCL_DMA_NOTIFY, 1);
608         OUT_RING  (NvDmaNotifier0);
609         BEGIN_RING(Nv3D, NV40TCL_DMA_TEXTURE0, 1);
610         OUT_RING  (NvDmaFB);
611         BEGIN_RING(Nv3D, NV40TCL_DMA_COLOR0, 2);
612         OUT_RING  (NvDmaFB);
613         OUT_RING  (NvDmaFB);
614
            /* unnamed methods with fixed values; their meaning is not
             * documented in this file */
615         /* voodoo */
616         BEGIN_RING(Nv3D, 0x1ea4, 3);
617         OUT_RING  (0x00000010);
618         OUT_RING  (0x01000100);
619         OUT_RING  (0xff800006);
620         BEGIN_RING(Nv3D, 0x1fc4, 1);
621         OUT_RING  (0x06144321);
622         BEGIN_RING(Nv3D, 0x1fc8, 2);
623         OUT_RING  (0xedcba987);
624         OUT_RING  (0x00000021);
625         BEGIN_RING(Nv3D, 0x1fd0, 1);
626         OUT_RING  (0x00171615);
627         BEGIN_RING(Nv3D, 0x1fd4, 1);
628         OUT_RING  (0x001b1a19);
629         BEGIN_RING(Nv3D, 0x1ef8, 1);
630         OUT_RING  (0x0020ffff);
631         BEGIN_RING(Nv3D, 0x1d64, 1);
632         OUT_RING  (0x00d30000);
633         BEGIN_RING(Nv3D, 0x1e94, 1);
634         OUT_RING  (0x00000001);
635
            /* eight floats starting at VIEWPORT_TRANSLATE_X: presumably a
             * zero translate followed by a unit scale -- TODO confirm the
             * method layout */
636         BEGIN_RING(Nv3D, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
637         OUT_RINGf (0.0);
638         OUT_RINGf (0.0);
639         OUT_RINGf (0.0);
640         OUT_RINGf (0.0);
641         OUT_RINGf (1.0);
642         OUT_RINGf (1.0);
643         OUT_RINGf (1.0);
644         OUT_RINGf (0.0);
645
646         /* default 3D state */
647         /*XXX: replace with the same state that the DRI emits on startup */
648         BEGIN_RING(Nv3D, NV40TCL_STENCIL_FRONT_ENABLE, 1);
649         OUT_RING  (0);
650         BEGIN_RING(Nv3D, NV40TCL_STENCIL_BACK_ENABLE, 1);
651         OUT_RING  (0);
652         BEGIN_RING(Nv3D, NV40TCL_ALPHA_TEST_ENABLE, 1);
653         OUT_RING  (0);
654         BEGIN_RING(Nv3D, NV40TCL_DEPTH_WRITE_ENABLE, 2);
655         OUT_RING  (0);
656         OUT_RING  (0); 
657         BEGIN_RING(Nv3D, NV40TCL_COLOR_MASK, 1);
658         OUT_RING  (0x01010101); /* TR,TR,TR,TR */
659         BEGIN_RING(Nv3D, NV40TCL_CULL_FACE_ENABLE, 1);
660         OUT_RING  (0);
661         BEGIN_RING(Nv3D, NV40TCL_BLEND_ENABLE, 1);
662         OUT_RING  (0);
663         BEGIN_RING(Nv3D, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2);
664         OUT_RING  (0);
665         OUT_RING  (NV40TCL_COLOR_LOGIC_OP_COPY);
666         BEGIN_RING(Nv3D, NV40TCL_DITHER_ENABLE, 1);
667         OUT_RING  (0);
668         BEGIN_RING(Nv3D, NV40TCL_SHADE_MODEL, 1);
669         OUT_RING  (NV40TCL_SHADE_MODEL_SMOOTH);
670         BEGIN_RING(Nv3D, NV40TCL_POLYGON_OFFSET_FACTOR,2);
671         OUT_RINGf (0.0);
672         OUT_RINGf (0.0);
673         BEGIN_RING(Nv3D, NV40TCL_POLYGON_MODE_FRONT, 2);
674         OUT_RING  (NV40TCL_POLYGON_MODE_FRONT_FILL);
675         OUT_RING  (NV40TCL_POLYGON_MODE_BACK_FILL);
            /* all 0x20 stipple pattern words set to all ones */
676         BEGIN_RING(Nv3D, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 0x20);
677         for (i=0;i<0x20;i++)
678                 OUT_RING  (0xFFFFFFFF);
            /* start with all 16 texture units disabled */
679         for (i=0;i<16;i++) {
680                 BEGIN_RING(Nv3D, NV40TCL_TEX_ENABLE(i), 1);
681                 OUT_RING  (0);
682         }
683
684         BEGIN_RING(Nv3D, 0x1d78, 1);
685         OUT_RING  (0x110);
686
            /* render to colour buffer 0 only */
687         BEGIN_RING(Nv3D, NV40TCL_RT_ENABLE, 1);
688         OUT_RING  (NV40TCL_RT_ENABLE_COLOR0);
689
            /* presumably size in the high 16 bits and origin 0 in the low
             * 16 bits, i.e. 4096x4096 areas -- TODO confirm the encoding */
690         BEGIN_RING(Nv3D, NV40TCL_RT_HORIZ, 2);
691         OUT_RING  ((4096 << 16));
692         OUT_RING  ((4096 << 16));
693         BEGIN_RING(Nv3D, NV40TCL_SCISSOR_HORIZ, 2);
694         OUT_RING  ((4096 << 16));
695         OUT_RING  ((4096 << 16));
696         BEGIN_RING(Nv3D, NV40TCL_VIEWPORT_HORIZ, 2);
697         OUT_RING  ((4096 << 16));
698         OUT_RING  ((4096 << 16));
699         BEGIN_RING(Nv3D, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
700         OUT_RING  ((4095 << 16));
701         OUT_RING  ((4095 << 16));
702
703         return TRUE;
704 }
705
706 #endif /* ENABLE_NV30EXA */