Initial DRI2 support.
[nouveau] / src / nv40_exa.c
1 /*
2  * Copyright 2007 Ben Skeggs
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22
23 #include "nv_include.h"
24 #include "nv30_shaders.h"
25
/* One row of the Render-format -> render-target-format lookup table. */
typedef struct nv_pict_surface_format {
        /* Render picture format (PICT_*); -1 marks the end of a table. */
        int pict_fmt;
        /* Value OR'ed into the NV40TCL_RT_FORMAT word. */
        uint32_t card_fmt;
} nv_pict_surface_format_t;
30
/* One row of the Render-format -> texture-format lookup table. */
typedef struct nv_pict_texture_format {
        /* Render picture format (PICT_*); -1 marks the end of a table. */
        int pict_fmt;
        /* Value OR'ed into the texture format word. */
        uint32_t card_fmt;
        /* Complete texture swizzle word for this format. */
        uint32_t card_swz;
} nv_pict_texture_format_t;
36
37 typedef struct nv_pict_op {
38         Bool     src_alpha;
39         Bool     dst_alpha;
40         uint32_t src_card_op;
41         uint32_t dst_card_op;
42 } nv_pict_op_t;
43
44 typedef struct nv40_exa_state {
45         Bool have_mask;
46
47         struct {
48                 PictTransformPtr transform;
49                 float width;
50                 float height;
51         } unit[2];
52 } nv40_exa_state_t;
53 static nv40_exa_state_t exa_state;
54 #define NV40EXA_STATE nv40_exa_state_t *state = &exa_state
55
56 static nv_pict_surface_format_t
57 NV40SurfaceFormat[] = {
58         { PICT_a8r8g8b8 , NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 },
59         { PICT_x8r8g8b8 , NV40TCL_RT_FORMAT_COLOR_X8R8G8B8 },
60         { PICT_r5g6b5   , NV40TCL_RT_FORMAT_COLOR_R5G6B5   },
61         { PICT_a8       , NV40TCL_RT_FORMAT_COLOR_B8       },
62         { -1, ~0 }
63 };
64
65 static nv_pict_surface_format_t *
66 NV40_GetPictSurfaceFormat(int format)
67 {
68         int i = 0;
69
70         while (NV40SurfaceFormat[i].pict_fmt != -1) {
71                 if (NV40SurfaceFormat[i].pict_fmt == format)
72                         return &NV40SurfaceFormat[i];
73                 i++;
74         }
75
76         return NULL;
77 }
78
/*
 * Fragment program identifiers; each value indexes nv40_fp_map[] and
 * nv40_fp_map_a8[].  NV40EXA_FPID_MAX is the number of programs.
 *
 * This is a typedef: without the keyword the trailing name would define an
 * unused global variable of the anonymous enum type.
 */
typedef enum {
        NV40EXA_FPID_PASS_COL0 = 0,
        NV40EXA_FPID_PASS_TEX0 = 1,
        NV40EXA_FPID_COMPOSITE_MASK = 2,
        NV40EXA_FPID_COMPOSITE_MASK_SA_CA = 3,
        NV40EXA_FPID_COMPOSITE_MASK_CA = 4,
        NV40EXA_FPID_MAX = 5
} NV40EXA_FPID;
87
88 static nv_shader_t *nv40_fp_map[NV40EXA_FPID_MAX] = {
89         &nv30_fp_pass_col0,
90         &nv30_fp_pass_tex0,
91         &nv30_fp_composite_mask,
92         &nv30_fp_composite_mask_sa_ca,
93         &nv30_fp_composite_mask_ca
94 };
95
96 static nv_shader_t *nv40_fp_map_a8[NV40EXA_FPID_MAX];
97
98 static void
99 NV40EXAHackupA8Shaders(ScrnInfoPtr pScrn)
100 {
101         int s;
102
103         for (s = 0; s < NV40EXA_FPID_MAX; s++) {
104                 nv_shader_t *def, *a8;
105
106                 def = nv40_fp_map[s];
107                 a8 = xcalloc(1, sizeof(nv_shader_t));
108                 a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs;
109                 a8->size = def->size + 4;
110                 memcpy(a8->data, def->data, def->size * sizeof(uint32_t));
111                 nv40_fp_map_a8[s] = a8;
112
113                 a8->data[a8->size - 8 + 0] &= ~0x00000081;
114                 a8->data[a8->size - 4 + 0]  = 0x01401e81;
115                 a8->data[a8->size - 4 + 1]  = 0x1c9dfe00;
116                 a8->data[a8->size - 4 + 2]  = 0x0001c800;
117                 a8->data[a8->size - 4 + 3]  = 0x0001c800;
118         }
119 }
120
121 #define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                        \
122   {                                                                            \
123   PICT_##r, NV40TCL_TEX_FORMAT_FORMAT_##tf,                                    \
124   NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y |          \
125   NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w |          \
126   NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y |          \
127   NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w,           \
128   }
129 static nv_pict_texture_format_t
130 NV40TextureFormat[] = {
131         _(a8r8g8b8, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W),
132         _(x8r8g8b8, A8R8G8B8,   S1,   S1,   S1,  ONE, X, Y, Z, W),
133         _(x8b8g8r8, A8R8G8B8,   S1,   S1,   S1,  ONE, Z, Y, X, W),
134         _(a1r5g5b5, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
135         _(x1r5g5b5, A1R5G5B5,   S1,   S1,   S1,  ONE, X, Y, Z, W),
136         _(  r5g6b5,   R5G6B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
137         _(      a8,       L8, ZERO, ZERO, ZERO,   S1, X, X, X, X),
138         { -1, ~0, ~0 }
139 };
140 #undef _
141
142 static nv_pict_texture_format_t *
143 NV40_GetPictTextureFormat(int format)
144 {
145         int i = 0;
146
147         while (NV40TextureFormat[i].pict_fmt != -1) {
148                 if (NV40TextureFormat[i].pict_fmt == format)
149                         return &NV40TextureFormat[i];
150                 i++;
151         }
152
153         return NULL;
154 }
155
156 #define SF(bf) (NV40TCL_BLEND_FUNC_SRC_RGB_##bf |                              \
157                 NV40TCL_BLEND_FUNC_SRC_ALPHA_##bf)
158 #define DF(bf) (NV40TCL_BLEND_FUNC_DST_RGB_##bf |                              \
159                 NV40TCL_BLEND_FUNC_DST_ALPHA_##bf)
160 static nv_pict_op_t 
161 NV40PictOp[] = {
162 /* Clear       */ { 0, 0, SF(               ZERO), DF(               ZERO) },
163 /* Src         */ { 0, 0, SF(                ONE), DF(               ZERO) },
164 /* Dst         */ { 0, 0, SF(               ZERO), DF(                ONE) },
165 /* Over        */ { 1, 0, SF(                ONE), DF(ONE_MINUS_SRC_ALPHA) },
166 /* OverReverse */ { 0, 1, SF(ONE_MINUS_DST_ALPHA), DF(                ONE) },
167 /* In          */ { 0, 1, SF(          DST_ALPHA), DF(               ZERO) },
168 /* InReverse   */ { 1, 0, SF(               ZERO), DF(          SRC_ALPHA) },
169 /* Out         */ { 0, 1, SF(ONE_MINUS_DST_ALPHA), DF(               ZERO) },
170 /* OutReverse  */ { 1, 0, SF(               ZERO), DF(ONE_MINUS_SRC_ALPHA) },
171 /* Atop        */ { 1, 1, SF(          DST_ALPHA), DF(ONE_MINUS_SRC_ALPHA) },
172 /* AtopReverse */ { 1, 1, SF(ONE_MINUS_DST_ALPHA), DF(          SRC_ALPHA) },
173 /* Xor         */ { 1, 1, SF(ONE_MINUS_DST_ALPHA), DF(ONE_MINUS_SRC_ALPHA) },
174 /* Add         */ { 0, 0, SF(                ONE), DF(                ONE) }
175 };
176
177 static nv_pict_op_t *
178 NV40_GetPictOpRec(int op)
179 {
180         if (op >= PictOpSaturate)
181                 return NULL;
182         return &NV40PictOp[op];
183 }
184
185 static void
186 NV40_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
187                 PictFormatShort dest_format, Bool component_alpha)
188 {
189         NVPtr pNv = NVPTR(pScrn);
190         struct nouveau_channel *chan = pNv->chan;
191         struct nouveau_grobj *curie = pNv->Nv3D;
192         uint32_t sblend, dblend;
193
194         sblend = blend->src_card_op;
195         dblend = blend->dst_card_op;
196
197         if (blend->dst_alpha) {
198                 if (!PICT_FORMAT_A(dest_format)) {
199                         if (sblend == SF(DST_ALPHA)) {
200                                 sblend = SF(ONE);
201                         } else if (sblend == SF(ONE_MINUS_DST_ALPHA)) {
202                                 sblend = SF(ZERO);
203                         }
204                 } else if (dest_format == PICT_a8) {
205                         if (sblend == SF(DST_ALPHA)) {
206                                 sblend = SF(DST_COLOR);
207                         } else if (sblend == SF(ONE_MINUS_DST_ALPHA)) {
208                                 sblend = SF(ONE_MINUS_DST_COLOR);
209                         }
210                 }
211         }
212
213         if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
214                 if (dblend == DF(SRC_ALPHA)) {
215                         dblend = DF(SRC_COLOR);
216                 } else if (dblend == DF(ONE_MINUS_SRC_ALPHA)) {
217                         dblend = DF(ONE_MINUS_SRC_COLOR);
218                 }
219         }
220
221         if (sblend == SF(ONE) && dblend == DF(ZERO)) {
222                 BEGIN_RING(chan, curie, NV40TCL_BLEND_ENABLE, 1);
223                 OUT_RING  (chan, 0);
224         } else {
225                 BEGIN_RING(chan, curie, NV40TCL_BLEND_ENABLE, 5);
226                 OUT_RING  (chan, 1);
227                 OUT_RING  (chan, sblend);
228                 OUT_RING  (chan, dblend);
229                 OUT_RING  (chan, 0x00000000);
230                 OUT_RING  (chan, NV40TCL_BLEND_EQUATION_ALPHA_FUNC_ADD |
231                            NV40TCL_BLEND_EQUATION_RGB_FUNC_ADD);
232         }
233 }
234
235 static Bool
236 NV40EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
237 {
238         NVPtr pNv = NVPTR(pScrn);
239         struct nouveau_channel *chan = pNv->chan;
240         struct nouveau_grobj *curie = pNv->Nv3D;
241         struct nouveau_pixmap *tex = nouveau_pixmap(pPix);
242         nv_pict_texture_format_t *fmt;
243         NV40EXA_STATE;
244
245         fmt = NV40_GetPictTextureFormat(pPict->format);
246         if (!fmt)
247                 return FALSE;
248
249         BEGIN_RING(chan, curie, NV40TCL_TEX_OFFSET(unit), 8);
250         OUT_RELOCl(chan, tex->bo, 0,
251                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
252         OUT_RELOCd(chan, tex->bo, fmt->card_fmt | NV40TCL_TEX_FORMAT_LINEAR |
253                          NV40TCL_TEX_FORMAT_DIMS_2D | 0x8000 |
254                          NV40TCL_TEX_FORMAT_NO_BORDER |
255                          (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT),
256                          NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD,
257                          NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1);
258         if (pPict->repeat) {
259                 switch(pPict->repeatType) {
260                 case RepeatPad:
261                         OUT_RING  (chan, NV40TCL_TEX_WRAP_S_CLAMP | 
262                                          NV40TCL_TEX_WRAP_T_CLAMP |
263                                          NV40TCL_TEX_WRAP_R_CLAMP);
264                         break;
265                 case RepeatReflect:
266                         OUT_RING  (chan, NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT |
267                                          NV40TCL_TEX_WRAP_T_MIRRORED_REPEAT |
268                                          NV40TCL_TEX_WRAP_R_MIRRORED_REPEAT);
269                         break;
270                 case RepeatNormal:
271                 default:
272                         OUT_RING  (chan, NV40TCL_TEX_WRAP_S_REPEAT |
273                                          NV40TCL_TEX_WRAP_T_REPEAT |
274                                          NV40TCL_TEX_WRAP_R_REPEAT);
275                         break;
276                 }
277         } else {
278                 OUT_RING  (chan, NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER |
279                                  NV40TCL_TEX_WRAP_T_CLAMP_TO_BORDER |
280                                  NV40TCL_TEX_WRAP_R_CLAMP_TO_BORDER);
281         }
282         OUT_RING  (chan, NV40TCL_TEX_ENABLE_ENABLE);
283         OUT_RING  (chan, fmt->card_swz);
284         if (pPict->filter == PictFilterBilinear) {
285                 OUT_RING  (chan, NV40TCL_TEX_FILTER_MIN_LINEAR |
286                                  NV40TCL_TEX_FILTER_MAG_LINEAR | 0x3fd6);
287         } else {
288                 OUT_RING  (chan, NV40TCL_TEX_FILTER_MIN_NEAREST |
289                                  NV40TCL_TEX_FILTER_MAG_NEAREST | 0x3fd6);
290         }
291         OUT_RING  (chan, (pPix->drawable.width << 16) | pPix->drawable.height);
292         OUT_RING  (chan, 0); /* border ARGB */
293         BEGIN_RING(chan, curie, NV40TCL_TEX_SIZE1(unit), 1);
294         OUT_RING  (chan, (1 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) |
295                          (uint32_t)exaGetPixmapPitch(pPix));
296
297         state->unit[unit].width         = (float)pPix->drawable.width;
298         state->unit[unit].height        = (float)pPix->drawable.height;
299         state->unit[unit].transform     = pPict->transform;
300         return TRUE;
301 }
302
303 static Bool
304 NV40_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PictFormatShort format)
305 {
306         NVPtr pNv = NVPTR(pScrn);
307         struct nouveau_channel *chan = pNv->chan;
308         struct nouveau_grobj *curie = pNv->Nv3D;
309         struct nouveau_pixmap *rt = nouveau_pixmap(pPix);
310         nv_pict_surface_format_t *fmt;
311
312         fmt = NV40_GetPictSurfaceFormat(format);
313         if (!fmt) {
314                 ErrorF("AIII no format\n");
315                 return FALSE;
316         }
317
318         BEGIN_RING(chan, curie, NV40TCL_RT_FORMAT, 3);
319         OUT_RING  (chan, NV40TCL_RT_FORMAT_TYPE_LINEAR |
320                    NV40TCL_RT_FORMAT_ZETA_Z24S8 |
321                    fmt->card_fmt);
322         OUT_RING  (chan, exaGetPixmapPitch(pPix));
323         OUT_RELOCl(chan, rt->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
324
325         return TRUE;
326 }
327
328 static Bool
329 NV40EXACheckCompositeTexture(PicturePtr pPict)
330 {
331         nv_pict_texture_format_t *fmt;
332         int w = pPict->pDrawable->width;
333         int h = pPict->pDrawable->height;
334
335         if ((w > 4096) || (h > 4096))
336                 NOUVEAU_FALLBACK("picture too large, %dx%d\n", w, h);
337
338         fmt = NV40_GetPictTextureFormat(pPict->format);
339         if (!fmt)
340                 NOUVEAU_FALLBACK("picture format 0x%08x not supported\n",
341                                 pPict->format);
342
343         if (pPict->filter != PictFilterNearest &&
344             pPict->filter != PictFilterBilinear)
345                 NOUVEAU_FALLBACK("filter 0x%x not supported\n", pPict->filter);
346
347         return TRUE;
348 }
349
350 Bool
351 NV40EXACheckComposite(int op, PicturePtr psPict,
352                               PicturePtr pmPict,
353                               PicturePtr pdPict)
354 {
355         nv_pict_surface_format_t *fmt;
356         nv_pict_op_t *opr;
357
358         opr = NV40_GetPictOpRec(op);
359         if (!opr)
360                 NOUVEAU_FALLBACK("unsupported blend op 0x%x\n", op);
361
362         fmt = NV40_GetPictSurfaceFormat(pdPict->format);
363         if (!fmt)
364                 NOUVEAU_FALLBACK("dst picture format 0x%08x not supported\n",
365                                 pdPict->format);
366
367         if (!NV40EXACheckCompositeTexture(psPict))
368                 NOUVEAU_FALLBACK("src picture\n");
369         if (pmPict) {
370                 if (pmPict->componentAlpha && 
371                     PICT_FORMAT_RGB(pmPict->format) &&
372                     opr->src_alpha && opr->src_card_op != SF(ZERO))
373                         NOUVEAU_FALLBACK("mask CA + SA\n");
374                 if (!NV40EXACheckCompositeTexture(pmPict))
375                         NOUVEAU_FALLBACK("mask picture\n");
376         }
377
378         return TRUE;
379 }
380
381 Bool
382 NV40EXAPrepareComposite(int op, PicturePtr psPict,
383                                 PicturePtr pmPict,
384                                 PicturePtr pdPict,
385                                 PixmapPtr  psPix,
386                                 PixmapPtr  pmPix,
387                                 PixmapPtr  pdPix)
388 {
389         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
390         NVPtr pNv = NVPTR(pScrn);
391         struct nouveau_channel *chan = pNv->chan;
392         struct nouveau_grobj *curie = pNv->Nv3D;
393         nv_pict_op_t *blend;
394         int fpid = NV40EXA_FPID_PASS_COL0;
395         NV40EXA_STATE;
396
397         blend = NV40_GetPictOpRec(op);
398
399         NV40_SetupBlend(pScrn, blend, pdPict->format,
400                         (pmPict && pmPict->componentAlpha &&
401                          PICT_FORMAT_RGB(pmPict->format)));
402
403         NV40_SetupSurface(pScrn, pdPix, pdPict->format);
404         NV40EXATexture(pScrn, psPix, psPict, 0);
405
406         NV40_LoadVtxProg(pScrn, &nv40_vp_exa_render);
407         if (pmPict) {
408                 NV40EXATexture(pScrn, pmPix, pmPict, 1);
409
410                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
411                         if (blend->src_alpha)
412                                 fpid = NV40EXA_FPID_COMPOSITE_MASK_SA_CA;
413                         else
414                                 fpid = NV40EXA_FPID_COMPOSITE_MASK_CA;
415                 } else {
416                         fpid = NV40EXA_FPID_COMPOSITE_MASK;
417                 }
418
419                 state->have_mask = TRUE;
420         } else {
421                 fpid = NV40EXA_FPID_PASS_TEX0;
422
423                 state->have_mask = FALSE;
424         }
425
426         if (pdPict->format == PICT_a8)
427                 NV40_LoadFragProg(pScrn, nv40_fp_map_a8[fpid]);
428         else
429                 NV40_LoadFragProg(pScrn, nv40_fp_map[fpid]);
430
431         /* Appears to be some kind of cache flush, needed here at least
432          * sometimes.. funky text rendering otherwise :)
433          */
434         BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
435         OUT_RING  (chan, 2);
436         BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
437         OUT_RING  (chan, 1);
438
439         BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
440         OUT_RING  (chan, NV40TCL_BEGIN_END_TRIANGLES);
441
442         return TRUE;
443 }
444
445 #define xFixedToFloat(v) \
446         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
447
448 static inline void
449 NV40EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
450                                           float *x_ret, float *y_ret)
451 {
452         if (t) {
453                 PictVector v;
454                 v.vector[0] = IntToxFixed(x);
455                 v.vector[1] = IntToxFixed(y);
456                 v.vector[2] = xFixed1;
457                 PictureTransformPoint(t, &v);
458                 *x_ret = xFixedToFloat(v.vector[0]) / sx;
459                 *y_ret = xFixedToFloat(v.vector[1]) / sy;
460         } else {
461                 *x_ret = (float)x / sx;
462                 *y_ret = (float)y / sy;
463         }
464 }
465
/*
 * Pack a destination position into one word: y in the upper 16 bits, x in
 * the lower 16 bits.  Both values are converted to unsigned and masked
 * first, so a negative coordinate (dstY - height is routinely negative) is
 * neither left-shifted as a signed value nor allowed to spill into the other
 * half of the word.
 */
#define CV_XY(x, y)                                                            \
        ((((uint32_t)(y) & 0xffff) << 16) | ((uint32_t)(x) & 0xffff))

/*
 * Emit one vertex with source and mask texture coordinates.  Expects `chan`
 * and `curie` to be in scope at the point of use.
 */
#define CV_OUTm(sx,sy,mx,my,dx,dy) do {                                        \
        BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(8), 4);                  \
        OUT_RINGf (chan, (sx)); OUT_RINGf (chan, (sy));                        \
        OUT_RINGf (chan, (mx)); OUT_RINGf (chan, (my));                        \
        BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2I(0), 1);                    \
        OUT_RING  (chan, CV_XY((dx), (dy)));                                   \
} while(0)

/*
 * Emit one vertex with source texture coordinates only.  Expects `chan` and
 * `curie` to be in scope at the point of use.
 */
#define CV_OUT(sx,sy,dx,dy) do {                                               \
        BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(8), 2);                  \
        OUT_RINGf (chan, (sx)); OUT_RINGf (chan, (sy));                        \
        BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2I(0), 1);                    \
        OUT_RING  (chan, CV_XY((dx), (dy)));                                   \
} while(0)
479
480 void
481 NV40EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
482                                   int maskX, int maskY,
483                                   int dstX , int dstY,
484                                   int width, int height)
485 {
486         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
487         NVPtr pNv = NVPTR(pScrn);
488         struct nouveau_channel *chan = pNv->chan;
489         struct nouveau_grobj *curie = pNv->Nv3D;
490         float sX0, sX1, sX2, sY0, sY1, sY2;
491         float mX0, mX1, mX2, mY0, mY1, mY2;
492         NV40EXA_STATE;
493
494         /* We're drawing a triangle, we need to scissor it to a quad. */
495         /* The scissors are here for a good reason, we don't get the full
496          * image, but just a part.
497          */
498         /* Handling the cliprects is done for us already. */
499         BEGIN_RING(chan, curie, NV40TCL_SCISSOR_HORIZ, 2);
500         OUT_RING  (chan, (width << 16) | dstX);
501         OUT_RING  (chan, (height << 16) | dstY);
502
503         NV40EXATransformCoord(state->unit[0].transform, srcX, srcY - height,
504                               state->unit[0].width, state->unit[0].height,
505                               &sX0, &sY0);
506         NV40EXATransformCoord(state->unit[0].transform, srcX, srcY + height,
507                               state->unit[0].width, state->unit[0].height,
508                               &sX1, &sY1);
509         NV40EXATransformCoord(state->unit[0].transform,
510                               srcX + 2*width, srcY + height,
511                               state->unit[0].width,
512                               state->unit[0].height, &sX2, &sY2);
513
514         if (state->have_mask) {
515                 NV40EXATransformCoord(state->unit[1].transform,
516                                       maskX, maskY - height,
517                                       state->unit[1].width,
518                                       state->unit[1].height, &mX0, &mY0);
519                 NV40EXATransformCoord(state->unit[1].transform,
520                                       maskX, maskY + height,
521                                       state->unit[1].width,
522                                       state->unit[1].height, &mX1, &mY1);
523                 NV40EXATransformCoord(state->unit[1].transform,
524                                       maskX + 2*width, maskY + height,
525                                       state->unit[1].width,
526                                       state->unit[1].height, &mX2, &mY2);
527
528                 CV_OUTm(sX0, sY0, mX0, mY0, dstX, dstY - height);
529                 CV_OUTm(sX1, sY1, mX1, mY1, dstX, dstY + height);
530                 CV_OUTm(sX2, sY2, mX2, mY2, dstX + 2*width, dstY + height);
531         } else {
532                 CV_OUT(sX0, sY0, dstX, dstY - height);
533                 CV_OUT(sX1, sY1, dstX, dstY + height);
534                 CV_OUT(sX2, sY2, dstX + 2*width, dstY + height);
535         }
536 }
537
538 void
539 NV40EXADoneComposite(PixmapPtr pdPix)
540 {
541         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
542         NVPtr pNv = NVPTR(pScrn);
543         struct nouveau_channel *chan = pNv->chan;
544         struct nouveau_grobj *curie = pNv->Nv3D;
545
546         BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
547         OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
548 }
549
550 #define NV40TCL_CHIPSET_4X_MASK 0x00000baf
551 #define NV44TCL_CHIPSET_4X_MASK 0x00005450
552 Bool
553 NVAccelInitNV40TCL(ScrnInfoPtr pScrn)
554 {
555         NVPtr pNv = NVPTR(pScrn);
556         struct nouveau_channel *chan = pNv->chan;
557         struct nouveau_grobj *curie;
558         uint32_t class = 0, chipset;
559         int next_hw_id = 0, next_hw_offset = 0, i;
560
561         if (!nv40_fp_map_a8[0])
562                 NV40EXAHackupA8Shaders(pScrn);
563
564         chipset = (nvReadMC(pNv, NV_PMC_BOOT_0) >> 20) & 0xff;
565
566         if ( (chipset & 0xf0) == NV_ARCH_40) {
567                 chipset &= 0xf;
568                 if (NV40TCL_CHIPSET_4X_MASK & (1<<chipset))
569                         class = NV40TCL;
570                 else if (NV44TCL_CHIPSET_4X_MASK & (1<<chipset))
571                         class = NV44TCL;
572                 else {
573                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
574                                         "NV40EXA: Unknown chipset NV4%1x\n", chipset);
575                         return FALSE;
576                 }
577         } else if ( (chipset & 0xf0) == 0x60) {
578                 class = NV44TCL;
579         } else
580                 return TRUE;
581
582         if (!pNv->Nv3D) {
583                 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
584                         return FALSE;
585         }
586         curie = pNv->Nv3D;
587
588         if (!pNv->shader_mem) {
589                 if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
590                                    0, 0x1000, &pNv->shader_mem)) {
591                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
592                                    "Couldn't alloc fragprog buffer!\n");
593                         nouveau_grobj_free(&pNv->Nv3D);
594                         return FALSE;
595                 }
596                 if (nouveau_bo_map(pNv->shader_mem, NOUVEAU_BO_RDWR)) {
597                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
598                                    "Couldn't map fragprog buffer!\n");
599                         nouveau_grobj_free(&pNv->Nv3D);
600                         return FALSE;
601                 }
602         }
603
604         BEGIN_RING(chan, curie, NV40TCL_DMA_NOTIFY, 1);
605         OUT_RING  (chan, pNv->notify0->handle);
606         BEGIN_RING(chan, curie, NV40TCL_DMA_TEXTURE0, 2);
607         OUT_RING  (chan, chan->vram->handle);
608         OUT_RING  (chan, chan->gart->handle);
609         BEGIN_RING(chan, curie, NV40TCL_DMA_COLOR0, 2);
610         OUT_RING  (chan, chan->vram->handle);
611         OUT_RING  (chan, chan->vram->handle);
612
613         /* voodoo */
614         BEGIN_RING(chan, curie, 0x1ea4, 3);
615         OUT_RING  (chan, 0x00000010);
616         OUT_RING  (chan, 0x01000100);
617         OUT_RING  (chan, 0xff800006);
618         BEGIN_RING(chan, curie, 0x1fc4, 1);
619         OUT_RING  (chan, 0x06144321);
620         BEGIN_RING(chan, curie, 0x1fc8, 2);
621         OUT_RING  (chan, 0xedcba987);
622         OUT_RING  (chan, 0x00000021);
623         BEGIN_RING(chan, curie, 0x1fd0, 1);
624         OUT_RING  (chan, 0x00171615);
625         BEGIN_RING(chan, curie, 0x1fd4, 1);
626         OUT_RING  (chan, 0x001b1a19);
627         BEGIN_RING(chan, curie, 0x1ef8, 1);
628         OUT_RING  (chan, 0x0020ffff);
629         BEGIN_RING(chan, curie, 0x1d64, 1);
630         OUT_RING  (chan, 0x00d30000);
631         BEGIN_RING(chan, curie, 0x1e94, 1);
632         OUT_RING  (chan, 0x00000001);
633
634         /* This removes the the stair shaped tearing that i get. */
635         /* Verified on one G70 card that it doesn't cause regressions for people without the problem. */
636         /* The blob sets this up by default for NV43. */
637         BEGIN_RING(chan, curie, 0x1450, 1);
638         OUT_RING  (chan, 0x0000000F);
639
640         BEGIN_RING(chan, curie, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
641         OUT_RINGf (chan, 0.0);
642         OUT_RINGf (chan, 0.0);
643         OUT_RINGf (chan, 0.0);
644         OUT_RINGf (chan, 0.0);
645         OUT_RINGf (chan, 1.0);
646         OUT_RINGf (chan, 1.0);
647         OUT_RINGf (chan, 1.0);
648         OUT_RINGf (chan, 0.0);
649
650         /* default 3D state */
651         /*XXX: replace with the same state that the DRI emits on startup */
652         BEGIN_RING(chan, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1);
653         OUT_RING  (chan, 0);
654         BEGIN_RING(chan, curie, NV40TCL_STENCIL_BACK_ENABLE, 1);
655         OUT_RING  (chan, 0);
656         BEGIN_RING(chan, curie, NV40TCL_ALPHA_TEST_ENABLE, 1);
657         OUT_RING  (chan, 0);
658         BEGIN_RING(chan, curie, NV40TCL_DEPTH_WRITE_ENABLE, 2);
659         OUT_RING  (chan, 0);
660         OUT_RING  (chan, 0); 
661         BEGIN_RING(chan, curie, NV40TCL_COLOR_MASK, 1);
662         OUT_RING  (chan, 0x01010101); /* TR,TR,TR,TR */
663         BEGIN_RING(chan, curie, NV40TCL_CULL_FACE_ENABLE, 1);
664         OUT_RING  (chan, 0);
665         BEGIN_RING(chan, curie, NV40TCL_BLEND_ENABLE, 1);
666         OUT_RING  (chan, 0);
667         BEGIN_RING(chan, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2);
668         OUT_RING  (chan, 0);
669         OUT_RING  (chan, NV40TCL_COLOR_LOGIC_OP_COPY);
670         BEGIN_RING(chan, curie, NV40TCL_DITHER_ENABLE, 1);
671         OUT_RING  (chan, 0);
672         BEGIN_RING(chan, curie, NV40TCL_SHADE_MODEL, 1);
673         OUT_RING  (chan, NV40TCL_SHADE_MODEL_SMOOTH);
674         BEGIN_RING(chan, curie, NV40TCL_POLYGON_OFFSET_FACTOR,2);
675         OUT_RINGf (chan, 0.0);
676         OUT_RINGf (chan, 0.0);
677         BEGIN_RING(chan, curie, NV40TCL_POLYGON_MODE_FRONT, 2);
678         OUT_RING  (chan, NV40TCL_POLYGON_MODE_FRONT_FILL);
679         OUT_RING  (chan, NV40TCL_POLYGON_MODE_BACK_FILL);
680         BEGIN_RING(chan, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 0x20);
681         for (i=0;i<0x20;i++)
682                 OUT_RING  (chan, 0xFFFFFFFF);
683         for (i=0;i<16;i++) {
684                 BEGIN_RING(chan, curie, NV40TCL_TEX_ENABLE(i), 1);
685                 OUT_RING  (chan, 0);
686         }
687
688         BEGIN_RING(chan, curie, 0x1d78, 1);
689         OUT_RING  (chan, 0x110);
690
691         BEGIN_RING(chan, curie, NV40TCL_RT_ENABLE, 1);
692         OUT_RING  (chan, NV40TCL_RT_ENABLE_COLOR0);
693
694         BEGIN_RING(chan, curie, NV40TCL_RT_HORIZ, 2);
695         OUT_RING  (chan, (4096 << 16));
696         OUT_RING  (chan, (4096 << 16));
697         BEGIN_RING(chan, curie, NV40TCL_SCISSOR_HORIZ, 2);
698         OUT_RING  (chan, (4096 << 16));
699         OUT_RING  (chan, (4096 << 16));
700         BEGIN_RING(chan, curie, NV40TCL_VIEWPORT_HORIZ, 2);
701         OUT_RING  (chan, (4096 << 16));
702         OUT_RING  (chan, (4096 << 16));
703         BEGIN_RING(chan, curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
704         OUT_RING  (chan, (4095 << 16));
705         OUT_RING  (chan, (4095 << 16));
706
707         NV40_UploadVtxProg(pNv, &nv40_vp_exa_render, &next_hw_id);
708         for (i = 0; i < NV40EXA_FPID_MAX; i++) {
709                 NV30_UploadFragProg(pNv, nv40_fp_map[i], &next_hw_offset);
710                 NV30_UploadFragProg(pNv, nv40_fp_map_a8[i], &next_hw_offset);
711         }
712
713         NV40_UploadVtxProg(pNv, &nv40_vp_video, &next_hw_id);
714         NV30_UploadFragProg(pNv, &nv40_fp_yv12_bicubic, &next_hw_offset);
715         NV30_UploadFragProg(pNv, &nv30_fp_yv12_bilinear, &next_hw_offset);
716
717         return TRUE;
718 }