Merge branch 'randr-1.2' into nv50-branch
[nouveau] / src / nv30_exa.c
1 #include "nv_include.h"
2 #include "nv_shaders.h"
3
/*
 * Method offsets used by the CV_OUT/CV_OUTm macros to submit per-vertex
 * attribute data.  `d` selects the attribute slot; it is parenthesised so
 * that an expression argument (e.g. `base + 1`) is scaled as a whole.
 */
#define NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2F_X(d)         (0x00001880 + (d) * 0x0008)
#define NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2F_Y(d)         (0x00001884 + (d) * 0x0008)
#define NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2I(d)           (0x00001900 + (d) * 0x0004)
7
/* One row of the render-target format table (see NV30SurfaceFormat). */
typedef struct nv_pict_surface_format {
	/* Render picture format code; -1 marks the end of a table. */
	int pict_fmt;
	/* Value emitted to the card for this format by NV30_SetupSurface(). */
	uint32_t card_fmt;
} nv_pict_surface_format_t;
12
/* One row of the texture format table (see NV30TextureFormat). */
typedef struct nv_pict_texture_format {
	/* Render picture format code; -1 marks the end of a table. */
	int pict_fmt;
	/* Card texture format, shifted into the format word by NV30EXATexture(). */
	uint32_t card_fmt;
	/* Complete swizzle word, emitted unchanged by NV30EXATexture(). */
	uint32_t card_swz;
} nv_pict_texture_format_t;
18
19 typedef struct nv_pict_op {
20         Bool     src_alpha;
21         Bool     dst_alpha;
22         uint32_t src_card_op;
23         uint32_t dst_card_op;
24 } nv_pict_op_t;
25
26 typedef struct nv30_exa_state {
27         Bool have_mask;
28
29         struct {
30                 PictTransformPtr transform;
31                 float width;
32                 float height;
33         } unit[2];
34 } nv30_exa_state_t;
35 static nv30_exa_state_t exa_state;
36 #define NV30EXA_STATE nv30_exa_state_t *state = &exa_state
37
38 static nv_pict_surface_format_t
39 NV30SurfaceFormat[] = {
40         { PICT_a8r8g8b8 , 0x148 },
41         { PICT_x8r8g8b8 , 0x145 },
42         { PICT_r5g6b5   , 0x143 },
43 //      { PICT_a8       , 0x149 },
44         { -1, ~0 }
45 };
46
47 static nv_pict_surface_format_t *
48 NV30_GetPictSurfaceFormat(int format)
49 {
50         int i = 0;
51
52         while (NV30SurfaceFormat[i].pict_fmt != -1) {
53                 if (NV30SurfaceFormat[i].pict_fmt == format)
54                         return &NV30SurfaceFormat[i];
55                 i++;
56         }
57
58         return NULL;
59 }
60
61
/* should be in nouveau_reg.h at some point.. */

/* Bit positions of the four S0 selector fields inside the swizzle word. */
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT      14
#define NV30TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT      12
#define NV30TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT      10
#define NV30TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT       8

/* Values an S0 selector field may hold. */
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_ZERO        0
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_ONE         1
#define NV30TCL_TX_SWIZZLE_UNIT_S0_X_S1          2

/* Bit positions of the four S1 selector fields inside the swizzle word. */
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT       6
#define NV30TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT       4
#define NV30TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT       2
#define NV30TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT       0

/* Values an S1 selector field may hold. */
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_X           3
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_Y           2
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_Z           1
#define NV30TCL_TX_SWIZZLE_UNIT_S1_X_W           0
78
/*
 * Builds one nv_pict_texture_format_t initialiser: the PICT_ suffix `r`, the
 * card format `tf`, four S0 selectors and four S1 selectors.  Selector value
 * names exist only under the ..._S0_X_ and ..._S1_X_ prefixes, so every
 * component pastes onto those prefixes and differs only in its shift.
 */
#define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                       \
{                                                                             \
	PICT_##r,                                                             \
	(tf),                                                                 \
	(NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0x                                  \
			<< NV30TCL_TX_SWIZZLE_UNIT_S0_X_SHIFT) |              \
	(NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0y                                  \
			<< NV30TCL_TX_SWIZZLE_UNIT_S0_Y_SHIFT) |              \
	(NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0z                                  \
			<< NV30TCL_TX_SWIZZLE_UNIT_S0_Z_SHIFT) |              \
	(NV30TCL_TX_SWIZZLE_UNIT_S0_X_##ts0w                                  \
			<< NV30TCL_TX_SWIZZLE_UNIT_S0_W_SHIFT) |              \
	(NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1x                                  \
			<< NV30TCL_TX_SWIZZLE_UNIT_S1_X_SHIFT) |              \
	(NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1y                                  \
			<< NV30TCL_TX_SWIZZLE_UNIT_S1_Y_SHIFT) |              \
	(NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1z                                  \
			<< NV30TCL_TX_SWIZZLE_UNIT_S1_Z_SHIFT) |              \
	(NV30TCL_TX_SWIZZLE_UNIT_S1_X_##ts1w                                  \
			<< NV30TCL_TX_SWIZZLE_UNIT_S1_W_SHIFT)                \
}
92 static nv_pict_texture_format_t
93 NV30TextureFormat[] = {
94         _(a8r8g8b8, 0x85,   S1,   S1,   S1,   S1, X, Y, Z, W),
95         _(x8r8g8b8, 0x85,   S1,   S1,   S1,  ONE, X, Y, Z, W),
96         _(x8b8g8r8, 0x85,   S1,   S1,   S1,  ONE, Z, Y, X, W),
97         _(a1r5g5b5, 0x82,   S1,   S1,   S1,   S1, X, Y, Z, W),
98         _(x1r5g5b5, 0x82,   S1,   S1,   S1,  ONE, X, Y, Z, W),
99         _(  r5g6b5, 0x84,   S1,   S1,   S1,   S1, X, Y, Z, W),
100         _(      a8, 0x81, ZERO, ZERO, ZERO,   S1, X, X, X, X),
101         { -1, ~0, ~0 }
102 };
103
104 static nv_pict_texture_format_t *
105 NV30_GetPictTextureFormat(int format)
106 {
107         int i = 0;
108
109         while (NV30TextureFormat[i].pict_fmt != -1) {
110                 if (NV30TextureFormat[i].pict_fmt == format)
111                         return &NV30TextureFormat[i];
112                 i++;
113         }
114
115         return NULL;
116 }
117
/* Blend factor values, as stored in nv_pict_op_t and sent by NV30_SetupBlend(). */
#define NV30_TCL_PRIMITIVE_3D_BF_ZERO                   0x0000
#define NV30_TCL_PRIMITIVE_3D_BF_ONE                    0x0001

#define NV30_TCL_PRIMITIVE_3D_BF_SRC_COLOR              0x0300
#define NV30_TCL_PRIMITIVE_3D_BF_ONE_MINUS_SRC_COLOR    0x0301
#define NV30_TCL_PRIMITIVE_3D_BF_SRC_ALPHA              0x0302
#define NV30_TCL_PRIMITIVE_3D_BF_ONE_MINUS_SRC_ALPHA    0x0303

#define NV30_TCL_PRIMITIVE_3D_BF_DST_ALPHA              0x0304
#define NV30_TCL_PRIMITIVE_3D_BF_ONE_MINUS_DST_ALPHA    0x0305
#define NV30_TCL_PRIMITIVE_3D_BF_DST_COLOR              0x0306
#define NV30_TCL_PRIMITIVE_3D_BF_ONE_MINUS_DST_COLOR    0x0307

#define NV30_TCL_PRIMITIVE_3D_BF_ALPHA_SATURATE         0x0308

/* Shorthand: BF(ONE) expands to NV30_TCL_PRIMITIVE_3D_BF_ONE, and so on. */
#define BF(bf) NV30_TCL_PRIMITIVE_3D_BF_##bf
130
131 static nv_pict_op_t 
132 NV30PictOp[] = {
133 /* Clear       */ { 0, 0, BF(               ZERO), BF(               ZERO) },
134 /* Src         */ { 0, 0, BF(                ONE), BF(               ZERO) },
135 /* Dst         */ { 0, 0, BF(               ZERO), BF(                ONE) },
136 /* Over        */ { 1, 0, BF(                ONE), BF(ONE_MINUS_SRC_ALPHA) },
137 /* OverReverse */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(                ONE) },
138 /* In          */ { 0, 1, BF(          DST_ALPHA), BF(               ZERO) },
139 /* InReverse   */ { 1, 0, BF(               ZERO), BF(          SRC_ALPHA) },
140 /* Out         */ { 0, 1, BF(ONE_MINUS_DST_ALPHA), BF(               ZERO) },
141 /* OutReverse  */ { 1, 0, BF(               ZERO), BF(ONE_MINUS_SRC_ALPHA) },
142 /* Atop        */ { 1, 1, BF(          DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
143 /* AtopReverse */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(          SRC_ALPHA) },
144 /* Xor         */ { 1, 1, BF(ONE_MINUS_DST_ALPHA), BF(ONE_MINUS_SRC_ALPHA) },
145 /* Add         */ { 0, 0, BF(                ONE), BF(                ONE) }
146 };
147
148 static nv_pict_op_t *
149 NV30_GetPictOpRec(int op)
150 {
151         if (op >= PictOpSaturate)
152                 return NULL;
153         return &NV30PictOp[op];
154 }
155
/*
 * FALLBACK(fmt, ...) makes the enclosing function return FALSE.  The first
 * variant additionally logs the reason through ErrorF(); it is compiled out
 * by default, so the arguments are discarded.
 */
#if 0
#define FALLBACK(fmt,args...) do {                                      \
	ErrorF("FALLBACK %s:%d> " fmt, __func__, __LINE__, ##args);     \
	return FALSE;                                                   \
} while(0)
#else
#define FALLBACK(fmt,args...) do {                                      \
	return FALSE;                                                   \
} while(0)
#endif
166
167 static void
168 NV30_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
169 {
170         NVPtr pNv = NVPTR(pScrn);
171         static int next_hw_id = 0;
172         int i;
173
174         if (!shader->hw_id) {
175                 shader->hw_id = next_hw_id;
176
177                 NVDmaStart(pNv, NvSub3D,
178                                 NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_FROM_ID, 1);
179                 NVDmaNext (pNv, (shader->hw_id));
180
181                 for (i=0; i<shader->size; i+=4) {
182                         NVDmaStart(pNv, NvSub3D,
183                                         NV30_TCL_PRIMITIVE_3D_VP_UPLOAD_INST0,
184                                         4);
185                         NVDmaNext (pNv, shader->data[i + 0]);
186                         NVDmaNext (pNv, shader->data[i + 1]);
187                         NVDmaNext (pNv, shader->data[i + 2]);
188                         NVDmaNext (pNv, shader->data[i + 3]);
189                         next_hw_id++;
190                 }
191         }
192
193         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_PROGRAM_START_ID, 1);
194         NVDmaNext (pNv, (shader->hw_id));
195
196         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VP_IN_REG, 2);
197         NVDmaNext (pNv, shader->card_priv.NV30VP.vp_in_reg);
198         NVDmaNext (pNv, shader->card_priv.NV30VP.vp_out_reg);
199 }
200
201 static void
202 NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
203 {
204         NVPtr pNv = NVPTR(pScrn);
205         static NVAllocRec *fp_mem = NULL;
206         static int next_hw_id_offset = 0;
207
208         if (!fp_mem) {
209                 fp_mem = NVAllocateMemory(pNv, NOUVEAU_MEM_FB, 0x1000);
210                 if (!fp_mem) {
211                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
212                                    "Couldn't alloc fragprog buffer!\n");
213                         return;
214                 }
215         }
216
217         if (!shader->hw_id) {
218                 memcpy(fp_mem->map + next_hw_id_offset, shader->data,
219                                                         shader->size *
220                                                         sizeof(uint32_t));
221
222                 shader->hw_id  = fp_mem->offset;
223                 shader->hw_id += next_hw_id_offset;
224
225                 next_hw_id_offset += (shader->size * sizeof(uint32_t));
226                 next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
227         }
228
229         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_FP_ACTIVE_PROGRAM, 1);
230         NVDmaNext (pNv, shader->hw_id | 1);
231
232         if (pNv->Architecture == NV_30) {
233                 NVDmaStart(pNv, NvSub3D, 0x1d60, 1);
234                 NVDmaNext (pNv, 0); /* USES_KIL (1<<7) == 0 */
235                 NVDmaStart(pNv, NvSub3D, 0x1450, 1);
236                 NVDmaNext (pNv, shader->card_priv.NV30FP.num_regs << 16);
237         } else {
238                 NVDmaStart(pNv, NvSub3D, 0x1d60, 1);
239                 NVDmaNext (pNv, (0<<7) /* !USES_KIL */ |
240                          (shader->card_priv.NV30FP.num_regs << 24));
241         }
242 }
243
244 static void
245 NV30_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend, Bool dest_has_alpha,
246                                                         Bool component_alpha)
247 {
248         NVPtr pNv = NVPTR(pScrn);
249         uint32_t sblend, dblend;
250
251         sblend = blend->src_card_op;
252         dblend = blend->dst_card_op;
253
254         if (!dest_has_alpha && blend->dst_alpha) {
255                 if (sblend == BF(DST_ALPHA))
256                         sblend = BF(ONE);
257                 else if (sblend == BF(ONE_MINUS_DST_ALPHA))
258                         sblend = BF(ZERO);
259         }
260
261         if (component_alpha && blend->src_alpha) {
262                 if (dblend == BF(SRC_ALPHA))
263                         dblend = BF(SRC_COLOR);
264                 else if (dblend == BF(ONE_MINUS_SRC_ALPHA))
265                         dblend = BF(ONE_MINUS_SRC_COLOR);
266         }
267
268         if (sblend == BF(ONE) && dblend == BF(ZERO)) {
269         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
270         NVDmaNext (pNv, 0);
271         } else {
272         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 5);
273         NVDmaNext (pNv, 1);
274         NVDmaNext (pNv, (sblend << 16) | sblend);
275         NVDmaNext (pNv, (dblend << 16) | dblend);
276         NVDmaNext (pNv, 0x00000000);                    /* Blend colour */
277         NVDmaNext (pNv, (0x8006 << 16) | 0x8006);       /* FUNC_ADD, FUNC_ADD */
278         }
279 }
280
281 static Bool
282 NV30EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
283 {
284         NVPtr pNv = NVPTR(pScrn);
285         nv_pict_texture_format_t *fmt;
286         uint32_t card_filter, card_repeat;
287         NV30EXA_STATE;
288
289         fmt = NV30_GetPictTextureFormat(pPict->format);
290         if (!fmt)
291                 return FALSE;
292
293         if (pPict->repeat && pPict->repeatType == RepeatNormal)
294                 card_repeat = 1;
295         else
296                 card_repeat = 3;
297
298         if (pPict->filter == PictFilterBilinear)
299                 card_filter = 2;
300         else
301                 card_filter = 1;
302
303         NVDmaStart(pNv, NvSub3D,
304                         NV30_TCL_PRIMITIVE_3D_TX_ADDRESS_UNIT(unit), 8);
305         NVDmaNext (pNv, NVAccelGetPixmapOffset(pPix));
306         NVDmaNext (pNv, (2 << 4)  /* 2D */ |
307                         (fmt->card_fmt << 8) |
308                         (1 << 13) /* NPOT */ |
309                         (1<<16) /* 1 mipmap level */ |
310                         (1<<0) /* NvDmaFB */ |
311                         (1<<3) /* border disable? */);
312         NVDmaNext (pNv, (card_repeat <<  0) /* S */ |
313                         (card_repeat <<  8) /* T */ |
314                         (card_repeat << 16) /* R */);
315         NVDmaNext (pNv, 0x80000000);
316         NVDmaNext (pNv, fmt->card_swz);
317         NVDmaNext (pNv, (card_filter << 16) /* min */ |
318                         (card_filter << 24) /* mag */ |
319                         0x3fd6 /* engine lock */);
320         NVDmaNext (pNv, (pPix->drawable.width << 16) | pPix->drawable.height);
321         NVDmaNext (pNv, 0); /* border ARGB */
322         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_TX_DEPTH_UNIT(unit), 1);
323         NVDmaNext (pNv, (1 << 20) /* depth */ |
324                         (uint32_t)exaGetPixmapPitch(pPix));
325
326         state->unit[unit].width         = (float)pPix->drawable.width;
327         state->unit[unit].height        = (float)pPix->drawable.height;
328         state->unit[unit].transform     = pPict->transform;
329
330         return TRUE;
331 }
332
333 static Bool
334 NV30_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PictFormatShort format)
335 {
336         NVPtr pNv = NVPTR(pScrn);
337         nv_pict_surface_format_t *fmt;
338
339         fmt = NV30_GetPictSurfaceFormat(format);
340         if (!fmt) {
341                 ErrorF("AIII no format\n");
342                 return FALSE;
343         }
344
345         NVDmaStart(pNv, NvSub3D, 0x208, 3);
346         NVDmaNext (pNv, fmt->card_fmt);
347         NVDmaNext (pNv, (uint32_t)exaGetPixmapPitch(pPix));
348         NVDmaNext (pNv, NVAccelGetPixmapOffset(pPix));
349
350         return TRUE;
351 }
352
353 static Bool
354 NV30EXACheckCompositeTexture(PicturePtr pPict)
355 {
356         nv_pict_texture_format_t *fmt;
357         int w = pPict->pDrawable->width;
358         int h = pPict->pDrawable->height;
359
360         if ((w > 4096) || (h>4096))
361                 FALLBACK("picture too large, %dx%d\n", w, h);
362
363         fmt = NV30_GetPictTextureFormat(pPict->format);
364         if (!fmt)
365                 FALLBACK("picture format 0x%08x not supported\n",
366                                 pPict->format);
367
368         if (pPict->filter != PictFilterNearest &&
369                         pPict->filter != PictFilterBilinear)
370                 FALLBACK("filter 0x%x not supported\n", pPict->filter);
371
372         if (pPict->repeat && (pPict->repeat != RepeatNormal &&
373                                 pPict->repeatType != RepeatNone))
374                 FALLBACK("repeat 0x%x not supported\n", pPict->repeatType);
375
376         return TRUE;
377 }
378
379 Bool
380 NV30EXACheckComposite(int op, PicturePtr psPict,
381                               PicturePtr pmPict,
382                               PicturePtr pdPict)
383 {
384         nv_pict_surface_format_t *fmt;
385         nv_pict_op_t *opr;
386
387         opr = NV30_GetPictOpRec(op);
388         if (!opr)
389                 FALLBACK("unsupported blend op 0x%x\n", op);
390
391         fmt = NV30_GetPictSurfaceFormat(pdPict->format);
392         if (!fmt)
393                 FALLBACK("dst picture format 0x%08x not supported\n",
394                                 pdPict->format);
395
396         if (!NV30EXACheckCompositeTexture(psPict))
397                 FALLBACK("src picture\n");
398         if (pmPict) {
399                 if (pmPict->componentAlpha &&
400                                 PICT_FORMAT_RGB(pmPict->format) &&
401                                 opr->src_alpha && opr->src_card_op != BF(ZERO))
402                         FALLBACK("mask CA + SA\n");
403                 if (!NV30EXACheckCompositeTexture(pmPict))
404                         FALLBACK("mask picture\n");
405         }
406         
407         return TRUE;
408 }
409
410 Bool
411 NV30EXAPrepareComposite(int op, PicturePtr psPict,
412                                 PicturePtr pmPict,
413                                 PicturePtr pdPict,
414                                 PixmapPtr  psPix,
415                                 PixmapPtr  pmPix,
416                                 PixmapPtr  pdPix)
417 {
418         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
419         NVPtr pNv = NVPTR(pScrn);
420         nv_pict_op_t *blend;
421         NV30EXA_STATE;
422
423         blend = NV30_GetPictOpRec(op);
424
425         NV30_SetupBlend(pScrn, blend, PICT_FORMAT_A(pdPict->format),
426                         (pmPict && pmPict->componentAlpha &&
427                          PICT_FORMAT_RGB(pmPict->format)));
428
429         NV30_SetupSurface(pScrn, pdPix, pdPict->format);
430         NV30EXATexture(pScrn, psPix, psPict, 0);
431
432         NV30_LoadVtxProg(pScrn, &nv40_vp_exa_render);
433         if (pmPict) {
434                 NV30EXATexture(pScrn, pmPix, pmPict, 1);
435
436                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
437                         if (blend->src_alpha)
438                         NV30_LoadFragProg(pScrn, &nv30_fp_composite_mask_sa_ca);
439                         else
440                         NV30_LoadFragProg(pScrn, &nv30_fp_composite_mask_ca);
441                 } else {
442                         NV30_LoadFragProg(pScrn, &nv30_fp_composite_mask);
443                 }
444
445                 state->have_mask = TRUE;
446         } else {
447                 NV30_LoadFragProg(pScrn, &nv30_fp_pass_tex0);
448
449                 state->have_mask = FALSE;
450         }
451
452         /* Appears to be some kind of cache flush, needed here at least
453          * sometimes.. funky text rendering otherwise :)
454          */
455         NVDmaStart(pNv, NvSub3D, 0x1fd8, 1);
456         NVDmaNext (pNv, 2);
457         NVDmaStart(pNv, NvSub3D, 0x1fd8, 1);
458         NVDmaNext (pNv, 1);
459
460         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BEGIN_END, 1);
461         NVDmaNext (pNv, 8); /* GL_QUADS */
462
463         return TRUE;
464 }
465
/*
 * Convert an xFixed value to floating point: integer part plus the
 * fractional part scaled by 1/65536.  Note that `v` is evaluated twice.
 */
#define xFixedToFloat(v)                                \
	((float)xFixedToInt((v)) +                      \
	 ((float)xFixedFrac(v) / 65536.0))
468
469 static void
470 NV30EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
471                                           float *x_ret, float *y_ret)
472 {
473         PictVector v;
474
475         if (t) {
476                 v.vector[0] = IntToxFixed(x);
477                 v.vector[1] = IntToxFixed(y);
478                 v.vector[2] = xFixed1;
479                 PictureTransformPoint(t, &v);
480                 *x_ret = xFixedToFloat(v.vector[0]) / sx;
481                 *y_ret = xFixedToFloat(v.vector[1]) / sy;
482         } else {
483                 *x_ret = (float)x / sx;
484                 *y_ret = (float)y / sy;
485         }
486 }
487
/*
 * Pack a destination coordinate into one word: dy in the upper 16 bits, dx
 * in the lower 16.  Each half is masked so that a negative dx cannot
 * sign-extend over dy, and no negative value is ever left-shifted.
 */
#define CV_PACK_XY(dx,dy)                                                      \
	((((uint32_t)(dy) & 0xffff) << 16) | ((uint32_t)(dx) & 0xffff))

/*
 * Emit one vertex with source and mask texture coordinates (CV_OUTm) or
 * with source coordinates only (CV_OUT).  Both expect a variable named
 * `pNv` in the calling scope.
 */
#define CV_OUTm(sx,sy,mx,my,dx,dy) do {                                        \
	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2F_X(8), 4);   \
	NVDmaFloat(pNv, (sx)); NVDmaFloat(pNv, (sy));                          \
	NVDmaFloat(pNv, (mx)); NVDmaFloat(pNv, (my));                          \
	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2I(0), 1);     \
	NVDmaNext (pNv, CV_PACK_XY((dx), (dy)));                               \
} while(0)
#define CV_OUT(sx,sy,dx,dy) do {                                               \
	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2F_X(8), 2);   \
	NVDmaFloat(pNv, (sx)); NVDmaFloat(pNv, (sy));                          \
	NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VTX_ATTR_2I(0), 1);     \
	NVDmaNext (pNv, CV_PACK_XY((dx), (dy)));                               \
} while(0)
501
502 void
503 NV30EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
504                                   int maskX, int maskY,
505                                   int dstX , int dstY,
506                                   int width, int height)
507 {
508         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
509         NVPtr pNv = NVPTR(pScrn);
510         float sX0, sX1, sY0, sY1;
511         float mX0, mX1, mY0, mY1;
512         NV30EXA_STATE;
513
514         NV30EXATransformCoord(state->unit[0].transform, srcX, srcY,
515                               state->unit[0].width,
516                               state->unit[0].height, &sX0, &sY0);
517         NV30EXATransformCoord(state->unit[0].transform,
518                               srcX + width, srcY + height,
519                               state->unit[0].width,
520                               state->unit[0].height, &sX1, &sY1);
521
522         if (state->have_mask) {
523                 NV30EXATransformCoord(state->unit[1].transform, maskX, maskY,
524                                       state->unit[1].width,
525                                       state->unit[1].height, &mX0, &mY0);
526                 NV30EXATransformCoord(state->unit[1].transform,
527                                       maskX + width, maskY + height,
528                                       state->unit[1].width,
529                                       state->unit[1].height, &mX1, &mY1);
530                 CV_OUTm(sX0 , sY0 , mX0, mY0, dstX        ,          dstY);
531                 CV_OUTm(sX1 , sY0 , mX1, mY0, dstX + width,          dstY);
532                 CV_OUTm(sX1 , sY1 , mX1, mY1, dstX + width, dstY + height);
533                 CV_OUTm(sX0 , sY1 , mX0, mY1, dstX        , dstY + height);
534         } else {
535                 CV_OUT(sX0 , sY0 , dstX        ,          dstY);
536                 CV_OUT(sX1 , sY0 , dstX + width,          dstY);
537                 CV_OUT(sX1 , sY1 , dstX + width, dstY + height);
538                 CV_OUT(sX0 , sY1 , dstX        , dstY + height);
539         }
540
541         NVDmaKickoff(pNv);
542 }
543
544 void
545 NV30EXADoneComposite(PixmapPtr pdPix)
546 {
547         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
548         NVPtr pNv = NVPTR(pScrn);
549
550         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BEGIN_END, 1);
551         NVDmaNext (pNv, 0);
552 }
553
554 Bool
555 NVAccelInitNV40TCL(ScrnInfoPtr pScrn)
556 {
557         NVPtr pNv = NVPTR(pScrn);
558         static int have_object = FALSE;
559         uint32_t class = 0, chipset;
560         int i;
561
562 #undef  NV40_TCL_PRIMITIVE_3D
563 #define NV40_TCL_PRIMITIVE_3D                 0x4097
564 #define NV40_TCL_PRIMITIVE_3D_CHIPSET_4X_MASK 0x00000baf
565 #define NV44_TCL_PRIMITIVE_3D                 0x4497
566 #define NV44_TCL_PRIMITIVE_3D_CHIPSET_4X_MASK 0x00005450
567
568         chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
569         if ((chipset & 0xf0) != 0x40)
570                 return TRUE;
571         chipset &= 0xf;
572
573         if (NV40_TCL_PRIMITIVE_3D_CHIPSET_4X_MASK & (1<<chipset))
574                 class = NV40_TCL_PRIMITIVE_3D;
575         else
576         if (NV44_TCL_PRIMITIVE_3D_CHIPSET_4X_MASK & (1<<chipset))
577                 class = NV44_TCL_PRIMITIVE_3D;
578         else {
579                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
580                            "NV30EXA: Unknown chipset NV%02x\n", chipset);
581                 return FALSE;
582         }
583
584         if (!have_object) {
585                 if (!NVDmaCreateContextObject(pNv, Nv3D, class))
586                         return FALSE;
587                 have_object = TRUE;
588         }
589
590         NVDmaSetObjectOnSubchannel(pNv, NvSub3D, Nv3D);
591
592         NVDmaStart(pNv, NvSub3D, 0x180, 1);
593         NVDmaNext (pNv, NvDmaNotifier0);
594         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT1, 2);
595         NVDmaNext (pNv, NvDmaFB);
596         NVDmaNext (pNv, NvDmaFB);
597         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT8, 1);
598         NVDmaNext (pNv, NvDmaFB);
599         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SET_OBJECT4, 2);
600         NVDmaNext (pNv, NvDmaFB);
601         NVDmaNext (pNv, NvDmaFB);
602
603         /* voodoo */
604         NVDmaStart(pNv, NvSub3D, 0x1ea4, 3);
605         NVDmaNext(pNv, 0x00000010);
606         NVDmaNext(pNv, 0x01000100);
607         NVDmaNext(pNv, 0xff800006);
608         NVDmaStart(pNv, NvSub3D, 0x1fc4, 1);
609         NVDmaNext(pNv, 0x06144321);
610         NVDmaStart(pNv, NvSub3D, 0x1fc8, 2);
611         NVDmaNext(pNv, 0xedcba987);
612         NVDmaNext(pNv, 0x00000021);
613         NVDmaStart(pNv, NvSub3D, 0x1fd0, 1);
614         NVDmaNext(pNv, 0x00171615);
615         NVDmaStart(pNv, NvSub3D, 0x1fd4, 1);
616         NVDmaNext(pNv, 0x001b1a19);
617         NVDmaStart(pNv, NvSub3D, 0x1ef8, 1);
618         NVDmaNext(pNv, 0x0020ffff);
619         NVDmaStart(pNv, NvSub3D, 0x1d64, 1);
620         NVDmaNext(pNv, 0x00d30000);
621         NVDmaStart(pNv, NvSub3D, 0x1e94, 1);
622         NVDmaNext(pNv, 0x00000001);
623
624         /* identity viewport transform */
625         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VIEWPORT_XFRM_OX, 8);
626         NVDmaFloat(pNv, 0.0);
627         NVDmaFloat(pNv, 0.0);
628         NVDmaFloat(pNv, 0.0);
629         NVDmaFloat(pNv, 0.0);
630         NVDmaFloat(pNv, 1.0);
631         NVDmaFloat(pNv, 1.0);
632         NVDmaFloat(pNv, 1.0);
633         NVDmaFloat(pNv, 0.0);
634
635         /* default 3D state */
636         /*XXX: replace with the same state that the DRI emits on startup */
637         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_STENCIL_FRONT_ENABLE, 1);
638         NVDmaNext (pNv, 0);
639         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_STENCIL_BACK_ENABLE, 1);
640         NVDmaNext (pNv, 0);
641         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_ALPHA_FUNC_ENABLE, 1);
642         NVDmaNext (pNv, 0);
643         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_DEPTH_WRITE_ENABLE, 2);
644         NVDmaNext (pNv, 0); /* wr disable */
645         NVDmaNext (pNv, 0); /* test disable */
646         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_COLOR_MASK, 1);
647         NVDmaNext (pNv, 0x01010101); /* TR,TR,TR,TR */
648         NVDmaStart(pNv, NvSub3D, NV40_TCL_PRIMITIVE_3D_COLOR_MASK_BUFFER123, 1);
649         NVDmaNext (pNv, 0x0000fff0);
650         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_CULL_FACE_ENABLE, 1);
651         NVDmaNext (pNv, 0);
652         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
653         NVDmaNext (pNv, 0);
654         NVDmaStart(pNv, NvSub3D,
655                         NV30_TCL_PRIMITIVE_3D_LOGIC_OP_ENABLE, 2);
656         NVDmaNext (pNv, 0);
657         NVDmaNext (pNv, 0x1503);
658         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_DITHER_ENABLE, 1);
659         NVDmaNext (pNv, 0);
660         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SHADE_MODEL, 1);
661         NVDmaNext (pNv, 0x1d01); /* GL_SMOOTH */
662         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_POLYGON_OFFSET_FACTOR,2);
663         NVDmaFloat(pNv, 0.0);
664         NVDmaFloat(pNv, 0.0);
665         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_POLYGON_MODE_FRONT, 2);
666         NVDmaNext (pNv, 0x1b02); /* FRONT = GL_FILL */
667         NVDmaNext (pNv, 0x1b02); /* BACK  = GL_FILL */
668         NVDmaStart(pNv, NvSub3D,
669                         NV30_TCL_PRIMITIVE_3D_POLYGON_STIPPLE_PATTERN(0), 0x20);
670         for (i=0;i<0x20;i++)
671                 NVDmaNext(pNv, 0xFFFFFFFF);
672         for (i=0;i<16;i++) {
673                 NVDmaStart(pNv, NvSub3D,
674                                 NV30_TCL_PRIMITIVE_3D_TX_ENABLE_UNIT(i), 1);
675                 NVDmaNext(pNv, 0);
676         }
677
678         NVDmaStart(pNv, NvSub3D, 0x1d78, 1);
679         NVDmaNext (pNv, 0x110);
680
681         NVDmaStart(pNv, NvSub3D, 0x0220, 1);
682         NVDmaNext (pNv, 1);
683         NVDmaStart(pNv, NvSub3D,
684                         NV30_TCL_PRIMITIVE_3D_VIEWPORT_COLOR_BUFFER_DIM0, 2);
685         NVDmaNext (pNv, (4096 << 16));
686         NVDmaNext (pNv, (4096 << 16));
687         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_SCISSOR_WIDTH_XPOS, 2);
688         NVDmaNext (pNv, (4096 << 16));
689         NVDmaNext (pNv, (4096 << 16));
690         NVDmaStart(pNv, NvSub3D, NV30_TCL_PRIMITIVE_3D_VIEWPORT_DIMS_0, 2);
691         NVDmaNext (pNv, (4096 << 16));
692         NVDmaNext (pNv, (4096 << 16));
693         NVDmaStart(pNv, NvSub3D,
694                         NV30_TCL_PRIMITIVE_3D_VIEWPORT_COLOR_BUFFER_OFS0, 2);
695         NVDmaNext (pNv, (4095 << 16));
696         NVDmaNext (pNv, (4095 << 16));
697
698         return TRUE;
699 }
700