A bunch more cleaned up bios opcodes
[nouveau] / src / nv40_exa.c
1 /*
2  * Copyright 2007 Ben Skeggs
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20  * SOFTWARE.
21  */
22
23 #include "nv_include.h"
24 #include "nv_shaders.h"
25
/*
 * One row of the render-target format table: pairs a Render picture
 * format with the colour format programmed into the 3D object.
 */
struct nv_pict_surface_format {
        int      pict_fmt;      /* PICT_* code; -1 terminates a table */
        uint32_t card_fmt;      /* NV40TCL_RT_FORMAT_COLOR_* value */
};
typedef struct nv_pict_surface_format nv_pict_surface_format_t;
30
/*
 * One row of the texture format table: pairs a Render picture format with
 * the texture format and channel swizzle used to sample it.
 */
struct nv_pict_texture_format {
        int      pict_fmt;      /* PICT_* code; -1 terminates a table */
        uint32_t card_fmt;      /* NV40TCL_TEX_FORMAT_FORMAT_* value */
        uint32_t card_swz;      /* OR of NV40TCL_TEX_SWIZZLE_* fields */
};
typedef struct nv_pict_texture_format nv_pict_texture_format_t;
36
37 typedef struct nv_pict_op {
38         Bool     src_alpha;
39         Bool     dst_alpha;
40         uint32_t src_card_op;
41         uint32_t dst_card_op;
42 } nv_pict_op_t;
43
44 typedef struct nv40_exa_state {
45         Bool have_mask;
46
47         struct {
48                 PictTransformPtr transform;
49                 float width;
50                 float height;
51         } unit[2];
52 } nv40_exa_state_t;
53 static nv40_exa_state_t exa_state;
54 #define NV40EXA_STATE nv40_exa_state_t *state = &exa_state
55
56 static nv_pict_surface_format_t
57 NV40SurfaceFormat[] = {
58         { PICT_a8r8g8b8 , NV40TCL_RT_FORMAT_COLOR_A8R8G8B8 },
59         { PICT_x8r8g8b8 , NV40TCL_RT_FORMAT_COLOR_X8R8G8B8 },
60         { PICT_r5g6b5   , NV40TCL_RT_FORMAT_COLOR_R5G6B5   },
61         { PICT_a8       , NV40TCL_RT_FORMAT_COLOR_B8       },
62         { -1, ~0 }
63 };
64
65 static nv_pict_surface_format_t *
66 NV40_GetPictSurfaceFormat(int format)
67 {
68         int i = 0;
69
70         while (NV40SurfaceFormat[i].pict_fmt != -1) {
71                 if (NV40SurfaceFormat[i].pict_fmt == format)
72                         return &NV40SurfaceFormat[i];
73                 i++;
74         }
75
76         return NULL;
77 }
78
/*
 * Indices into nv40_fp_map[] / nv40_fp_map_a8[].
 *
 * NV40EXA_FPID is an enum tag here.  The previous spelling
 * "enum { ... } NV40EXA_FPID;" defined an unused global *variable* of an
 * anonymous enum type instead of naming the type.
 */
enum NV40EXA_FPID {
        NV40EXA_FPID_PASS_COL0 = 0,
        NV40EXA_FPID_PASS_TEX0 = 1,
        NV40EXA_FPID_COMPOSITE_MASK = 2,
        NV40EXA_FPID_COMPOSITE_MASK_SA_CA = 3,
        NV40EXA_FPID_COMPOSITE_MASK_CA = 4,
        NV40EXA_FPID_MAX = 5    /* number of fragment programs */
};
87
88 static nv_shader_t *nv40_fp_map[NV40EXA_FPID_MAX] = {
89         &nv30_fp_pass_col0,
90         &nv30_fp_pass_tex0,
91         &nv30_fp_composite_mask,
92         &nv30_fp_composite_mask_sa_ca,
93         &nv30_fp_composite_mask_ca
94 };
95
96 static nv_shader_t *nv40_fp_map_a8[NV40EXA_FPID_MAX];
97
98 static void
99 NV40EXAHackupA8Shaders(ScrnInfoPtr pScrn)
100 {
101         int s;
102
103         for (s = 0; s < NV40EXA_FPID_MAX; s++) {
104                 nv_shader_t *def, *a8;
105
106                 def = nv40_fp_map[s];
107                 a8 = xcalloc(1, sizeof(nv_shader_t));
108                 a8->card_priv.NV30FP.num_regs = def->card_priv.NV30FP.num_regs;
109                 a8->size = def->size + 4;
110                 memcpy(a8->data, def->data, def->size * sizeof(uint32_t));
111                 nv40_fp_map_a8[s] = a8;
112
113                 a8->data[a8->size - 8 + 0] &= ~0x00000081;
114                 a8->data[a8->size - 4 + 0]  = 0x01401e81;
115                 a8->data[a8->size - 4 + 1]  = 0x1c9dfe00;
116                 a8->data[a8->size - 4 + 2]  = 0x0001c800;
117                 a8->data[a8->size - 4 + 3]  = 0x0001c800;
118         }
119 }
120
121 #define _(r,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                        \
122   {                                                                            \
123   PICT_##r, NV40TCL_TEX_FORMAT_FORMAT_##tf,                                    \
124   NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y |          \
125   NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w |          \
126   NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y |          \
127   NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w,           \
128   }
129 static nv_pict_texture_format_t
130 NV40TextureFormat[] = {
131         _(a8r8g8b8, A8R8G8B8,   S1,   S1,   S1,   S1, X, Y, Z, W),
132         _(x8r8g8b8, A8R8G8B8,   S1,   S1,   S1,  ONE, X, Y, Z, W),
133         _(x8b8g8r8, A8R8G8B8,   S1,   S1,   S1,  ONE, Z, Y, X, W),
134         _(a1r5g5b5, A1R5G5B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
135         _(x1r5g5b5, A1R5G5B5,   S1,   S1,   S1,  ONE, X, Y, Z, W),
136         _(  r5g6b5,   R5G6B5,   S1,   S1,   S1,   S1, X, Y, Z, W),
137         _(      a8,       L8, ZERO, ZERO, ZERO,   S1, X, X, X, X),
138         { -1, ~0, ~0 }
139 };
140 #undef _
141
142 static nv_pict_texture_format_t *
143 NV40_GetPictTextureFormat(int format)
144 {
145         int i = 0;
146
147         while (NV40TextureFormat[i].pict_fmt != -1) {
148                 if (NV40TextureFormat[i].pict_fmt == format)
149                         return &NV40TextureFormat[i];
150                 i++;
151         }
152
153         return NULL;
154 }
155
156 #define SF(bf) (NV40TCL_BLEND_FUNC_SRC_RGB_##bf |                              \
157                 NV40TCL_BLEND_FUNC_SRC_ALPHA_##bf)
158 #define DF(bf) (NV40TCL_BLEND_FUNC_DST_RGB_##bf |                              \
159                 NV40TCL_BLEND_FUNC_DST_ALPHA_##bf)
160 static nv_pict_op_t 
161 NV40PictOp[] = {
162 /* Clear       */ { 0, 0, SF(               ZERO), DF(               ZERO) },
163 /* Src         */ { 0, 0, SF(                ONE), DF(               ZERO) },
164 /* Dst         */ { 0, 0, SF(               ZERO), DF(                ONE) },
165 /* Over        */ { 1, 0, SF(                ONE), DF(ONE_MINUS_SRC_ALPHA) },
166 /* OverReverse */ { 0, 1, SF(ONE_MINUS_DST_ALPHA), DF(                ONE) },
167 /* In          */ { 0, 1, SF(          DST_ALPHA), DF(               ZERO) },
168 /* InReverse   */ { 1, 0, SF(               ZERO), DF(          SRC_ALPHA) },
169 /* Out         */ { 0, 1, SF(ONE_MINUS_DST_ALPHA), DF(               ZERO) },
170 /* OutReverse  */ { 1, 0, SF(               ZERO), DF(ONE_MINUS_SRC_ALPHA) },
171 /* Atop        */ { 1, 1, SF(          DST_ALPHA), DF(ONE_MINUS_SRC_ALPHA) },
172 /* AtopReverse */ { 1, 1, SF(ONE_MINUS_DST_ALPHA), DF(          SRC_ALPHA) },
173 /* Xor         */ { 1, 1, SF(ONE_MINUS_DST_ALPHA), DF(ONE_MINUS_SRC_ALPHA) },
174 /* Add         */ { 0, 0, SF(                ONE), DF(                ONE) }
175 };
176
177 static nv_pict_op_t *
178 NV40_GetPictOpRec(int op)
179 {
180         if (op >= PictOpSaturate)
181                 return NULL;
182         return &NV40PictOp[op];
183 }
184
/*
 * FALLBACK(fmt, ...) makes the enclosing function return FALSE.
 * Change the "#if 0" to "#if 1" to also log the reason through ErrorF();
 * in the default build the message arguments are discarded unevaluated.
 */
#if 0
#define FALLBACK(fmt, args...)                                          \
        do {                                                            \
                ErrorF("FALLBACK %s:%d> " fmt, __func__, __LINE__, ##args); \
                return FALSE;                                           \
        } while (0)
#else
#define FALLBACK(fmt, args...)                                          \
        do {                                                            \
                return FALSE;                                           \
        } while (0)
#endif
195
196 static void
197 NV40_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
198 {
199         NVPtr pNv = NVPTR(pScrn);
200         static int next_hw_id = 0;
201         int i;
202
203         if (!shader->hw_id) {
204                 shader->hw_id = next_hw_id;
205
206                 BEGIN_RING(Nv3D, NV40TCL_VP_UPLOAD_FROM_ID, 1);
207                 OUT_RING  ((shader->hw_id));
208                 for (i=0; i<shader->size; i+=4) {
209                         BEGIN_RING(Nv3D, NV40TCL_VP_UPLOAD_INST(0), 4);
210                         OUT_RING  (shader->data[i + 0]);
211                         OUT_RING  (shader->data[i + 1]);
212                         OUT_RING  (shader->data[i + 2]);
213                         OUT_RING  (shader->data[i + 3]);
214                         next_hw_id++;
215                 }
216         }
217
218         BEGIN_RING(Nv3D, NV40TCL_VP_START_FROM_ID, 1);
219         OUT_RING  ((shader->hw_id));
220
221         BEGIN_RING(Nv3D, NV40TCL_VP_ATTRIB_EN, 2);
222         OUT_RING  (shader->card_priv.NV30VP.vp_in_reg);
223         OUT_RING  (shader->card_priv.NV30VP.vp_out_reg);
224 }
225
226 static void
227 NV40_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
228 {
229         NVPtr pNv = NVPTR(pScrn);
230         static struct nouveau_bo *fp_mem = NULL;
231         static int next_hw_id_offset = 0;
232
233         if (!fp_mem) {
234                 if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
235                                    0, 0x1000, &fp_mem)) {
236                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
237                                    "Couldn't alloc fragprog buffer!\n");
238                         return;
239                 }
240
241                 if (nouveau_bo_map(fp_mem, NOUVEAU_BO_RDWR)) {
242                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
243                                    "Couldn't map fragprog buffer!\n");
244                 }
245         }
246
247         if (!shader->hw_id) {
248                 uint32_t *map = fp_mem->map + next_hw_id_offset;
249                 int i;
250
251                 for (i = 0; i < shader->size; i++) {
252                         uint32_t data = shader->data[i];
253 #if (X_BYTE_ORDER != X_LITTLE_ENDIAN)
254                         data = ((data >> 16) | ((data & 0xffff) << 16));
255 #endif
256                         map[i] = data;
257                 }
258
259                 shader->hw_id = next_hw_id_offset;
260                 next_hw_id_offset += (shader->size * sizeof(uint32_t));
261                 next_hw_id_offset = (next_hw_id_offset + 63) & ~63;
262         }
263
264         BEGIN_RING(Nv3D, NV40TCL_FP_ADDRESS, 1);
265         OUT_RELOC (fp_mem, shader->hw_id, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART |
266                    NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
267                    NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1);
268         BEGIN_RING(Nv3D, NV40TCL_FP_CONTROL, 1);
269         OUT_RING  (shader->card_priv.NV30FP.num_regs <<
270                    NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT);
271 }
272
273 static void
274 NV40_SetupBlend(ScrnInfoPtr pScrn, nv_pict_op_t *blend,
275                 PictFormatShort dest_format, Bool component_alpha)
276 {
277         NVPtr pNv = NVPTR(pScrn);
278         uint32_t sblend, dblend;
279
280         sblend = blend->src_card_op;
281         dblend = blend->dst_card_op;
282
283         if (blend->dst_alpha) {
284                 if (!PICT_FORMAT_A(dest_format)) {
285                         if (sblend == SF(DST_ALPHA)) {
286                                 sblend = SF(ONE);
287                         } else if (sblend == SF(ONE_MINUS_DST_ALPHA)) {
288                                 sblend = SF(ZERO);
289                         }
290                 } else if (dest_format == PICT_a8) {
291                         if (sblend == SF(DST_ALPHA)) {
292                                 sblend = SF(DST_COLOR);
293                         } else if (sblend == SF(ONE_MINUS_DST_ALPHA)) {
294                                 sblend = SF(ONE_MINUS_DST_COLOR);
295                         }
296                 }
297         }
298
299         if (blend->src_alpha && (component_alpha || dest_format == PICT_a8)) {
300                 if (dblend == DF(SRC_ALPHA)) {
301                         dblend = DF(SRC_COLOR);
302                 } else if (dblend == DF(ONE_MINUS_SRC_ALPHA)) {
303                         dblend = DF(ONE_MINUS_SRC_COLOR);
304                 }
305         }
306
307         if (sblend == SF(ONE) && dblend == DF(ZERO)) {
308                 BEGIN_RING(Nv3D, NV40TCL_BLEND_ENABLE, 1);
309                 OUT_RING  (0);
310         } else {
311                 BEGIN_RING(Nv3D, NV40TCL_BLEND_ENABLE, 5);
312                 OUT_RING  (1);
313                 OUT_RING  (sblend);
314                 OUT_RING  (dblend);
315                 OUT_RING  (0x00000000);
316                 OUT_RING  (NV40TCL_BLEND_EQUATION_ALPHA_FUNC_ADD |
317                            NV40TCL_BLEND_EQUATION_RGB_FUNC_ADD);
318         }
319 }
320
321 static Bool
322 NV40EXATexture(ScrnInfoPtr pScrn, PixmapPtr pPix, PicturePtr pPict, int unit)
323 {
324         NVPtr pNv = NVPTR(pScrn);
325         nv_pict_texture_format_t *fmt;
326         NV40EXA_STATE;
327
328         fmt = NV40_GetPictTextureFormat(pPict->format);
329         if (!fmt)
330                 return FALSE;
331
332         BEGIN_RING(Nv3D, NV40TCL_TEX_OFFSET(unit), 8);
333         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
334         OUT_PIXMAPd(pPix, fmt->card_fmt | NV40TCL_TEX_FORMAT_LINEAR |
335                     NV40TCL_TEX_FORMAT_DIMS_2D | NV40TCL_TEX_FORMAT_NO_BORDER |
336                     (0x8000) | (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT),
337                     NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD,
338                     NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1);
339         if (pPict->repeat && pPict->repeatType == RepeatNormal) {
340                 OUT_RING  (NV40TCL_TEX_WRAP_S_REPEAT |
341                            NV40TCL_TEX_WRAP_T_REPEAT |
342                            NV40TCL_TEX_WRAP_R_REPEAT);
343         } else {
344                 OUT_RING  (NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE |
345                            NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE |
346                            NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE);
347         }
348         OUT_RING  (NV40TCL_TEX_ENABLE_ENABLE);
349         OUT_RING  (fmt->card_swz);
350         if (pPict->filter == PictFilterBilinear) {
351                 OUT_RING  (NV40TCL_TEX_FILTER_MIN_LINEAR |
352                            NV40TCL_TEX_FILTER_MAG_LINEAR |
353                            0x3fd6);
354         } else {
355                 OUT_RING  (NV40TCL_TEX_FILTER_MIN_NEAREST |
356                            NV40TCL_TEX_FILTER_MAG_NEAREST |
357                            0x3fd6);
358         }
359         OUT_RING  ((pPix->drawable.width << 16) | pPix->drawable.height);
360         OUT_RING  (0); /* border ARGB */
361         BEGIN_RING(Nv3D, NV40TCL_TEX_SIZE1(unit), 1);
362         OUT_RING  ((1 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) |
363                    (uint32_t)exaGetPixmapPitch(pPix));
364
365         state->unit[unit].width         = (float)pPix->drawable.width;
366         state->unit[unit].height        = (float)pPix->drawable.height;
367         state->unit[unit].transform     = pPict->transform;
368
369         return TRUE;
370 }
371
372 static Bool
373 NV40_SetupSurface(ScrnInfoPtr pScrn, PixmapPtr pPix, PictFormatShort format)
374 {
375         NVPtr pNv = NVPTR(pScrn);
376         nv_pict_surface_format_t *fmt;
377
378         fmt = NV40_GetPictSurfaceFormat(format);
379         if (!fmt) {
380                 ErrorF("AIII no format\n");
381                 return FALSE;
382         }
383
384         BEGIN_RING(Nv3D, NV40TCL_RT_FORMAT, 3);
385         OUT_RING  (NV40TCL_RT_FORMAT_TYPE_LINEAR |
386                    NV40TCL_RT_FORMAT_ZETA_Z24S8 |
387                    fmt->card_fmt);
388         OUT_RING  (exaGetPixmapPitch(pPix));
389         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
390
391         return TRUE;
392 }
393
394 static Bool
395 NV40EXACheckCompositeTexture(PicturePtr pPict)
396 {
397         nv_pict_texture_format_t *fmt;
398         int w = pPict->pDrawable->width;
399         int h = pPict->pDrawable->height;
400
401         if ((w > 4096) || (h > 4096))
402                 FALLBACK("picture too large, %dx%d\n", w, h);
403
404         fmt = NV40_GetPictTextureFormat(pPict->format);
405         if (!fmt)
406                 FALLBACK("picture format 0x%08x not supported\n",
407                                 pPict->format);
408
409         if (pPict->filter != PictFilterNearest &&
410             pPict->filter != PictFilterBilinear)
411                 FALLBACK("filter 0x%x not supported\n", pPict->filter);
412
413         if (pPict->repeat &&
414             (pPict->repeat != RepeatNormal && pPict->repeatType != RepeatNone))
415                 FALLBACK("repeat 0x%x not supported\n", pPict->repeatType);
416
417         return TRUE;
418 }
419
420 Bool
421 NV40EXACheckComposite(int op, PicturePtr psPict,
422                               PicturePtr pmPict,
423                               PicturePtr pdPict)
424 {
425         nv_pict_surface_format_t *fmt;
426         nv_pict_op_t *opr;
427
428         opr = NV40_GetPictOpRec(op);
429         if (!opr)
430                 FALLBACK("unsupported blend op 0x%x\n", op);
431
432         fmt = NV40_GetPictSurfaceFormat(pdPict->format);
433         if (!fmt)
434                 FALLBACK("dst picture format 0x%08x not supported\n",
435                                 pdPict->format);
436
437         if (!NV40EXACheckCompositeTexture(psPict))
438                 FALLBACK("src picture\n");
439         if (pmPict) {
440                 if (pmPict->componentAlpha && 
441                     PICT_FORMAT_RGB(pmPict->format) &&
442                     opr->src_alpha && opr->src_card_op != SF(ZERO))
443                         FALLBACK("mask CA + SA\n");
444                 if (!NV40EXACheckCompositeTexture(pmPict))
445                         FALLBACK("mask picture\n");
446         }
447
448         return TRUE;
449 }
450
451 Bool
452 NV40EXAPrepareComposite(int op, PicturePtr psPict,
453                                 PicturePtr pmPict,
454                                 PicturePtr pdPict,
455                                 PixmapPtr  psPix,
456                                 PixmapPtr  pmPix,
457                                 PixmapPtr  pdPix)
458 {
459         ScrnInfoPtr pScrn = xf86Screens[psPix->drawable.pScreen->myNum];
460         NVPtr pNv = NVPTR(pScrn);
461         nv_pict_op_t *blend;
462         int fpid = NV40EXA_FPID_PASS_COL0;
463         NV40EXA_STATE;
464
465         blend = NV40_GetPictOpRec(op);
466
467         NV40_SetupBlend(pScrn, blend, pdPict->format,
468                         (pmPict && pmPict->componentAlpha &&
469                          PICT_FORMAT_RGB(pmPict->format)));
470
471         NV40_SetupSurface(pScrn, pdPix, pdPict->format);
472         NV40EXATexture(pScrn, psPix, psPict, 0);
473
474         NV40_LoadVtxProg(pScrn, &nv40_vp_exa_render);
475         if (pmPict) {
476                 NV40EXATexture(pScrn, pmPix, pmPict, 1);
477
478                 if (pmPict->componentAlpha && PICT_FORMAT_RGB(pmPict->format)) {
479                         if (blend->src_alpha)
480                                 fpid = NV40EXA_FPID_COMPOSITE_MASK_SA_CA;
481                         else
482                                 fpid = NV40EXA_FPID_COMPOSITE_MASK_CA;
483                 } else {
484                         fpid = NV40EXA_FPID_COMPOSITE_MASK;
485                 }
486
487                 state->have_mask = TRUE;
488         } else {
489                 fpid = NV40EXA_FPID_PASS_TEX0;
490
491                 state->have_mask = FALSE;
492         }
493
494         if (pdPict->format == PICT_a8)
495                 NV40_LoadFragProg(pScrn, nv40_fp_map_a8[fpid]);
496         else
497                 NV40_LoadFragProg(pScrn, nv40_fp_map[fpid]);
498
499         /* Appears to be some kind of cache flush, needed here at least
500          * sometimes.. funky text rendering otherwise :)
501          */
502         BEGIN_RING(Nv3D, NV40TCL_TEX_CACHE_CTL, 1);
503         OUT_RING  (2);
504         BEGIN_RING(Nv3D, NV40TCL_TEX_CACHE_CTL, 1);
505         OUT_RING  (1);
506
507         BEGIN_RING(Nv3D, NV40TCL_BEGIN_END, 1);
508         OUT_RING  (NV40TCL_BEGIN_END_QUADS);
509
510         return TRUE;
511 }
512
513 #define xFixedToFloat(v) \
514         ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
515
516 static void
517 NV40EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
518                                           float *x_ret, float *y_ret)
519 {
520         PictVector v;
521
522         if (t) {
523                 v.vector[0] = IntToxFixed(x);
524                 v.vector[1] = IntToxFixed(y);
525                 v.vector[2] = xFixed1;
526                 PictureTransformPoint(t, &v);
527                 *x_ret = xFixedToFloat(v.vector[0]) / sx;
528                 *y_ret = xFixedToFloat(v.vector[1]) / sy;
529         } else {
530                 *x_ret = (float)x / sx;
531                 *y_ret = (float)y / sy;
532         }
533 }
534
/*
 * Emit one vertex.  Texture coordinates go to vertex attribute 8 as
 * floats (source first, then mask for the CV_OUTm variant); the
 * destination position goes to attribute 0 packed as (dy << 16) | dx.
 * Both macros expect `pNv` to be in scope for the ring macros they use.
 */
#define CV_OUTm(sx,sy,mx,my,dx,dy)                                             \
        do {                                                                   \
                BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2F_X(8), 4);                 \
                OUT_RINGf ((sx));                                              \
                OUT_RINGf ((sy));                                              \
                OUT_RINGf ((mx));                                              \
                OUT_RINGf ((my));                                              \
                BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2I(0), 1);                   \
                OUT_RING  (((dy)<<16)|(dx));                                   \
        } while(0)
#define CV_OUT(sx,sy,dx,dy)                                                    \
        do {                                                                   \
                BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2F_X(8), 2);                 \
                OUT_RINGf ((sx));                                              \
                OUT_RINGf ((sy));                                              \
                BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2I(0), 1);                   \
                OUT_RING  (((dy)<<16)|(dx));                                   \
        } while(0)
548
549 void
550 NV40EXAComposite(PixmapPtr pdPix, int srcX , int srcY,
551                                   int maskX, int maskY,
552                                   int dstX , int dstY,
553                                   int width, int height)
554 {
555         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
556         NVPtr pNv = NVPTR(pScrn);
557         float sX0, sX1, sY0, sY1;
558         float mX0, mX1, mY0, mY1;
559         NV40EXA_STATE;
560
561         NV40EXATransformCoord(state->unit[0].transform, srcX, srcY,
562                               state->unit[0].width,
563                               state->unit[0].height, &sX0, &sY0);
564         NV40EXATransformCoord(state->unit[0].transform,
565                               srcX + width, srcY + height,
566                               state->unit[0].width,
567                               state->unit[0].height, &sX1, &sY1);
568
569         if (state->have_mask) {
570                 NV40EXATransformCoord(state->unit[1].transform, maskX, maskY,
571                                       state->unit[1].width,
572                                       state->unit[1].height, &mX0, &mY0);
573                 NV40EXATransformCoord(state->unit[1].transform,
574                                       maskX + width, maskY + height,
575                                       state->unit[1].width,
576                                       state->unit[1].height, &mX1, &mY1);
577                 CV_OUTm(sX0 , sY0 , mX0, mY0, dstX        ,          dstY);
578                 CV_OUTm(sX1 , sY0 , mX1, mY0, dstX + width,          dstY);
579                 CV_OUTm(sX1 , sY1 , mX1, mY1, dstX + width, dstY + height);
580                 CV_OUTm(sX0 , sY1 , mX0, mY1, dstX        , dstY + height);
581         } else {
582                 CV_OUT(sX0 , sY0 , dstX        ,          dstY);
583                 CV_OUT(sX1 , sY0 , dstX + width,          dstY);
584                 CV_OUT(sX1 , sY1 , dstX + width, dstY + height);
585                 CV_OUT(sX0 , sY1 , dstX        , dstY + height);
586         }
587
588         FIRE_RING();
589 }
590
591 void
592 NV40EXADoneComposite(PixmapPtr pdPix)
593 {
594         ScrnInfoPtr pScrn = xf86Screens[pdPix->drawable.pScreen->myNum];
595         NVPtr pNv = NVPTR(pScrn);
596
597         BEGIN_RING(Nv3D, NV40TCL_BEGIN_END, 1);
598         OUT_RING  (NV40TCL_BEGIN_END_STOP);
599 }
600
601 #define NV40TCL_CHIPSET_4X_MASK 0x00000baf
602 #define NV44TCL_CHIPSET_4X_MASK 0x00005450
603 Bool
604 NVAccelInitNV40TCL(ScrnInfoPtr pScrn)
605 {
606         NVPtr pNv = NVPTR(pScrn);
607         uint32_t class = 0, chipset;
608         int i;
609
610         NV40EXAHackupA8Shaders(pScrn);
611
612         chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
613         if ((chipset & 0xf0) != NV_ARCH_40)
614                 return TRUE;
615         chipset &= 0xf;
616
617         if (NV40TCL_CHIPSET_4X_MASK & (1<<chipset))
618                 class = NV40TCL;
619         else if (NV44TCL_CHIPSET_4X_MASK & (1<<chipset))
620                 class = NV44TCL;
621         else {
622                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
623                            "NV40EXA: Unknown chipset NV4%1x\n", chipset);
624                 return FALSE;
625         }
626
627         if (!pNv->Nv3D) {
628                 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
629                         return FALSE;
630         }
631
632         BEGIN_RING(Nv3D, NV40TCL_DMA_NOTIFY, 1);
633         OUT_RING  (pNv->notify0->handle);
634         BEGIN_RING(Nv3D, NV40TCL_DMA_TEXTURE0, 2);
635         OUT_RING  (pNv->chan->vram->handle);
636         OUT_RING  (pNv->chan->gart->handle);
637         BEGIN_RING(Nv3D, NV40TCL_DMA_COLOR0, 2);
638         OUT_RING  (pNv->chan->vram->handle);
639         OUT_RING  (pNv->chan->vram->handle);
640
641         /* voodoo */
642         BEGIN_RING(Nv3D, 0x1ea4, 3);
643         OUT_RING  (0x00000010);
644         OUT_RING  (0x01000100);
645         OUT_RING  (0xff800006);
646         BEGIN_RING(Nv3D, 0x1fc4, 1);
647         OUT_RING  (0x06144321);
648         BEGIN_RING(Nv3D, 0x1fc8, 2);
649         OUT_RING  (0xedcba987);
650         OUT_RING  (0x00000021);
651         BEGIN_RING(Nv3D, 0x1fd0, 1);
652         OUT_RING  (0x00171615);
653         BEGIN_RING(Nv3D, 0x1fd4, 1);
654         OUT_RING  (0x001b1a19);
655         BEGIN_RING(Nv3D, 0x1ef8, 1);
656         OUT_RING  (0x0020ffff);
657         BEGIN_RING(Nv3D, 0x1d64, 1);
658         OUT_RING  (0x00d30000);
659         BEGIN_RING(Nv3D, 0x1e94, 1);
660         OUT_RING  (0x00000001);
661
662         BEGIN_RING(Nv3D, NV40TCL_VIEWPORT_TRANSLATE_X, 8);
663         OUT_RINGf (0.0);
664         OUT_RINGf (0.0);
665         OUT_RINGf (0.0);
666         OUT_RINGf (0.0);
667         OUT_RINGf (1.0);
668         OUT_RINGf (1.0);
669         OUT_RINGf (1.0);
670         OUT_RINGf (0.0);
671
672         /* default 3D state */
673         /*XXX: replace with the same state that the DRI emits on startup */
674         BEGIN_RING(Nv3D, NV40TCL_STENCIL_FRONT_ENABLE, 1);
675         OUT_RING  (0);
676         BEGIN_RING(Nv3D, NV40TCL_STENCIL_BACK_ENABLE, 1);
677         OUT_RING  (0);
678         BEGIN_RING(Nv3D, NV40TCL_ALPHA_TEST_ENABLE, 1);
679         OUT_RING  (0);
680         BEGIN_RING(Nv3D, NV40TCL_DEPTH_WRITE_ENABLE, 2);
681         OUT_RING  (0);
682         OUT_RING  (0); 
683         BEGIN_RING(Nv3D, NV40TCL_COLOR_MASK, 1);
684         OUT_RING  (0x01010101); /* TR,TR,TR,TR */
685         BEGIN_RING(Nv3D, NV40TCL_CULL_FACE_ENABLE, 1);
686         OUT_RING  (0);
687         BEGIN_RING(Nv3D, NV40TCL_BLEND_ENABLE, 1);
688         OUT_RING  (0);
689         BEGIN_RING(Nv3D, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2);
690         OUT_RING  (0);
691         OUT_RING  (NV40TCL_COLOR_LOGIC_OP_COPY);
692         BEGIN_RING(Nv3D, NV40TCL_DITHER_ENABLE, 1);
693         OUT_RING  (0);
694         BEGIN_RING(Nv3D, NV40TCL_SHADE_MODEL, 1);
695         OUT_RING  (NV40TCL_SHADE_MODEL_SMOOTH);
696         BEGIN_RING(Nv3D, NV40TCL_POLYGON_OFFSET_FACTOR,2);
697         OUT_RINGf (0.0);
698         OUT_RINGf (0.0);
699         BEGIN_RING(Nv3D, NV40TCL_POLYGON_MODE_FRONT, 2);
700         OUT_RING  (NV40TCL_POLYGON_MODE_FRONT_FILL);
701         OUT_RING  (NV40TCL_POLYGON_MODE_BACK_FILL);
702         BEGIN_RING(Nv3D, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 0x20);
703         for (i=0;i<0x20;i++)
704                 OUT_RING  (0xFFFFFFFF);
705         for (i=0;i<16;i++) {
706                 BEGIN_RING(Nv3D, NV40TCL_TEX_ENABLE(i), 1);
707                 OUT_RING  (0);
708         }
709
710         BEGIN_RING(Nv3D, 0x1d78, 1);
711         OUT_RING  (0x110);
712
713         BEGIN_RING(Nv3D, NV40TCL_RT_ENABLE, 1);
714         OUT_RING  (NV40TCL_RT_ENABLE_COLOR0);
715
716         BEGIN_RING(Nv3D, NV40TCL_RT_HORIZ, 2);
717         OUT_RING  ((4096 << 16));
718         OUT_RING  ((4096 << 16));
719         BEGIN_RING(Nv3D, NV40TCL_SCISSOR_HORIZ, 2);
720         OUT_RING  ((4096 << 16));
721         OUT_RING  ((4096 << 16));
722         BEGIN_RING(Nv3D, NV40TCL_VIEWPORT_HORIZ, 2);
723         OUT_RING  ((4096 << 16));
724         OUT_RING  ((4096 << 16));
725         BEGIN_RING(Nv3D, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
726         OUT_RING  ((4095 << 16));
727         OUT_RING  ((4095 << 16));
728
729         return TRUE;
730 }
731