Enable nv30 exa on PPC.
[nouveau] / src / nv10_exa.c
1 /*
2  * Copyright 2007 Stephane Marchesin
3  * Copyright 2007 Arthur Huillet
4  * Copyright 2007 Peter Winters
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
21  * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28
29 #include "nv_include.h"
30
31 typedef struct nv10_exa_state {
32         Bool have_mask;
33         Bool is_a8_plus_a8; /*as known as is_extremely_dirty :)*/
34         struct {
35                 PictTransformPtr transform;
36                 float width;
37                 float height;
38         } unit[2];
39 } nv10_exa_state_t;
40 static nv10_exa_state_t state;
41
42 static int NV10TexFormat(int ExaFormat)
43 {
44         struct {int exa;int hw;} tex_format[] =
45         {
46                 {PICT_a8r8g8b8, 0x900},
47                 {PICT_x8r8g8b8, 0x900},
48                 {PICT_r5g6b5, 0x880}, //this one was only tested with rendercheck
49                 //{PICT_a1r5g5b5,       NV10_TCL_PRIMITIVE_3D_TX_FORMAT_FORMAT_R5G5B5A1},
50                 //{PICT_a4r4g4b4,       NV10_TCL_PRIMITIVE_3D_TX_FORMAT_FORMAT_R4G4B4A4},
51                 {PICT_a8,       0x980},
52                 // FIXME other formats
53         };
54
55         int i;
56         for(i=0;i<sizeof(tex_format)/sizeof(tex_format[0]);i++)
57         {
58                 if(tex_format[i].exa==ExaFormat)
59                         return tex_format[i].hw;
60         }
61
62         return 0;
63 }
64
65 static int NV10DstFormat(int ExaFormat)
66 {
67         struct {int exa;int hw;} dst_format[] =
68         {
69                 {PICT_a8r8g8b8, 0x108},
70                 {PICT_x8r8g8b8, 0x108}, //FIXME blending factors?
71                 {PICT_r5g6b5,   0x103}
72                 // FIXME other formats
73         };
74
75         int i;
76         for(i=0;i<sizeof(dst_format)/sizeof(dst_format[0]);i++)
77         {
78                 if(dst_format[i].exa==ExaFormat)
79                         return dst_format[i].hw;
80         }
81
82         return 0;
83 }
84
85 static Bool NV10CheckTexture(PicturePtr Picture)
86 {
87         int w = Picture->pDrawable->width;
88         int h = Picture->pDrawable->height;
89
90         if ((w > 2046) || (h>2046))
91                 return FALSE;
92         if (!NV10TexFormat(Picture->format))
93                 return FALSE;
94         if (Picture->filter != PictFilterNearest && Picture->filter != PictFilterBilinear)
95                 return FALSE;
96         if (Picture->componentAlpha)
97                 return FALSE;
98         /* we cannot repeat on NV10 because NPOT textures do not support this. unfortunately. */
99         if (Picture->repeat != RepeatNone)
100                 /* we can repeat 1x1 textures */
101                 if (!(w == 1 && h == 1))
102                         return FALSE;
103         return TRUE;
104 }
105
106 static Bool NV10CheckBuffer(PicturePtr Picture)
107 {
108         int w = Picture->pDrawable->width;
109         int h = Picture->pDrawable->height;
110
111         if ((w > 4096) || (h>4096))
112                 return FALSE;
113         if (Picture->componentAlpha) //this is used by rendercheck CA composite tests. not sure about real-life.
114                 return FALSE;
115         if (!NV10DstFormat(Picture->format))
116                 return FALSE;
117         return TRUE;
118 }
119
120 static Bool NV10CheckPictOp(int op)
121 {
122         if ( op == PictOpAtopReverse ) /*this op doesn't work right now*/
123                 {
124                 return FALSE;
125                 }
126         if ( op >= PictOpSaturate )
127                 { //we do no saturate, disjoint, conjoint, though we could do e.g. DisjointClear which really is Clear
128                 return FALSE;
129                 }
130         return TRUE;
131 }       
132
133 /* Check if the current operation is a doable A8 + A8 */
134 /* A8 destination is a special case, because we do it by having the card think 
135 it's ARGB. For now we support PictOpAdd which is the only important op for this dst format, 
136 and without transformation or funny things.*/
137 static Bool NV10CheckA8_PLUS_A8_FEASABILITY(PicturePtr src, PicturePtr msk, PicturePtr dst, int op)  
138 {
139         /*This does not work quite well yet so we fallback*/
140         return FALSE;
141         
142         if ((!msk) &&   (src->format == PICT_a8) && (dst->format == PICT_a8) && (!src->transform) && 
143                                                                         ((op == PictOpAdd) || (op == PictOpSrc) || (op == PictOpClear) || (op == PictOpDst)) && (src->repeat == RepeatNone))
144                 {
145                 return TRUE;
146                 }
147         return FALSE;
148 }
149
/*
 * Debug hook used by NV10CheckComposite(). As shipped it expands to an
 * empty statement; swap the condition below to forward every call to
 * NV10EXAFallbackInfo_real() instead.
 */
#if 1
#define NV10EXAFallbackInfo(X,Y,Z,S,T) do { ; } while (0)
#else
#define NV10EXAFallbackInfo(X,Y,Z,S,T) NV10EXAFallbackInfo_real(X,Y,Z,S,T)
#endif
155
156 static void NV10EXAFallbackInfo_real(char * reason, int op, PicturePtr pSrcPicture,
157                              PicturePtr pMaskPicture,
158                              PicturePtr pDstPicture)
159 {
160         char out2[4096];
161         char * out = out2;
162         sprintf(out, "%s  ", reason);
163         out = out + strlen(out);
164         switch ( op )
165                 {
166                 case PictOpClear:
167                         sprintf(out, "PictOpClear ");
168                         break;
169                 case PictOpSrc:
170                         sprintf(out, "PictOpSrc ");
171                         break;
172                 case PictOpDst:
173                         sprintf(out, "PictOpDst ");
174                         break;
175                 case PictOpOver:
176                         sprintf(out, "PictOpOver ");
177                         break;
178                 case PictOpOutReverse:
179                         sprintf(out, "PictOpOutReverse ");
180                         break;
181                 case PictOpAdd:
182                         sprintf(out, "PictOpAdd ");
183                         break;
184                 default :
185                         sprintf(out, "PictOp%d ", op);
186                 }
187         out = out + strlen(out);
188         switch ( pSrcPicture->format )
189                 {
190                 case PICT_a8r8g8b8:
191                         sprintf(out, "A8R8G8B8 ");
192                         break;
193                 case PICT_x8r8g8b8:
194                         sprintf(out, "X8R8G8B8 ");
195                         break;
196                 case PICT_x8b8g8r8:
197                         sprintf(out, "X8B8G8R8 ");
198                         break;
199                 case PICT_r5g6b5:
200                         sprintf(out, "R5G6B5 ");
201                         break;
202                 case PICT_a8:
203                         sprintf(out, "A8 ");
204                         break;
205                 case PICT_a1:
206                         sprintf(out, "A1 ");
207                         break;
208                 default:
209                         sprintf(out, "%x ", pSrcPicture->format);
210                 }
211         out+=strlen(out);
212         sprintf(out, "(%dx%d) ", pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
213         if ( pSrcPicture->repeat != RepeatNone )
214                 strcat(out, "R ");
215         strcat(out, "-> ");
216         out+=strlen(out);
217         
218         switch ( pDstPicture->format )
219                 {
220                 case PICT_a8r8g8b8:
221                         sprintf(out, "A8R8G8B8 ");
222                         break;
223                 case PICT_x8r8g8b8:
224                         sprintf(out, "X8R8G8B8  ");
225                         break;
226                 case PICT_x8b8g8r8:
227                         sprintf(out, "X8B8G8R8  ");
228                         break;
229                 case PICT_r5g6b5:
230                         sprintf(out, "R5G6B5 ");
231                         break;
232                 case PICT_a8:
233                         sprintf(out, "A8  ");
234                         break;
235                 case PICT_a1:
236                         sprintf(out, "A1  ");
237                         break;
238                 default:
239                         sprintf(out, "%x  ", pDstPicture->format);
240                 }
241         out+=strlen(out);
242         sprintf(out, "(%dx%d) ", pDstPicture->pDrawable->width, pDstPicture->pDrawable->height);
243         if ( pDstPicture->repeat != RepeatNone )
244                 strcat(out, "R ");
245         out+=strlen(out);
246         if ( !pMaskPicture ) 
247                 sprintf(out, "& NONE");
248         else {
249         switch ( pMaskPicture->format )
250                 {
251                 case PICT_a8r8g8b8:
252                         sprintf(out, "& A8R8G8B8 ");
253                         break;
254                 case PICT_x8r8g8b8:
255                         sprintf(out, "& X8R8G8B8  ");
256                         break;
257                 case PICT_x8b8g8r8:
258                         sprintf(out, "& X8B8G8R8  ");
259                         break;
260                 case PICT_a8:
261                         sprintf(out, "& A8  ");
262                         break;
263                 case PICT_a1:
264                         sprintf(out, "& A1  ");
265                         break;
266                 default:
267                         sprintf(out, "& %x  ", pMaskPicture->format);
268                 }
269                 out+=strlen(out);
270                 sprintf(out, "(%dx%d) ", pMaskPicture->pDrawable->width, pMaskPicture->pDrawable->height);
271                 if ( pMaskPicture->repeat != RepeatNone )
272                         strcat(out, "R ");
273                 out+=strlen(out);
274         }
275         strcat(out, "\n");
276         xf86DrvMsg(0, X_INFO, out2);
277 }
278
279
280 Bool NV10CheckComposite(int     op,
281                              PicturePtr pSrcPicture,
282                              PicturePtr pMaskPicture,
283                              PicturePtr pDstPicture)
284 {
285         
286         if (NV10CheckA8_PLUS_A8_FEASABILITY(pSrcPicture,pMaskPicture,pDstPicture,op))
287                 { //A8 destination hack is OK ? - disabled by default anyway
288                 NV10EXAFallbackInfo("Hackelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
289                 return TRUE;
290                 }
291
292         if (!NV10CheckPictOp(op))
293                 {
294                 NV10EXAFallbackInfo("pictop", op, pSrcPicture, pMaskPicture, pDstPicture);
295                 return FALSE;
296                 }
297         if (!NV10CheckBuffer(pDstPicture)) 
298                 {
299                 NV10EXAFallbackInfo("dst", op, pSrcPicture, pMaskPicture, pDstPicture);
300                 return FALSE;
301                 }
302                 
303         if (!NV10CheckTexture(pSrcPicture))
304                 {
305                 NV10EXAFallbackInfo("src", op, pSrcPicture, pMaskPicture, pDstPicture);
306                 return FALSE;
307                 }
308                 
309         if ((pMaskPicture) &&(!NV10CheckTexture(pMaskPicture)))
310                 {
311                 NV10EXAFallbackInfo("mask", op, pSrcPicture, pMaskPicture, pDstPicture);
312                 return FALSE;
313                 }
314                 
315         NV10EXAFallbackInfo("Accelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
316         return TRUE;
317 }
318
319 static void NV10SetTexture(NVPtr pNv,int unit,PicturePtr Pict,PixmapPtr pixmap)
320 {
321         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_OFFSET(unit), 1 );
322         OUT_RING  (NVAccelGetPixmapOffset(pixmap));
323         int log2w = log2i(Pict->pDrawable->width);
324         int log2h = log2i(Pict->pDrawable->height);
325         int w;
326         unsigned int txfmt =
327                         (NV10_TCL_PRIMITIVE_3D_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE) |
328                         (NV10_TCL_PRIMITIVE_3D_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE) |
329                         (log2w<<20) |
330                         (log2h<<16) |
331                         (1<<12) | /* lod == 1 */
332                         0x51 /* UNK */;
333
334         /* if repeat is set we're always handling a 1x1 texture with ARGB/XRGB destination, in that case we change the format
335         to use the POT (swizzled) matching format */
336         if (Pict->repeat != RepeatNone)
337         {
338                 if (Pict->format == PICT_a8)
339                         txfmt |= 0x80; /* A8 */
340                 else if (Pict->format == PICT_r5g6b5 )
341                         txfmt |= 0x280; /* R5G6B5 */
342                 else
343                         txfmt |= 0x300; /* ARGB format */
344         }
345         else
346         {
347                 if ( ! state.is_a8_plus_a8 )
348                         {
349                         txfmt |= NV10TexFormat(Pict->format);
350                         w = Pict->pDrawable->width;
351                         /* NPOT_SIZE expects an even number for width, we can round up uneven
352                         * numbers here because EXA always gives 64 byte aligned pixmaps
353                         * and for all formats we support 64 bytes represents an even number
354                         * of pixels
355                         */
356                         w = (w + 1) &~ 1;
357                         }
358                 else {
359                         txfmt |= NV10TexFormat(PICT_a8r8g8b8);
360                         w = (exaGetPixmapPitch(pixmap)) >> 2;
361                         }
362
363                 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_NPOT_PITCH(unit), 1);
364                 OUT_RING  (exaGetPixmapPitch(pixmap) << 16);
365
366                 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_NPOT_SIZE(unit), 1);
367                 OUT_RING  ((w<<16) | Pict->pDrawable->height);
368         }
369
370         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_FORMAT(unit), 1 );
371         OUT_RING  (txfmt);
372
373         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_ENABLE(unit), 1 );
374         OUT_RING  (NV10_TCL_PRIMITIVE_3D_TX_ENABLE_ENABLE);
375
376         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_FILTER(unit), 1);
377         if (Pict->filter == PictFilterNearest)
378                 OUT_RING  ((NV10_TCL_PRIMITIVE_3D_TX_FILTER_MAGNIFY_NEAREST) |
379                                 (NV10_TCL_PRIMITIVE_3D_TX_FILTER_MINIFY_NEAREST));
380         else
381                 OUT_RING  ((NV10_TCL_PRIMITIVE_3D_TX_FILTER_MAGNIFY_LINEAR) |
382                                 (NV10_TCL_PRIMITIVE_3D_TX_FILTER_MINIFY_LINEAR));
383
384         state.unit[unit].width          = (float)pixmap->drawable.width;
385         state.unit[unit].height         = (float)pixmap->drawable.height;
386         state.unit[unit].transform      = Pict->transform;
387 }
388
389 static void NV10SetBuffer(NVPtr pNv,PicturePtr Pict,PixmapPtr pixmap)
390 {
391         int i;
392         int x = 0;
393         int y = 0;
394         int w = 2048;
395         int h = 2048;
396
397         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BUFFER_FORMAT, 4);
398         if ( state.is_a8_plus_a8 )
399                 { /*A8 + A8 hack*/
400                 OUT_RING  (NV10DstFormat(PICT_a8r8g8b8));
401                 }
402         else {
403                 OUT_RING  (NV10DstFormat(Pict->format));
404                 }
405         
406         OUT_RING  (((uint32_t)exaGetPixmapPitch(pixmap) << 16) |(uint32_t)exaGetPixmapPitch(pixmap));
407         OUT_RING  (NVAccelGetPixmapOffset(pixmap));
408         OUT_RING  (0);
409                 
410         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 2);
411         OUT_RING  ((w<<16)|x);
412         OUT_RING  ((h<<16)|y);
413         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_MODE, 1); /* clip_mode */
414         OUT_RING  (0);
415         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(0), 1);
416         OUT_RING  (((w-1+x)<<16)|x|0x08000800);
417         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_VERT(0), 1);
418         OUT_RING  (((h-1+y)<<16)|y|0x08000800);
419
420         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_PROJECTION_MATRIX(0), 16);
421         for(i=0;i<16;i++)
422                 if (i/4==i%4)
423                         OUT_RINGf (1.0f);
424                 else
425                         OUT_RINGf (0.0f);
426
427         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DEPTH_RANGE_NEAR, 2);
428         OUT_RING  (0);
429 #if SCREEN_BPP == 32
430         OUT_RINGf (16777216.0);
431 #else
432         OUT_RINGf (65536.0);
433 #endif
434         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_SCALE_X, 4);
435         OUT_RINGf (-2048.0);
436         OUT_RINGf (-2048.0);
437         OUT_RINGf (0);
438         OUT_RING  (0);
439 }
440
441 static void NV10SetRegCombs(NVPtr pNv, PicturePtr src, PicturePtr mask)
442 {
443 /*This can be a bit difficult to understand at first glance.
444 Reg combiners are described here:
445 http://icps.u-strasbg.fr/~marchesin/perso/extensions/NV/register_combiners.html
446         
447 Single texturing setup, without honoring vertex colors (non default setup) is:
448 Alpha RC 0 : a_0  * 1 + 0 * 0
449 RGB RC 0 : rgb_0 * 1 + 0 * 0
450 RC 1s are unused
451         
452 Default setup uses vertex rgb/alpha in place of 1s above, but we don't need that in 2D.
453         
454 Multi texturing setup, where we do TEX0 in TEX1 (masking) is:
455 Alpha RC 0 : a_0 * a_1 + 0 * 0
456 RGB RC0 : rgb_0 * a_1 + 0 * 0
457 RC 1s are unused
458         
459 */
460
461 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
462 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
463
464 #define A_ALPHA_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_A_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA)
465 #define B_ALPHA_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_B_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA)
466 #define C_ALPHA_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_C_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA)
467 #define D_ALPHA_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_D_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA)
468         
469 #define A_ALPHA_ONE (A_ALPHA_ZERO | (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV))
470 #define B_ALPHA_ONE (B_ALPHA_ZERO | (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV))
471 #define C_ALPHA_ONE (C_ALPHA_ZERO | (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV))
472 #define D_ALPHA_ONE (D_ALPHA_ZERO | (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV))
473
474 #define A_RGB_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_A_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_A_COMPONENT_USAGE_RGB)
475 #define B_RGB_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_B_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_B_COMPONENT_USAGE_RGB)
476 #define C_RGB_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_C_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_C_COMPONENT_USAGE_RGB)
477 #define D_RGB_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_D_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_D_COMPONENT_USAGE_RGB)
478
479 #define A_RGB_ONE (A_RGB_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV)
480 #define B_RGB_ONE (B_RGB_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV)
481 #define C_RGB_ONE (C_RGB_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV)
482 #define D_RGB_ONE (D_RGB_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV)
483
484                         
485         rc0_in_alpha |= C_ALPHA_ZERO | D_ALPHA_ZERO;
486         if (src->format == PICT_x8r8g8b8)
487                 rc0_in_alpha |= A_ALPHA_ONE; //A = alpha = 1 everywhere
488         else
489                 rc0_in_alpha |= 0x18000000; //A = a_0, use texture 0 alpha value
490         
491         if ( ! mask ) 
492                 rc0_in_alpha |= B_ALPHA_ONE;
493         else 
494                 if ( mask->format == PICT_x8r8g8b8 )  //no alpha? ignore it
495                         rc0_in_alpha |= B_ALPHA_ONE;
496                 else
497                         rc0_in_alpha |= 0x00190000; //B = a_1, use texture 1 alpha value
498         
499         rc0_in_rgb |=  C_RGB_ZERO | D_RGB_ZERO;
500         if (src->format == PICT_a8 )
501                 rc0_in_rgb |= A_RGB_ZERO;
502         else 
503                 rc0_in_rgb |= 0x08000000; //A = rgb_0, use texture 0 rgb
504         
505         if ( ! mask )
506                 rc0_in_rgb |= B_RGB_ONE;
507         else 
508                 if (  mask->format == PICT_x8r8g8b8 )  //no alpha? ignore it
509                         rc0_in_rgb |= B_RGB_ONE;
510                 else
511                         rc0_in_rgb |= 0x00190000; //B = a_1, use texture 1 alpha value
512         
513         if ( state.is_a8_plus_a8 )
514                 {
515                 rc0_in_alpha = 0x18000000 | B_ALPHA_ONE | C_ALPHA_ZERO | D_ALPHA_ZERO; //A = a_0, use texture 0 alpha value
516                 rc0_in_rgb = 0x08000000 | B_RGB_ONE | C_RGB_ZERO | D_RGB_ZERO; //A = rgb_0, use texture 0 rgb
517                 }
518                 
519         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA(0), 12);
520         OUT_RING(rc0_in_alpha);
521         OUT_RING  (rc1_in_alpha);
522         OUT_RING (rc0_in_rgb);
523         OUT_RING  (rc1_in_rgb);
524         OUT_RING  (0); /*COLOR 0*/
525         OUT_RING  (0); /*COLOR 1*/
526         OUT_RING  (0x00000c00);
527         OUT_RING  (0);
528         OUT_RING  (0x000010cd);
529         OUT_RING  (0x18000000);
530         OUT_RING  (0x300e0300);
531         OUT_RING  (0x0c091c80);
532 }
533
534 static void NV10SetPictOp(NVPtr pNv,int op, int sf, int df)
535 {
536         struct {int src;int dst;} pictops[] =
537         {
538                 {0x0000,0x0000}, // PictOpClear
539                 {0x0001,0x0000}, // PictOpSrc 
540                 {0x0000,0x0001}, // PictOpDst
541                 {0x0001,0x0303}, // PictOpOver
542                 {0x0305,0x0001}, // PictOpOverReverse
543                 {0x0304,0x0000}, // PictOpIn
544                 {0x0000,0x0302}, // PictOpInReverse
545                 {0x0305,0x0000}, // PictOpOut
546                 {0x0000,0x0303}, // PictOpOutReverse
547                 {0x0304,0x0303}, // PictOpAtop
548                 {0x0305,0x0302}, // PictOpAtopReverse
549                 {0x0305,0x0303}, // PictOpXor
550                 {0x0001,0x0001}, // PictOpAdd
551         };
552         
553         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BLEND_FUNC_SRC, 2);
554         OUT_RING  (pictops[op].src);
555         OUT_RING  (pictops[op].dst);
556         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
557         OUT_RING  (1);
558 }
559
560 Bool NV10PrepareComposite(int     op,
561                                PicturePtr pSrcPicture,
562                                PicturePtr pMaskPicture,
563                                PicturePtr pDstPicture,
564                                PixmapPtr  pSrc,
565                                PixmapPtr  pMask,
566                                PixmapPtr  pDst)
567 {
568         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
569         NVPtr pNv = NVPTR(pScrn);
570
571         if (NV10CheckA8_PLUS_A8_FEASABILITY(pSrcPicture,pMaskPicture,pDstPicture,op))
572                 { //is this our A8 + A8 hack?
573                 state.have_mask = FALSE;
574                 state.is_a8_plus_a8 = TRUE;
575                 NV10SetBuffer(pNv,pDstPicture,pDst);
576                 NV10SetTexture(pNv,0,pSrcPicture,pSrc);
577                 NV10SetRegCombs(pNv, pSrcPicture, pMaskPicture);
578                 NV10SetPictOp(pNv, op, PICT_r5g6b5, PICT_r5g6b5);
579                 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
580                 OUT_RING  (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_QUADS);
581                 return TRUE;
582                 }
583         
584         state.is_a8_plus_a8 = FALSE;
585                 
586         /* Set dst format */
587         NV10SetBuffer(pNv,pDstPicture,pDst);
588
589         /* Set src format */
590         NV10SetTexture(pNv,0,pSrcPicture,pSrc);
591
592         /* Set mask format */
593         if (pMaskPicture)
594                 NV10SetTexture(pNv,1,pMaskPicture,pMask);
595
596         NV10SetRegCombs(pNv, pSrcPicture, pMaskPicture);
597
598         /* Set PictOp */
599         NV10SetPictOp(pNv, op, pSrcPicture->format, pDstPicture->format);
600
601         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
602         OUT_RING  (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_QUADS);
603
604         state.have_mask=(pMaskPicture!=NULL);
605         return TRUE;
606 }
607
608 static inline void NV10Vertex(NVPtr pNv,float vx,float vy,float tx,float ty)
609 {
610         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX0_2F_S, 2);
611         OUT_RINGf (tx);
612         OUT_RINGf (ty);
613         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_POS_3F_X, 3);
614         OUT_RINGf (vx);
615         OUT_RINGf (vy);
616         OUT_RINGf (0.f);
617 }
618
619 static inline void NV10MVertex(NVPtr pNv,float vx,float vy,float t0x,float t0y,float t1x,float t1y)
620 {
621         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX0_2F_S, 2);
622         OUT_RINGf (t0x);
623         OUT_RINGf (t0y);
624         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX1_2F_S, 2);
625         OUT_RINGf (t1x);
626         OUT_RINGf (t1y);
627         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_POS_3F_X, 3);
628         OUT_RINGf (vx);
629         OUT_RINGf (vy);
630         OUT_RINGf (0.f);
631 }
632
/* Convert a 16.16 fixed point value: fractional part scaled by 1/65536, plus the integer part. */
#define xFixedToFloat(v) \
	(((float)xFixedFrac(v) / 65536.0) + (float)xFixedToInt((v)))
635
636 static void
637 NV10EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
638                                           float *x_ret, float *y_ret)
639 {
640         PictVector v;
641
642         if (t) {
643                 v.vector[0] = IntToxFixed(x);
644                 v.vector[1] = IntToxFixed(y);
645                 v.vector[2] = xFixed1;
646                 PictureTransformPoint(t, &v);
647                 *x_ret = xFixedToFloat(v.vector[0]);
648                 *y_ret = xFixedToFloat(v.vector[1]);
649         } else {
650                 *x_ret = (float)x;
651                 *y_ret = (float)y;
652         }
653 }
654
655
656 void NV10Composite(PixmapPtr pDst,
657                         int       srcX,
658                         int       srcY,
659                         int       maskX,
660                         int       maskY,
661                         int       dstX,
662                         int       dstY,
663                         int       width,
664                         int       height)
665 {
666         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
667         NVPtr pNv = NVPTR(pScrn);
668         float sX0, sX1, sY0, sY1;
669         float mX0, mX1, mY0, mY1;
670         
671         if ( state.is_a8_plus_a8 )
672                 {
673                 //xf86DrvMsg(0, X_INFO, "Composite hack part - srcX %d srcY %d dstX %d dstY %d w %d h %d\n", srcX, srcY, dstX, dstY, width, height);            
674                 srcX = srcX >> 2;
675                 dstX = dstX >> 2;
676                 width = ((width + 3) &~ 3) >> 2;                
677                 }
678                 
679         NV10EXATransformCoord(state.unit[0].transform, srcX, srcY,
680                               state.unit[0].width,
681                               state.unit[0].height, &sX0, &sY0);
682         NV10EXATransformCoord(state.unit[0].transform,
683                               srcX + width, srcY + height,
684                               state.unit[0].width,
685                               state.unit[0].height, &sX1, &sY1);
686
687         if (state.have_mask) {
688                 NV10EXATransformCoord(state.unit[1].transform, maskX, maskY,
689                                       state.unit[1].width,
690                                       state.unit[1].height, &mX0, &mY0);
691                 NV10EXATransformCoord(state.unit[1].transform,
692                                       maskX + width, maskY + height,
693                                       state.unit[1].width,
694                                       state.unit[1].height, &mX1, &mY1);
695                 NV10MVertex(pNv , dstX         ,          dstY,sX0 , sY0 , mX0 , mY0);
696                 NV10MVertex(pNv , dstX + width ,          dstY,sX1 , sY0 , mX1 , mY0);
697                 NV10MVertex(pNv , dstX + width , dstY + height,sX1 , sY1 , mX1 , mY1);
698                 NV10MVertex(pNv , dstX         , dstY + height,sX0 , sY1 , mX0 , mY1);
699         } else {
700                 NV10Vertex(pNv , dstX         ,          dstY , sX0 , sY0);
701                 NV10Vertex(pNv , dstX + width ,          dstY , sX1 , sY0);
702                 NV10Vertex(pNv , dstX + width , dstY + height , sX1 , sY1);
703                 NV10Vertex(pNv , dstX         , dstY + height , sX0 , sY1);
704         }
705
706         FIRE_RING();
707 }
708
709 void NV10DoneComposite (PixmapPtr pDst)
710 {
711         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
712         NVPtr pNv = NVPTR(pScrn);
713
714         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
715         OUT_RING  (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_STOP);
716
717         exaMarkSync(pDst->drawable.pScreen);
718 }
719
720
721 Bool
722 NVAccelInitNV10TCL(ScrnInfoPtr pScrn)
723 {
724         NVPtr pNv = NVPTR(pScrn);
725         static int have_object = FALSE;
726         uint32_t class = 0, chipset;
727         int i;
728
729         chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
730         if (    ((chipset & 0xf0) != NV_ARCH_10) &&
731                 ((chipset & 0xf0) != NV_ARCH_20) )
732                 return FALSE;
733
734         if (chipset>=0x20)
735                 class = NV11_TCL_PRIMITIVE_3D;
736         else if (chipset>=0x17)
737                 class = NV17_TCL_PRIMITIVE_3D;
738         else if (chipset>=0x11)
739                 class = NV11_TCL_PRIMITIVE_3D;
740         else
741                 class = NV10_TCL_PRIMITIVE_3D;
742
743         if (!have_object) {
744                 if (!NVDmaCreateContextObject(pNv, Nv3D, class))
745                         return FALSE;
746                 have_object = TRUE;
747         }
748
749         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DMA_NOTIFY, 1);
750         OUT_RING  (NvNullObject);
751
752         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DMA_IN_MEMORY0, 2);
753         OUT_RING  (NvDmaFB);
754         OUT_RING  (NvDmaTT);
755
756         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DMA_IN_MEMORY2, 2);
757         OUT_RING  (NvDmaFB);
758         OUT_RING  (NvDmaFB);
759
760         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
761         OUT_RING  (0);
762
763         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 2);
764         OUT_RING  (0);
765         OUT_RING  (0);
766
767         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(0), 1);
768         OUT_RING  ((0x7ff<<16)|0x800);
769         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_VERT(0), 1);
770         OUT_RING  ((0x7ff<<16)|0x800);
771
772         for (i=1;i<8;i++) {
773                 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(i), 1);
774                 OUT_RING  (0);
775                 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_VERT(i), 1);
776                 OUT_RING  (0);
777         }
778
779         BEGIN_RING(Nv3D, 0x290, 1);
780         OUT_RING  ((0x10<<16)|1);
781         BEGIN_RING(Nv3D, 0x3f4, 1);
782         OUT_RING  (0);
783
784 //      BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOTIFY, 1);
785 //      OUT_RING  (0);
786         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
787         OUT_RING  (0);
788
789         if (class != NV10_TCL_PRIMITIVE_3D) {
790                 /* For nv11, nv17 */
791                 BEGIN_RING(Nv3D, 0x120, 3);
792                 OUT_RING  (0);
793                 OUT_RING  (1);
794                 OUT_RING  (2);
795
796                 BEGIN_RING(NvImageBlit, 0x120, 3);
797                 OUT_RING  (0);
798                 OUT_RING  (1);
799                 OUT_RING  (2);
800
801                 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
802                 OUT_RING  (0);
803         }
804
805         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
806         OUT_RING  (0);
807
808         /* Set state */
809         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_FOG_ENABLE, 1);
810         OUT_RING  (0);
811         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_ALPHA_FUNC_ENABLE, 1);
812         OUT_RING  (0);
813         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_ALPHA_FUNC_FUNC, 2);
814         OUT_RING  (0x207);
815         OUT_RING  (0);
816         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_ENABLE(0), 2);
817         OUT_RING  (0);
818         OUT_RING  (0);
819         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA(0), 12);
820         OUT_RING  (0x30141010);
821         OUT_RING  (0);
822         OUT_RING  (0x20040000);
823         OUT_RING  (0);
824         OUT_RING  (0);
825         OUT_RING  (0);
826         OUT_RING  (0x00000c00);
827         OUT_RING  (0);
828         OUT_RING  (0x00000c00);
829         OUT_RING  (0x18000000);
830         OUT_RING  (0x300e0300);
831         OUT_RING  (0x0c091c80);
832         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
833         OUT_RING  (0);
834         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DITHER_ENABLE, 2);
835         OUT_RING  (1);
836         OUT_RING  (0);
837         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_LINE_SMOOTH_ENABLE, 1);
838         OUT_RING  (0);
839         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_WEIGHT_ENABLE, 2);
840         OUT_RING  (0);
841         OUT_RING  (0);
842         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BLEND_FUNC_SRC, 4);
843         OUT_RING  (1);
844         OUT_RING  (0);
845         OUT_RING  (0);
846         OUT_RING  (0x8006);
847         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_STENCIL_MASK, 8);
848         OUT_RING  (0xff);
849         OUT_RING  (0x207);
850         OUT_RING  (0);
851         OUT_RING  (0xff);
852         OUT_RING  (0x1e00);
853         OUT_RING  (0x1e00);
854         OUT_RING  (0x1e00);
855         OUT_RING  (0x1d01);
856         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NORMALIZE_ENABLE, 1);
857         OUT_RING  (0);
858         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_FOG_ENABLE, 2);
859         OUT_RING  (0);
860         OUT_RING  (0);
861         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_LIGHT_MODEL, 1);
862         OUT_RING  (0);
863         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_COLOR_CONTROL, 1);
864         OUT_RING  (0);
865         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_ENABLED_LIGHTS, 1);
866         OUT_RING  (0);
867         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POLYGON_OFFSET_POINT_ENABLE, 3);
868         OUT_RING  (0);
869         OUT_RING  (0);
870         OUT_RING  (0);
871         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DEPTH_FUNC, 1);
872         OUT_RING  (0x201);
873         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DEPTH_WRITE_ENABLE, 1);
874         OUT_RING  (0);
875         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DEPTH_TEST_ENABLE, 1);
876         OUT_RING  (0);
877         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POLYGON_OFFSET_FACTOR, 2);
878         OUT_RING  (0);
879         OUT_RING  (0);
880         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POINT_SIZE, 1);
881         OUT_RING  (8);
882         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POINT_PARAMETERS_ENABLE, 2);
883         OUT_RING  (0);
884         OUT_RING  (0);
885         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_LINE_WIDTH, 1);
886         OUT_RING  (8);
887         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_LINE_SMOOTH_ENABLE, 1);
888         OUT_RING  (0);
889         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POLYGON_MODE_FRONT, 2);
890         OUT_RING  (0x1b02);
891         OUT_RING  (0x1b02);
892         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_CULL_FACE, 2);
893         OUT_RING  (0x405);
894         OUT_RING  (0x901);
895         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POLYGON_SMOOTH_ENABLE, 1);
896         OUT_RING  (0);
897         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_CULL_FACE_ENABLE, 1);
898         OUT_RING  (0);
899         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_CLIP_PLANE_ENABLE(0), 8);
900         for (i=0;i<8;i++) {
901                 OUT_RING  (0);
902         }
903         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_FOG_EQUATION_CONSTANT, 3);
904         OUT_RING  (0x3fc00000); /* -1.50 */
905         OUT_RING  (0xbdb8aa0a); /* -0.09 */
906         OUT_RING  (0);          /*  0.00 */
907
908         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
909         OUT_RING  (0);
910
911         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_FOG_MODE, 2);
912         OUT_RING  (0x802);
913         OUT_RING  (2);
914         /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
915          * using texturing, except when using the texture matrix
916          */
917         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEW_MATRIX_ENABLE, 1);
918         OUT_RING  (6);
919         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_COLOR_MASK, 1);
920         OUT_RING  (0x01010101);
921
922         /* Set vertex component */
923         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_COL_4F_R, 4);
924         OUT_RINGf (1.0);
925         OUT_RINGf (1.0);
926         OUT_RINGf (1.0);
927         OUT_RINGf (1.0);
928         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_COL2_3F_R, 3);
929         OUT_RING  (0);
930         OUT_RING  (0);
931         OUT_RING  (0);
932         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_NOR_3F_X, 3);
933         OUT_RING  (0);
934         OUT_RING  (0);
935         OUT_RINGf (1.0);
936         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX0_4F_S, 4);
937         OUT_RINGf (0.0);
938         OUT_RINGf (0.0);
939         OUT_RINGf (0.0);
940         OUT_RINGf (1.0);
941         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX1_4F_S, 4);
942         OUT_RINGf (0.0);
943         OUT_RINGf (0.0);
944         OUT_RINGf (0.0);
945         OUT_RINGf (1.0);
946         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_FOG_1F, 1);
947         OUT_RINGf (0.0);
948         BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_EDGEFLAG_ENABLE, 1);
949         OUT_RING  (1);
950
951         return TRUE;
952 }
953
954
955
956