2 * Copyright 2007 Stephane Marchesin
3 * Copyright 2007 Arthur Huillet
4 * Copyright 2007 Peter Winters
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
21 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include "nv_include.h"
31 typedef struct nv10_exa_state {
33 Bool is_a8_plus_a8; /*as known as is_extremely_dirty :)*/
35 PictTransformPtr transform;
40 static nv10_exa_state_t state;
42 static int NV10TexFormat(int ExaFormat)
44 struct {int exa;int hw;} tex_format[] =
46 {PICT_a8r8g8b8, 0x900},
47 {PICT_x8r8g8b8, 0x900},
48 {PICT_r5g6b5, 0x880}, /*this one was only tested with rendercheck*/
49 /*{PICT_a1r5g5b5, NV10TCL_TX_FORMAT_FORMAT_R5G5B5A1},
50 {PICT_a4r4g4b4, NV10TCL_TX_FORMAT_FORMAT_R4G4B4A4},*/
51 {PICT_a8, 0x980}, /*this is a NV1x only format, corresponding NV2x is 0xD80, we hack it in below*/
55 for(i=0;i<sizeof(tex_format)/sizeof(tex_format[0]);i++)
57 if(tex_format[i].exa==ExaFormat)
58 return tex_format[i].hw;
/*
 * Look up the hardware render-target format value for a Render picture
 * format by scanning dst_format[] linearly and returning the hw field of
 * the first entry whose exa field equals ExaFormat.
 * Both PICT_a8r8g8b8 and PICT_x8r8g8b8 map to 0x108.
 * NOTE(review): callers test the result with '!', so the no-match result is
 * presumably 0 - confirm.
 */
64 static int NV10DstFormat(int ExaFormat)
66 struct {int exa;int hw;} dst_format[] =
68 {PICT_a8r8g8b8, 0x108},
69 {PICT_x8r8g8b8, 0x108},
74 for(i=0;i<sizeof(dst_format)/sizeof(dst_format[0]);i++)
76 if(dst_format[i].exa==ExaFormat)
77 return dst_format[i].hw;
83 static Bool NV10CheckTexture(PicturePtr Picture)
85 int w = Picture->pDrawable->width;
86 int h = Picture->pDrawable->height;
88 if ((w > 2046) || (h>2046))
90 if (!NV10TexFormat(Picture->format))
92 if (Picture->filter != PictFilterNearest && Picture->filter != PictFilterBilinear)
94 if (Picture->componentAlpha)
96 /* we cannot repeat on NV10 because NPOT textures do not support this. unfortunately. */
97 if (Picture->repeat != RepeatNone)
98 /* we can repeat 1x1 textures */
99 if (!(w == 1 && h == 1))
104 static Bool NV10CheckBuffer(PicturePtr Picture)
106 int w = Picture->pDrawable->width;
107 int h = Picture->pDrawable->height;
109 if ((w > 4096) || (h>4096))
111 if (Picture->componentAlpha)
113 if (!NV10DstFormat(Picture->format))
118 static Bool NV10CheckPictOp(int op)
120 if ( op == PictOpAtopReverse ) /*this op doesn't work*/
124 if ( op >= PictOpSaturate )
125 { /*we do no saturate, disjoint, conjoint, though we could do e.g. DisjointClear which really is Clear*/
131 /* Check if the current operation is a doable A8 + A8 */
132 /* A8 destination is a special case, because we do it by having the card think
133 it's ARGB. For now we support PictOpAdd which is the only important op for this dst format,
134 and without transformation or funny things.*/
135 static Bool NV10Check_A8plusA8_Feasability(PicturePtr src, PicturePtr msk, PicturePtr dst, int op)
137 #if X_BYTE_ORDER == X_BIG_ENDIAN
140 if ((!msk) && (src->format == PICT_a8) && (dst->format == PICT_a8) && (!src->transform) &&
141 (op == PictOpAdd) && (src->repeat == RepeatNone))
149 #define NV10EXAFallbackInfo(X,Y,Z,S,T) NV10EXAFallbackInfo_real(X,Y,Z,S,T)
151 #define NV10EXAFallbackInfo(X,Y,Z,S,T) do { ; } while (0)
/*
 * Debug helper: builds a one-line textual description of a composite
 * request and logs it with xf86DrvMsg at X_INFO level.
 * The text is assembled piecewise with sprintf: the caller's reason string,
 * the operator name (or "PictOp<n>" for others), the source format and
 * size, the destination format and size, and either "& NONE" or the mask
 * format and size.
 * Reads pSrcPicture->pDrawable and pDstPicture->pDrawable unconditionally.
 * NOTE(review): the assembled buffer (out2) is passed to xf86DrvMsg as the
 * format argument itself rather than through a "%s" format - confirm and fix.
 * NOTE(review): every write uses unbounded sprintf; the buffer size is not
 * visible here - confirm it cannot overflow.
 */
154 static void NV10EXAFallbackInfo_real(char * reason, int op, PicturePtr pSrcPicture,
155 PicturePtr pMaskPicture,
156 PicturePtr pDstPicture)
160 sprintf(out, "%s ", reason);
161 out = out + strlen(out);
165 sprintf(out, "PictOpClear ");
168 sprintf(out, "PictOpSrc ");
171 sprintf(out, "PictOpDst ");
174 sprintf(out, "PictOpOver ");
176 case PictOpOutReverse:
177 sprintf(out, "PictOpOutReverse ");
180 sprintf(out, "PictOpAdd ");
183 sprintf(out, "PictOp%d ", op);
185 out = out + strlen(out);
186 switch ( pSrcPicture->format )
189 sprintf(out, "A8R8G8B8 ");
192 sprintf(out, "X8R8G8B8 ");
195 sprintf(out, "X8B8G8R8 ");
198 sprintf(out, "R5G6B5 ");
207 sprintf(out, "%x ", pSrcPicture->format);
210 sprintf(out, "(%dx%d) ", pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
211 if ( pSrcPicture->repeat != RepeatNone )
216 switch ( pDstPicture->format )
219 sprintf(out, "A8R8G8B8 ");
222 sprintf(out, "X8R8G8B8 ");
225 sprintf(out, "X8B8G8R8 ");
228 sprintf(out, "R5G6B5 ");
237 sprintf(out, "%x ", pDstPicture->format);
240 sprintf(out, "(%dx%d) ", pDstPicture->pDrawable->width, pDstPicture->pDrawable->height);
241 if ( pDstPicture->repeat != RepeatNone )
245 sprintf(out, "& NONE");
247 switch ( pMaskPicture->format )
250 sprintf(out, "& A8R8G8B8 ");
253 sprintf(out, "& X8R8G8B8 ");
256 sprintf(out, "& X8B8G8R8 ");
259 sprintf(out, "& A8 ");
262 sprintf(out, "& A1 ");
265 sprintf(out, "& %x ", pMaskPicture->format);
268 sprintf(out, "(%dx%d) ", pMaskPicture->pDrawable->width, pMaskPicture->pDrawable->height);
269 if ( pMaskPicture->repeat != RepeatNone )
274 xf86DrvMsg(0, X_INFO, out2);
278 Bool NV10CheckComposite(int op,
279 PicturePtr pSrcPicture,
280 PicturePtr pMaskPicture,
281 PicturePtr pDstPicture)
284 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
286 NV10EXAFallbackInfo("Hackelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
290 if (!NV10CheckPictOp(op))
292 NV10EXAFallbackInfo("pictop", op, pSrcPicture, pMaskPicture, pDstPicture);
295 if (!NV10CheckBuffer(pDstPicture))
297 NV10EXAFallbackInfo("dst", op, pSrcPicture, pMaskPicture, pDstPicture);
301 if (!NV10CheckTexture(pSrcPicture))
303 NV10EXAFallbackInfo("src", op, pSrcPicture, pMaskPicture, pDstPicture);
307 if ((pMaskPicture) &&(!NV10CheckTexture(pMaskPicture)))
309 NV10EXAFallbackInfo("mask", op, pSrcPicture, pMaskPicture, pDstPicture);
313 NV10EXAFallbackInfo("Accelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
/*
 * Program texture unit `unit` to sample `pixmap` as described by `Pict`.
 * Emits, in order: TX_OFFSET (a VRAM read relocation to the pixmap's buffer
 * object), TX_NPOT_PITCH (pixmap pitch in the upper 16 bits), TX_NPOT_SIZE
 * (width in the upper 16 bits, picture height in the lower), TX_FORMAT,
 * TX_ENABLE and TX_FILTER.
 * The format word uses clamp-to-edge wrapping in S and T. When the picture
 * repeats, a format is OR-ed in by picture format (0x80 for a8, 0x280 for
 * r5g6b5, 0x300 labelled "ARGB format"); otherwise the value comes from
 * NV10TexFormat(), with a separate case for PICT_a8 on NV_ARCH_20.
 * PictFilterNearest selects NEAREST magnify/minify, anything else LINEAR.
 * Finally records the pixmap dimensions and the picture transform in
 * state.unit[unit].
 */
317 static void NV10SetTexture(NVPtr pNv,int unit,PicturePtr Pict,PixmapPtr pixmap)
319 struct nouveau_channel *chan = pNv->chan;
320 struct nouveau_grobj *celcius = pNv->Nv3D;
321 struct nouveau_pixmap *nvpix = nouveau_pixmap(pixmap);
322 int log2w = log2i(Pict->pDrawable->width);
323 int log2h = log2i(Pict->pDrawable->height);
326 (NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE) |
327 (NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE) |
330 (1<<12) | /* lod == 1 */
333 BEGIN_RING(chan, celcius, NV10TCL_TX_OFFSET(unit), 1 );
334 OUT_RELOCl(chan, nvpix->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
336 /* if repeat is set we're always handling a 1x1 texture with ARGB/XRGB destination,
337 in that case we change the format to use the POT (swizzled) matching format */
338 if (Pict->repeat != RepeatNone)
340 if (Pict->format == PICT_a8)
341 txfmt |= 0x80; /* A8 */
342 else if (Pict->format == PICT_r5g6b5 )
343 txfmt |= 0x280; /* R5G6B5 */
345 txfmt |= 0x300; /* ARGB format */
349 if (pNv->Architecture == NV_ARCH_20 && Pict->format == PICT_a8 )
351 else txfmt |= NV10TexFormat(Pict->format);
352 w = Pict->pDrawable->width;
353 /* NPOT_SIZE expects an even number for width, we can round up uneven
354 * numbers here because EXA always gives 64 byte aligned pixmaps
355 * and for all formats we support 64 bytes represents an even number
360 BEGIN_RING(chan, celcius, NV10TCL_TX_NPOT_PITCH(unit), 1);
361 OUT_RING (chan, exaGetPixmapPitch(pixmap) << 16);
363 BEGIN_RING(chan, celcius, NV10TCL_TX_NPOT_SIZE(unit), 1);
364 OUT_RING (chan, (w<<16) | Pict->pDrawable->height);
367 BEGIN_RING(chan, celcius, NV10TCL_TX_FORMAT(unit), 1 );
368 OUT_RING (chan, txfmt);
370 BEGIN_RING(chan, celcius, NV10TCL_TX_ENABLE(unit), 1 );
371 OUT_RING (chan, NV10TCL_TX_ENABLE_ENABLE);
373 BEGIN_RING(chan, celcius, NV10TCL_TX_FILTER(unit), 1);
374 if (Pict->filter == PictFilterNearest)
375 OUT_RING (chan, (NV10TCL_TX_FILTER_MAGNIFY_NEAREST) |
376 (NV10TCL_TX_FILTER_MINIFY_NEAREST));
378 OUT_RING (chan, (NV10TCL_TX_FILTER_MAGNIFY_LINEAR) |
379 (NV10TCL_TX_FILTER_MINIFY_LINEAR));
381 state.unit[unit].width = (float)pixmap->drawable.width;
382 state.unit[unit].height = (float)pixmap->drawable.height;
383 state.unit[unit].transform = Pict->transform;
// Program `pixmap` as the render target for the 3D object.
// Emits the RT_FORMAT group (format, pitch packed into both halves of one
// word, and a VRAM write relocation to the pixmap's buffer object), then
// RT_HORIZ, the viewport clip mode and clip rectangle, a 16-entry
// projection matrix written as 1.0f / 0.0f values, the depth range and the
// viewport scale.
// When state.is_a8_plus_a8 is set the target format is taken from
// PICT_a8r8g8b8 instead of Pict->format.
386 static void NV10SetBuffer(NVPtr pNv,PicturePtr Pict,PixmapPtr pixmap)
388 struct nouveau_channel *chan = pNv->chan;
389 struct nouveau_grobj *celcius = pNv->Nv3D;
390 struct nouveau_pixmap *nvpix = nouveau_pixmap(pixmap);
397 BEGIN_RING(chan, celcius, NV10TCL_RT_FORMAT, 4);
398 if ( state.is_a8_plus_a8 )
400 OUT_RING (chan, NV10DstFormat(PICT_a8r8g8b8));
403 OUT_RING (chan, NV10DstFormat(Pict->format));
406 OUT_RING (chan, ((uint32_t)exaGetPixmapPitch(pixmap) << 16) |(uint32_t)exaGetPixmapPitch(pixmap));
407 OUT_RELOCl(chan, nvpix->bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
410 BEGIN_RING(chan, celcius, NV10TCL_RT_HORIZ, 2);
411 OUT_RING (chan, (w<<16)|x);
412 OUT_RING (chan, (h<<16)|y);
413 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_MODE, 1); /* clip_mode */
415 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
416 OUT_RING (chan, ((w-1+x)<<16)|x|0x08000800);
417 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
418 OUT_RING (chan, ((h-1+y)<<16)|y|0x08000800);
420 BEGIN_RING(chan, celcius, NV10TCL_PROJECTION_MATRIX(0), 16);
423 OUT_RINGf (chan, 1.0f);
425 OUT_RINGf (chan, 0.0f);
427 BEGIN_RING(chan, celcius, NV10TCL_DEPTH_RANGE_NEAR, 2);
430 OUT_RINGf (chan, 16777216.0);
432 OUT_RINGf (chan, 65536.0);
434 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_SCALE_X, 4);
435 OUT_RINGf (chan, -2048.0);
436 OUT_RINGf (chan, -2048.0);
/*
 * Program the register combiner inputs for ordinary (non A8 + A8)
 * compositing and emit them as six words starting at RC_IN_ALPHA(0):
 * rc0 alpha, rc1 alpha, rc0 rgb, rc1 rgb, constant color 0, constant color 1.
 * Only the rc0 words are built here; the rc1 words and both constant
 * colors are emitted with their initial value of 0.
 * Inputs C and D of stage 0 are always zero. Input A is forced to one
 * (alpha) for a PICT_x8r8g8b8 source and to zero (rgb) for a PICT_a8 source;
 * input B is forced to one for a PICT_x8r8g8b8 mask, which carries no alpha.
 * The A_, B_, C_, D_ ..._ZERO / ..._ONE helper macros defined inside this
 * function are not undefined in the visible lines and are also used by
 * NV10SetRegCombs_A8plusA8.
 */
441 static void NV10SetRegCombs(NVPtr pNv, PicturePtr src, PicturePtr mask)
443 struct nouveau_channel *chan = pNv->chan;
444 struct nouveau_grobj *celcius = pNv->Nv3D;
446 /*This can be a bit difficult to understand at first glance.
447 Reg combiners are described here:
448 http://icps.u-strasbg.fr/~marchesin/perso/extensions/NV/register_combiners.html
450 Single texturing setup, without honoring vertex colors (non default setup) is:
451 Alpha RC 0 : a_0 * 1 + 0 * 0
452 RGB RC 0 : rgb_0 * 1 + 0 * 0
454 Final combiner uses default setup
456 Default setup uses vertex rgb/alpha in place of 1s above, but we don't need that in 2D.
458 Multi texturing setup, where we do TEX0 in TEX1 (masking) is:
459 Alpha RC 0 : a_0 * a_1 + 0 * 0
460 RGB RC0 : rgb_0 * a_1 + 0 * 0
462 Final combiner uses default setup
466 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
467 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
468 unsigned int color0 = 0, color1 = 0;
469 #define A_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA)
470 #define B_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA)
471 #define C_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA)
472 #define D_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA)
474 #define A_ALPHA_ONE (A_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV))
475 #define B_ALPHA_ONE (B_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV))
476 #define C_ALPHA_ONE (C_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV))
477 #define D_ALPHA_ONE (D_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV))
479 #define A_RGB_ZERO (NV10TCL_RC_IN_RGB_A_INPUT_ZERO | NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB)
480 #define B_RGB_ZERO (NV10TCL_RC_IN_RGB_B_INPUT_ZERO | NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB)
481 #define C_RGB_ZERO (NV10TCL_RC_IN_RGB_C_INPUT_ZERO | NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB)
482 #define D_RGB_ZERO (NV10TCL_RC_IN_RGB_D_INPUT_ZERO | NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB)
484 #define A_RGB_ONE (A_RGB_ZERO | NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV)
485 #define B_RGB_ONE (B_RGB_ZERO | NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV)
486 #define C_RGB_ONE (C_RGB_ZERO | NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV)
487 #define D_RGB_ONE (D_RGB_ZERO | NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV)
489 rc0_in_alpha |= C_ALPHA_ZERO | D_ALPHA_ZERO;
490 if (src->format == PICT_x8r8g8b8)
491 rc0_in_alpha |= A_ALPHA_ONE;
493 rc0_in_alpha |= 0x18000000;
496 rc0_in_alpha |= B_ALPHA_ONE;
498 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
499 rc0_in_alpha |= B_ALPHA_ONE;
501 rc0_in_alpha |= 0x00190000; /*B = a_1*/
503 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
504 if (src->format == PICT_a8 )
505 rc0_in_rgb |= A_RGB_ZERO;
507 rc0_in_rgb |= 0x08000000; /*A = rgb_0*/
510 rc0_in_rgb |= B_RGB_ONE;
512 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
513 rc0_in_rgb |= B_RGB_ONE;
515 rc0_in_rgb |= 0x00190000; /*B = a_1*/
517 BEGIN_RING(chan, celcius, NV10TCL_RC_IN_ALPHA(0), 6);
518 OUT_RING (chan, rc0_in_alpha);
519 OUT_RING (chan, rc1_in_alpha);
520 OUT_RING (chan, rc0_in_rgb);
521 OUT_RING (chan, rc1_in_rgb);
522 OUT_RING (chan, color0); /*COLOR 0*/
523 OUT_RING (chan, color1); /*COLOR 1*/
/*
 * Register combiner setup for the A8 + A8 path; emits the same six words
 * at RC_IN_ALPHA(0) as NV10SetRegCombs (rc0/rc1 alpha, rc0/rc1 rgb,
 * constant colors 0 and 1).
 * mask_out_bytes is a bit mask: when a bit is set, the inputs belonging to
 * that byte are programmed to zero instead of to a texture alpha. Bit 0
 * governs the alpha inputs and bit 1 the rgb A/B inputs in one section;
 * bit 3 governs rgb A/B and bit 2 rgb C/D in the other section.
 * NOTE(review): the two sections are presumably selected by `pass`; the
 * selecting condition should be confirmed against the full source.
 * Relies on the A_, B_, C_, D_ ..._ZERO / ..._ONE macros defined inside
 * NV10SetRegCombs.
 */
526 static void NV10SetRegCombs_A8plusA8(NVPtr pNv, int pass, int mask_out_bytes)
528 struct nouveau_channel *chan = pNv->chan;
529 struct nouveau_grobj *celcius = pNv->Nv3D;
530 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
531 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
532 unsigned int color0 = 0, color1 = 0;
536 if ( mask_out_bytes & 1 )
537 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
538 else rc0_in_alpha = 0x19000000 | B_ALPHA_ONE | C_ALPHA_ZERO | D_ALPHA_ZERO;
540 rc0_in_rgb = C_RGB_ZERO | D_RGB_ZERO;
542 if ( mask_out_bytes & 2 )
543 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
544 else rc0_in_rgb |= 0x18000000 | 0x00010000;
546 color0 = 0x00ff0000; /*R = 1 G = 0 B = 0*/
549 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
555 if ( mask_out_bytes & 8 )
556 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
557 else rc0_in_rgb |= 0x18000000 | 0x00010000; /*A = a_0, B= cst color 0*/
561 if ( mask_out_bytes & 4)
562 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
563 else rc0_in_rgb |= 0x1900 | 0x02; /*C = a_1, D = cst color 1*/
565 color1 = 0x0000ff00; /*R = 0, G = 1, B = 0*/
568 BEGIN_RING(chan, celcius, NV10TCL_RC_IN_ALPHA(0), 6);
569 OUT_RING (chan, rc0_in_alpha);
570 OUT_RING (chan, rc1_in_alpha);
571 OUT_RING (chan, rc0_in_rgb);
572 OUT_RING (chan, rc1_in_rgb);
573 OUT_RING (chan, color0); /*COLOR 0*/
574 OUT_RING (chan, color1); /*COLOR 1*/
/*
 * Program the blend function for Render operator `op`.
 * pictops[] holds one {source factor, destination factor} pair per
 * operator, listed from PictOpClear through PictOpAdd; the pair for `op` is
 * written to BLEND_FUNC_SRC and the following method, then
 * BLEND_FUNC_ENABLE is written.
 * NOTE(review): `op` indexes pictops[] directly with no range check in the
 * visible lines; this presumably relies on NV10CheckPictOp having rejected
 * operators beyond the table - confirm.
 */
577 static void NV10SetPictOp(NVPtr pNv,int op)
579 struct nouveau_channel *chan = pNv->chan;
580 struct nouveau_grobj *celcius = pNv->Nv3D;
581 struct {int src;int dst;} pictops[] =
583 {0x0000,0x0000}, /* PictOpClear */
584 {0x0001,0x0000}, /* PictOpSrc */
585 {0x0000,0x0001}, /* PictOpDst */
586 {0x0001,0x0303}, /* PictOpOver */
587 {0x0305,0x0001}, /* PictOpOverReverse */
588 {0x0304,0x0000}, /* PictOpIn */
589 {0x0000,0x0302}, /* PictOpInReverse */
590 {0x0305,0x0000}, /* PictOpOut */
591 {0x0000,0x0303}, /* PictOpOutReverse */
592 {0x0304,0x0303}, /* PictOpAtop */
593 {0x0305,0x0302}, /* PictOpAtopReverse - DOES NOT WORK*/
594 {0x0305,0x0303}, /* PictOpXor */
595 {0x0001,0x0001}, /* PictOpAdd */
598 BEGIN_RING(chan, celcius, NV10TCL_BLEND_FUNC_SRC, 2);
599 OUT_RING (chan, pictops[op].src);
600 OUT_RING (chan, pictops[op].dst);
601 BEGIN_RING(chan, celcius, NV10TCL_BLEND_FUNC_ENABLE, 1);
/*
 * Set up hardware state for a composite operation before any rectangle is
 * emitted.
 * A8 + A8 case (NV10Check_A8plusA8_Feasability succeeds): clears
 * state.have_mask, sets state.is_a8_plus_a8, programs the destination
 * buffer and the blend operator, and binds the source picture to BOTH
 * texture units 0 and 1.
 * General case: clears state.is_a8_plus_a8, programs the destination
 * buffer, binds the source to unit 0 and the mask to unit 1, programs the
 * register combiners and the blend operator, opens a QUADS primitive with
 * VERTEX_BEGIN_END and records in state.have_mask whether a mask picture
 * was supplied.
 * NOTE(review): the mask is presumably bound only when pMaskPicture is
 * non-NULL - confirm against the full source.
 */
605 Bool NV10PrepareComposite(int op,
606 PicturePtr pSrcPicture,
607 PicturePtr pMaskPicture,
608 PicturePtr pDstPicture,
613 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
614 NVPtr pNv = NVPTR(pScrn);
615 struct nouveau_channel *chan = pNv->chan;
616 struct nouveau_grobj *celcius = pNv->Nv3D;
618 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
620 state.have_mask = FALSE;
621 state.is_a8_plus_a8 = TRUE;
622 NV10SetBuffer(pNv,pDstPicture,pDst);
623 NV10SetPictOp(pNv, op);
624 NV10SetTexture(pNv, 0, pSrcPicture, pSrc);
625 NV10SetTexture(pNv, 1, pSrcPicture, pSrc);
629 state.is_a8_plus_a8 = FALSE;
632 NV10SetBuffer(pNv,pDstPicture,pDst);
635 NV10SetTexture(pNv,0,pSrcPicture,pSrc);
637 /* Set mask format */
639 NV10SetTexture(pNv,1,pMaskPicture,pMask);
641 NV10SetRegCombs(pNv, pSrcPicture, pMaskPicture);
644 NV10SetPictOp(pNv, op);
646 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
647 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_QUADS);
649 state.have_mask=(pMaskPicture!=NULL);
653 static inline void NV10Vertex(NVPtr pNv,float vx,float vy,float tx,float ty)
655 struct nouveau_channel *chan = pNv->chan;
656 struct nouveau_grobj *celcius = pNv->Nv3D;
658 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX0_2F_S, 2);
659 OUT_RINGf (chan, tx);
660 OUT_RINGf (chan, ty);
661 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_POS_3F_X, 3);
662 OUT_RINGf (chan, vx);
663 OUT_RINGf (chan, vy);
664 OUT_RINGf (chan, 0.f);
667 static inline void NV10MVertex(NVPtr pNv,float vx,float vy,float t0x,float t0y,float t1x,float t1y)
669 struct nouveau_channel *chan = pNv->chan;
670 struct nouveau_grobj *celcius = pNv->Nv3D;
672 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX0_2F_S, 2);
673 OUT_RINGf (chan, t0x);
674 OUT_RINGf (chan, t0y);
675 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX1_2F_S, 2);
676 OUT_RINGf (chan, t1x);
677 OUT_RINGf (chan, t1y);
678 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_POS_3F_X, 3);
679 OUT_RINGf (chan, vx);
680 OUT_RINGf (chan, vy);
681 OUT_RINGf (chan, 0.f);
/*
 * Convert a fixed-point value to floating point: the integer part from
 * xFixedToInt() plus the fractional part from xFixedFrac() divided by 65536.
 * The argument v is expanded twice, so it must have no side effects.
 */
684 #define xFixedToFloat(v) \
685 ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
/*
 * Map the integer point (x, y) through picture transform t and store the
 * result as floats in *x_ret and *y_ret.
 * The point is converted to a fixed-point vector (x, y, xFixed1), passed
 * to PictureTransformPoint(), and the first two components are converted
 * back with xFixedToFloat().
 * NOTE(review): sx and sy are not used in the visible lines, and handling
 * of a NULL transform is not visible here - confirm against the full source.
 */
688 NV10EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
689 float *x_ret, float *y_ret)
694 v.vector[0] = IntToxFixed(x);
695 v.vector[1] = IntToxFixed(y);
696 v.vector[2] = xFixed1;
697 PictureTransformPoint(t, &v);
698 *x_ret = xFixedToFloat(v.vector[0]);
699 *y_ret = xFixedToFloat(v.vector[1]);
/*
 * Emit the geometry for one composite rectangle.
 * The four source corners are first mapped through texture unit 0's
 * transform into (sX0,sY0) .. (sX3,sY3).
 * A8 + A8 path (state.is_a8_plus_a8): the destination is walked in columns
 * of four bytes, starting at the 4-byte boundary at or left of dstX and
 * ending at the boundary containing dstX + width. For the first and last
 * column, mask_out_bytes collects the bytes that fall outside the
 * rectangle. Each column is drawn twice: once with
 * NV10SetRegCombs_A8plusA8(pass 0) sampling source columns +0 and +1, and
 * once with pass 1 sampling source columns +2 and +3; every pass is wrapped
 * in its own QUADS begin / STOP pair.
 * Masked path (state.have_mask): the four mask corners are mapped through
 * unit 1's transform and four two-texture vertices are emitted.
 * Otherwise four single-texture vertices are emitted.
 * In the two non-A8 paths no begin/end is emitted in the visible lines.
 */
707 void NV10Composite(PixmapPtr pDst,
717 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
718 NVPtr pNv = NVPTR(pScrn);
719 struct nouveau_channel *chan = pNv->chan;
720 struct nouveau_grobj *celcius = pNv->Nv3D;
721 float sX0, sX1, sX2, sY0, sY1, sY2, sX3, sY3;
722 float mX0, mX1, mX2, mY0, mY1, mY2, mX3, mY3;
724 NV10EXATransformCoord(state.unit[0].transform, srcX, srcY,
726 state.unit[0].height, &sX0, &sY0);
727 NV10EXATransformCoord(state.unit[0].transform,
730 state.unit[0].height, &sX1, &sY1);
731 NV10EXATransformCoord(state.unit[0].transform,
732 srcX + width, srcY + height,
734 state.unit[0].height, &sX2, &sY2);
735 NV10EXATransformCoord(state.unit[0].transform,
738 state.unit[0].height, &sX3, &sY3);
740 if ( state.is_a8_plus_a8 )
742 /*We do A8 + A8 in 2-pass : setup the source texture as A8 twice,
743 with different tex coords, do B and G on first pass
744 Then setup again and do R and A on second pass
748 int mask_out_bytes = 0;
750 part_pos_dX = (dstX &~ 3) >> 2; /*we start at the 4byte boundary to the left of the image*/
751 part_pos_sX = sX0 + (dstX &~ 3) - dstX;
753 /*xf86DrvMsg(0, X_INFO, "drawing - srcX %f dstX %d w %d\n", sX0, dstX, width);*/
754 for ( ; part_pos_dX <= (((dstX + width) &~ 3) >> 2); part_pos_sX += 4, part_pos_dX ++ )
757 if ( part_pos_dX == (dstX &~ 3) >> 2 ) /*then we're slightly on the left of the image, bytes to mask out*/
759 /*xf86DrvMsg(0, X_INFO, "on left border...\n");*/
760 switch ( dstX - (dstX &~ 3) ) /*mask out the extra pixels on the left*/
763 mask_out_bytes |= 1 << 0;
765 mask_out_bytes |= 1 << 1;
767 mask_out_bytes |= 1 << 2;
769 mask_out_bytes |= 1 << 3;
774 /*mask out extra pixels on the right, in case the picture never touches an alignment marker*/
775 switch ( width + (dstX & 3) )
778 mask_out_bytes |= 1 << 3;
780 mask_out_bytes |= 1 << 2;
782 mask_out_bytes |= 1 << 1;
784 mask_out_bytes |= 1 << 0;
788 else if ( part_pos_dX == (((dstX + width) &~ 3) >> 2) )
790 /*xf86DrvMsg(0, X_INFO, "on right border...\n");*/
791 switch (4 - ((dstX + width) & 3))
794 mask_out_bytes |= 1 << 3;
796 mask_out_bytes |= 1 << 2;
798 mask_out_bytes |= 1 << 1;
800 mask_out_bytes |= 1 << 0;
808 NV10SetRegCombs_A8plusA8(pNv, 0, mask_out_bytes);
809 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
810 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_QUADS);
812 NV10MVertex(pNv , part_pos_dX , dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
813 NV10MVertex(pNv , part_pos_dX + 1, dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
814 NV10MVertex(pNv , part_pos_dX + 1, dstY + height, part_pos_sX, sY2, part_pos_sX + 1, sY2);
815 NV10MVertex(pNv , part_pos_dX , dstY + height, part_pos_sX, sY2, part_pos_sX + 1, sY2);
817 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
818 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_STOP);
822 NV10SetRegCombs_A8plusA8(pNv, 1, mask_out_bytes);
823 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
824 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_QUADS);
826 NV10MVertex(pNv , part_pos_dX, dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
827 NV10MVertex(pNv , part_pos_dX + 1 , dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
828 NV10MVertex(pNv , part_pos_dX + 1 , dstY + height, part_pos_sX + 2, sY2, part_pos_sX + 3, sY2);
829 NV10MVertex(pNv , part_pos_dX, dstY + height, part_pos_sX + 2, sY2, part_pos_sX + 3, sY2);
831 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
832 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_STOP);
836 else if (state.have_mask) {
837 NV10EXATransformCoord(state.unit[1].transform, maskX, maskY,
839 state.unit[1].height, &mX0, &mY0);
840 NV10EXATransformCoord(state.unit[1].transform,
841 maskX + width, maskY,
843 state.unit[1].height, &mX1, &mY1);
844 NV10EXATransformCoord(state.unit[1].transform,
845 maskX + width, maskY + height,
847 state.unit[1].height, &mX2, &mY2);
848 NV10EXATransformCoord(state.unit[1].transform,
849 maskX, maskY + height,
851 state.unit[1].height, &mX3, &mY3);
852 NV10MVertex(pNv , dstX , dstY,sX0 , sY0 , mX0 , mY0);
853 NV10MVertex(pNv , dstX + width , dstY,sX1 , sY1 , mX1 , mY1);
854 NV10MVertex(pNv , dstX + width , dstY + height,sX2 , sY2 , mX2 , mY2);
855 NV10MVertex(pNv , dstX , dstY + height,sX3 , sY3 , mX3 , mY3);
857 NV10Vertex(pNv , dstX , dstY , sX0 , sY0);
858 NV10Vertex(pNv , dstX + width , dstY , sX1 , sY1);
859 NV10Vertex(pNv , dstX + width , dstY + height , sX2 , sY2);
860 NV10Vertex(pNv , dstX , dstY + height , sX3 , sY3);
864 void NV10DoneComposite (PixmapPtr pDst)
866 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
867 NVPtr pNv = NVPTR(pScrn);
868 struct nouveau_channel *chan = pNv->chan;
869 struct nouveau_grobj *celcius = pNv->Nv3D;
871 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
872 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_STOP);
877 NVAccelInitNV10TCL(ScrnInfoPtr pScrn)
879 NVPtr pNv = NVPTR(pScrn);
880 struct nouveau_channel *chan = pNv->chan;
881 struct nouveau_grobj *celcius;
882 uint32_t class = 0, chipset;
885 chipset = (nvReadMC(pNv, NV_PMC_BOOT_0) >> 20) & 0xff;
886 if ( ((chipset & 0xf0) != NV_ARCH_10) &&
887 ((chipset & 0xf0) != NV_ARCH_20) )
892 else if (chipset>=0x17)
894 else if (chipset>=0x11)
900 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
905 BEGIN_RING(chan, celcius, NV10TCL_DMA_NOTIFY, 1);
906 OUT_RING (chan, chan->nullobj->handle);
908 BEGIN_RING(chan, celcius, NV10TCL_DMA_IN_MEMORY0, 2);
909 OUT_RING (chan, chan->vram->handle);
910 OUT_RING (chan, chan->gart->handle);
912 BEGIN_RING(chan, celcius, NV10TCL_DMA_IN_MEMORY2, 2);
913 OUT_RING (chan, chan->vram->handle);
914 OUT_RING (chan, chan->vram->handle);
916 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
919 BEGIN_RING(chan, celcius, NV10TCL_RT_HORIZ, 2);
923 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
924 OUT_RING (chan, (0x7ff<<16)|0x800);
925 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
926 OUT_RING (chan, (0x7ff<<16)|0x800);
929 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
931 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
935 BEGIN_RING(chan, celcius, 0x290, 1);
936 OUT_RING (chan, (0x10<<16)|1);
937 BEGIN_RING(chan, celcius, 0x3f4, 1);
940 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
943 if (class != NV10TCL) {
945 BEGIN_RING(chan, celcius, 0x120, 3);
950 BEGIN_RING(chan, pNv->NvImageBlit, 0x120, 3);
955 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
959 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
963 BEGIN_RING(chan, celcius, NV10TCL_FOG_ENABLE, 1);
965 BEGIN_RING(chan, celcius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
967 BEGIN_RING(chan, celcius, NV10TCL_ALPHA_FUNC_FUNC, 2);
968 OUT_RING (chan, 0x207);
970 BEGIN_RING(chan, celcius, NV10TCL_TX_ENABLE(0), 2);
973 BEGIN_RING(chan, celcius, NV10TCL_RC_OUT_ALPHA(0), 6);
974 OUT_RING (chan, 0x00000c00);
976 OUT_RING (chan, 0x00000c00);
977 OUT_RING (chan, 0x18000000);
978 OUT_RING (chan, 0x300c0000);
979 OUT_RING (chan, 0x00001c80);
980 BEGIN_RING(chan, celcius, NV10TCL_BLEND_FUNC_ENABLE, 1);
982 BEGIN_RING(chan, celcius, NV10TCL_DITHER_ENABLE, 2);
985 BEGIN_RING(chan, celcius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
987 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
990 BEGIN_RING(chan, celcius, NV10TCL_BLEND_FUNC_SRC, 4);
994 OUT_RING (chan, 0x8006);
995 BEGIN_RING(chan, celcius, NV10TCL_STENCIL_MASK, 8);
996 OUT_RING (chan, 0xff);
997 OUT_RING (chan, 0x207);
999 OUT_RING (chan, 0xff);
1000 OUT_RING (chan, 0x1e00);
1001 OUT_RING (chan, 0x1e00);
1002 OUT_RING (chan, 0x1e00);
1003 OUT_RING (chan, 0x1d01);
1004 BEGIN_RING(chan, celcius, NV10TCL_NORMALIZE_ENABLE, 1);
1006 BEGIN_RING(chan, celcius, NV10TCL_FOG_ENABLE, 2);
1009 BEGIN_RING(chan, celcius, NV10TCL_LIGHT_MODEL, 1);
1011 BEGIN_RING(chan, celcius, NV10TCL_COLOR_CONTROL, 1);
1013 BEGIN_RING(chan, celcius, NV10TCL_ENABLED_LIGHTS, 1);
1015 BEGIN_RING(chan, celcius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
1019 BEGIN_RING(chan, celcius, NV10TCL_DEPTH_FUNC, 1);
1020 OUT_RING (chan, 0x201);
1021 BEGIN_RING(chan, celcius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
1023 BEGIN_RING(chan, celcius, NV10TCL_DEPTH_TEST_ENABLE, 1);
1025 BEGIN_RING(chan, celcius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
1028 BEGIN_RING(chan, celcius, NV10TCL_POINT_SIZE, 1);
1030 BEGIN_RING(chan, celcius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
1033 BEGIN_RING(chan, celcius, NV10TCL_LINE_WIDTH, 1);
1035 BEGIN_RING(chan, celcius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
1037 BEGIN_RING(chan, celcius, NV10TCL_POLYGON_MODE_FRONT, 2);
1038 OUT_RING (chan, 0x1b02);
1039 OUT_RING (chan, 0x1b02);
1040 BEGIN_RING(chan, celcius, NV10TCL_CULL_FACE, 2);
1041 OUT_RING (chan, 0x405);
1042 OUT_RING (chan, 0x901);
1043 BEGIN_RING(chan, celcius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
1045 BEGIN_RING(chan, celcius, NV10TCL_CULL_FACE_ENABLE, 1);
1047 BEGIN_RING(chan, celcius, NV10TCL_TX_GEN_S(0), 8);
1051 BEGIN_RING(chan, celcius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
1052 OUT_RING (chan, 0x3fc00000); /* -1.50 */
1053 OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */
1054 OUT_RING (chan, 0); /* 0.00 */
1056 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
1059 BEGIN_RING(chan, celcius, NV10TCL_FOG_MODE, 2);
1060 OUT_RING (chan, 0x802);
1062 /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
1063 * using texturing, except when using the texture matrix
1065 BEGIN_RING(chan, celcius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
1067 BEGIN_RING(chan, celcius, NV10TCL_COLOR_MASK, 1);
1068 OUT_RING (chan, 0x01010101);
1070 /* Set vertex component */
1071 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_COL_4F_R, 4);
1072 OUT_RINGf (chan, 1.0);
1073 OUT_RINGf (chan, 1.0);
1074 OUT_RINGf (chan, 1.0);
1075 OUT_RINGf (chan, 1.0);
1076 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_COL2_3F_R, 3);
1080 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_NOR_3F_X, 3);
1083 OUT_RINGf (chan, 1.0);
1084 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX0_4F_S, 4);
1085 OUT_RINGf (chan, 0.0);
1086 OUT_RINGf (chan, 0.0);
1087 OUT_RINGf (chan, 0.0);
1088 OUT_RINGf (chan, 1.0);
1089 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX1_4F_S, 4);
1090 OUT_RINGf (chan, 0.0);
1091 OUT_RINGf (chan, 0.0);
1092 OUT_RINGf (chan, 0.0);
1093 OUT_RINGf (chan, 1.0);
1094 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_FOG_1F, 1);
1095 OUT_RINGf (chan, 0.0);
1096 BEGIN_RING(chan, celcius, NV10TCL_EDGEFLAG_ENABLE, 1);