2 * Copyright 2007 Stephane Marchesin
3 * Copyright 2007 Arthur Huillet
4 * Copyright 2007 Peter Winters
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
21 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include "nv_include.h"
/*
 * Composite state shared by the NV10 EXA hooks in this file, kept in the
 * single file-scope instance `state`.
 *
 * Other code in this file accesses state.have_mask, state.is_a8_plus_a8 and,
 * per texture unit, state.unit[n].width, state.unit[n].height and
 * state.unit[n].transform.
 *
 * NOTE(review): only part of the member list is visible in this view; confirm
 * the full layout against the complete file.
 */
31 typedef struct nv10_exa_state {
33 Bool is_a8_plus_a8; /*also known as is_extremely_dirty :)*/
35 PictTransformPtr transform;
40 static nv10_exa_state_t state;
/*
 * Translate a Render picture format into the hardware texture format value.
 *
 * The local table pairs PICT_a8r8g8b8 and PICT_x8r8g8b8 with 0x900,
 * PICT_r5g6b5 with 0x880 and PICT_a8 with 0x980. The table is scanned in
 * order and the `hw` value of the first entry whose `exa` field equals
 * ExaFormat is returned.
 *
 * NOTE(review): the value returned when nothing matches is not visible in
 * this view. NV10CheckTexture tests the result with `!`, so it is presumably
 * 0 -- confirm.
 */
42 static int NV10TexFormat(int ExaFormat)
44 struct {int exa;int hw;} tex_format[] =
46 {PICT_a8r8g8b8, 0x900},
47 {PICT_x8r8g8b8, 0x900},
48 {PICT_r5g6b5, 0x880}, /*this one was only tested with rendercheck*/
49 /*{PICT_a1r5g5b5, NV10TCL_TX_FORMAT_FORMAT_R5G5B5A1},
50 {PICT_a4r4g4b4, NV10TCL_TX_FORMAT_FORMAT_R4G4B4A4},*/
51 {PICT_a8, 0x980}, /*this is a NV1x only format, corresponding NV2x is 0xD80, we hack it in below*/
55 for(i=0;i<sizeof(tex_format)/sizeof(tex_format[0]);i++)
57 if(tex_format[i].exa==ExaFormat)
58 return tex_format[i].hw;
/*
 * Translate a Render picture format into the hardware render-target format
 * value.
 *
 * The visible table entries map PICT_a8r8g8b8 and PICT_x8r8g8b8 to 0x108.
 * The first entry whose `exa` field equals ExaFormat supplies the return
 * value.
 *
 * NOTE(review): further table entries and the no-match return value are not
 * visible in this view. NV10CheckBuffer tests the result with `!`, so the
 * no-match value is presumably 0 -- confirm.
 */
64 static int NV10DstFormat(int ExaFormat)
66 struct {int exa;int hw;} dst_format[] =
68 {PICT_a8r8g8b8, 0x108},
69 {PICT_x8r8g8b8, 0x108},
74 for(i=0;i<sizeof(dst_format)/sizeof(dst_format[0]);i++)
76 if(dst_format[i].exa==ExaFormat)
77 return dst_format[i].hw;
/*
 * Decide whether a picture can be used as a texture (source or mask).
 *
 * The visible tests look at, in order:
 *   - drawable width or height greater than 2046,
 *   - a format for which NV10TexFormat() yields a false value,
 *   - a filter other than PictFilterNearest / PictFilterBilinear,
 *   - componentAlpha being set,
 *   - repeat other than RepeatNone, unless the drawable is exactly 1x1.
 *
 * NOTE(review): the statement taken after each test and the final return are
 * not visible in this view. NV10CheckComposite logs a fallback when this
 * function yields a false value, so each test presumably rejects the
 * picture -- confirm.
 */
83 static Bool NV10CheckTexture(PicturePtr Picture)
85 int w = Picture->pDrawable->width;
86 int h = Picture->pDrawable->height;
88 if ((w > 2046) || (h>2046))
90 if (!NV10TexFormat(Picture->format))
92 if (Picture->filter != PictFilterNearest && Picture->filter != PictFilterBilinear)
94 if (Picture->componentAlpha)
96 /* we cannot repeat on NV10 because NPOT textures do not support this. unfortunately. */
97 if (Picture->repeat != RepeatNone)
98 /* we can repeat 1x1 textures */
99 if (!(w == 1 && h == 1))
/*
 * Decide whether a picture can be used as the render target.
 *
 * The visible tests look at, in order:
 *   - drawable width or height greater than 4096,
 *   - componentAlpha being set,
 *   - a format for which NV10DstFormat() yields a false value.
 *
 * NOTE(review): the statement taken after each test and the final return are
 * not visible in this view; presumably each test rejects the picture --
 * confirm.
 */
104 static Bool NV10CheckBuffer(PicturePtr Picture)
106 int w = Picture->pDrawable->width;
107 int h = Picture->pDrawable->height;
109 if ((w > 4096) || (h>4096))
111 if (Picture->componentAlpha)
113 if (!NV10DstFormat(Picture->format))
/*
 * Decide whether a Render operator is handled by this code path.
 *
 * Two cases are singled out: PictOpAtopReverse (marked as not working) and
 * every operator numbered PictOpSaturate or above.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably both cases are rejected and everything else accepted --
 * confirm.
 */
118 static Bool NV10CheckPictOp(int op)
120 if ( op == PictOpAtopReverse ) /*this op doesn't work*/
124 if ( op >= PictOpSaturate )
125 { /*we do no saturate, disjoint, conjoint, though we could do e.g. DisjointClear which really is Clear*/
/*
 * Summary of the visible condition: the A8 + A8 special case applies when
 * there is no mask, both source and destination are PICT_a8, the source has
 * no transform, the operator is PictOpAdd and the source does not repeat.
 *
 * NOTE(review): the code guarded by the X_BYTE_ORDER == X_BIG_ENDIAN test and
 * the return statements are not visible in this view -- confirm how
 * big-endian hosts are treated.
 */
131 /* Check if the current operation is a doable A8 + A8 */
132 /* A8 destination is a special case, because we do it by having the card think
133 it's ARGB. For now we support PictOpAdd which is the only important op for this dst format,
134 and without transformation or funny things.*/
135 static Bool NV10Check_A8plusA8_Feasability(PicturePtr src, PicturePtr msk, PicturePtr dst, int op)
137 #if X_BYTE_ORDER == X_BIG_ENDIAN
140 if ((!msk) && (src->format == PICT_a8) && (dst->format == PICT_a8) && (!src->transform) &&
141 (op == PictOpAdd) && (src->repeat == RepeatNone))
/*
 * Two alternative definitions of the NV10EXAFallbackInfo() logging macro:
 * one forwards its five arguments to NV10EXAFallbackInfo_real(), the other
 * expands to an empty do/while(0) statement so call sites compile to nothing.
 *
 * NOTE(review): the preprocessor conditional that selects between the two is
 * not visible in this view.
 */
149 #define NV10EXAFallbackInfo(X,Y,Z,S,T) NV10EXAFallbackInfo_real(X,Y,Z,S,T)
151 #define NV10EXAFallbackInfo(X,Y,Z,S,T) do { ; } while (0)
/*
 * Build a one-line human-readable description of a composite request and
 * emit it through xf86DrvMsg() at X_INFO level.
 *
 * The text is assembled with successive sprintf() calls into `out`: the
 * caller-supplied reason, a name for the operator (falling back to
 * "PictOp%d"), then format name and "(WxH)" size for the source and the
 * destination, and either "& NONE" or the same details for the mask.
 * Unrecognised formats are printed in hex.
 *
 * reason        short tag chosen by the caller (e.g. "src", "Accelerating")
 * op            Render operator
 * pSrcPicture   source picture (dereferenced)
 * pMaskPicture  mask picture; "& NONE" is one of the visible outputs
 * pDstPicture   destination picture (dereferenced)
 *
 * NOTE(review): the declarations of `out` and `out2` are not visible in this
 * view. The sprintf() calls shown carry no length limit, so the buffer size
 * must be checked against the longest possible message.
 * NOTE(review): `out2` is passed to xf86DrvMsg() in the format-string
 * position; a '%' in `reason` would be interpreted as a conversion. Passing
 * "%s" with out2 as its argument would avoid that.
 */
154 static void NV10EXAFallbackInfo_real(char * reason, int op, PicturePtr pSrcPicture,
155 PicturePtr pMaskPicture,
156 PicturePtr pDstPicture)
160 sprintf(out, "%s ", reason);
161 out = out + strlen(out);
165 sprintf(out, "PictOpClear ");
168 sprintf(out, "PictOpSrc ");
171 sprintf(out, "PictOpDst ");
174 sprintf(out, "PictOpOver ");
176 case PictOpOutReverse:
177 sprintf(out, "PictOpOutReverse ");
180 sprintf(out, "PictOpAdd ");
183 sprintf(out, "PictOp%d ", op);
185 out = out + strlen(out);
186 switch ( pSrcPicture->format )
189 sprintf(out, "A8R8G8B8 ");
192 sprintf(out, "X8R8G8B8 ");
195 sprintf(out, "X8B8G8R8 ");
198 sprintf(out, "R5G6B5 ");
207 sprintf(out, "%x ", pSrcPicture->format);
210 sprintf(out, "(%dx%d) ", pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
211 if ( pSrcPicture->repeat != RepeatNone )
216 switch ( pDstPicture->format )
219 sprintf(out, "A8R8G8B8 ");
222 sprintf(out, "X8R8G8B8 ");
225 sprintf(out, "X8B8G8R8 ");
228 sprintf(out, "R5G6B5 ");
237 sprintf(out, "%x ", pDstPicture->format);
240 sprintf(out, "(%dx%d) ", pDstPicture->pDrawable->width, pDstPicture->pDrawable->height);
241 if ( pDstPicture->repeat != RepeatNone )
245 sprintf(out, "& NONE");
247 switch ( pMaskPicture->format )
250 sprintf(out, "& A8R8G8B8 ");
253 sprintf(out, "& X8R8G8B8 ");
256 sprintf(out, "& X8B8G8R8 ");
259 sprintf(out, "& A8 ");
262 sprintf(out, "& A1 ");
265 sprintf(out, "& %x ", pMaskPicture->format);
268 sprintf(out, "(%dx%d) ", pMaskPicture->pDrawable->width, pMaskPicture->pDrawable->height);
269 if ( pMaskPicture->repeat != RepeatNone )
274 xf86DrvMsg(0, X_INFO, out2);
/*
 * Decide whether a composite request can be handled by this code path.
 *
 * Checks run in this order, each logging through NV10EXAFallbackInfo():
 *   1. the A8 + A8 special case ("Hackelerating"),
 *   2. the operator, via NV10CheckPictOp()            ("pictop"),
 *   3. the destination, via NV10CheckBuffer()         ("dst"),
 *   4. the source, via NV10CheckTexture()             ("src"),
 *   5. the mask, if any, via NV10CheckTexture()       ("mask"),
 * and, when none of the failure branches is taken, "Accelerating" is logged.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably the special case and the final path accept and the four failure
 * branches reject -- confirm.
 */
278 Bool NV10CheckComposite(int op,
279 PicturePtr pSrcPicture,
280 PicturePtr pMaskPicture,
281 PicturePtr pDstPicture)
284 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
286 NV10EXAFallbackInfo("Hackelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
290 if (!NV10CheckPictOp(op))
292 NV10EXAFallbackInfo("pictop", op, pSrcPicture, pMaskPicture, pDstPicture);
295 if (!NV10CheckBuffer(pDstPicture))
297 NV10EXAFallbackInfo("dst", op, pSrcPicture, pMaskPicture, pDstPicture);
301 if (!NV10CheckTexture(pSrcPicture))
303 NV10EXAFallbackInfo("src", op, pSrcPicture, pMaskPicture, pDstPicture);
307 if ((pMaskPicture) &&(!NV10CheckTexture(pMaskPicture)))
309 NV10EXAFallbackInfo("mask", op, pSrcPicture, pMaskPicture, pDstPicture);
313 NV10EXAFallbackInfo("Accelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
/*
 * Program texture unit `unit` to sample from `pixmap` as described by `Pict`.
 *
 * Methods emitted on the 3D object, in order:
 *   TX_OFFSET      pixmap address (VRAM, read access)
 *   TX_NPOT_PITCH  pixmap pitch shifted into the upper 16 bits
 *   TX_NPOT_SIZE   (w << 16) | drawable height
 *   TX_FORMAT      txfmt, built from clamp-to-edge wrap bits, an LOD bit and
 *                  a format code
 *   TX_ENABLE      NV10TCL_TX_ENABLE_ENABLE
 *   TX_FILTER      nearest/nearest for PictFilterNearest; a linear/linear
 *                  pair is the other visible value
 *
 * For repeating pictures (only 1x1 per the comment below) a different set of
 * format codes is used: 0x80 for A8, 0x280 for R5G6B5, 0x300 as the third
 * visible value. Non-repeating pictures take the code from NV10TexFormat(),
 * with a separate branch for PICT_a8 on NV_ARCH_20.
 *
 * Finally the pixmap width and height (as floats) and the picture transform
 * are recorded in state.unit[unit].
 *
 * NOTE(review): the declarations of txfmt and w, the body of the
 * NV_ARCH_20 / PICT_a8 branch, the rounding applied to w, and the uses of
 * log2w / log2h are not visible in this view.
 */
317 static void NV10SetTexture(NVPtr pNv,int unit,PicturePtr Pict,PixmapPtr pixmap)
319 struct nouveau_channel *chan = pNv->chan;
320 struct nouveau_grobj *celcius = pNv->Nv3D;
321 int log2w = log2i(Pict->pDrawable->width);
322 int log2h = log2i(Pict->pDrawable->height);
325 (NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE) |
326 (NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE) |
329 (1<<12) | /* lod == 1 */
332 BEGIN_RING(chan, celcius, NV10TCL_TX_OFFSET(unit), 1 );
333 OUT_PIXMAPl(chan, pixmap, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
335 /* if repeat is set we're always handling a 1x1 texture with ARGB/XRGB destination,
336 in that case we change the format to use the POT (swizzled) matching format */
337 if (Pict->repeat != RepeatNone)
339 if (Pict->format == PICT_a8)
340 txfmt |= 0x80; /* A8 */
341 else if (Pict->format == PICT_r5g6b5 )
342 txfmt |= 0x280; /* R5G6B5 */
344 txfmt |= 0x300; /* ARGB format */
348 if (pNv->Architecture == NV_ARCH_20 && Pict->format == PICT_a8 )
350 else txfmt |= NV10TexFormat(Pict->format);
351 w = Pict->pDrawable->width;
352 /* NPOT_SIZE expects an even number for width, we can round up uneven
353 * numbers here because EXA always gives 64 byte aligned pixmaps
354 * and for all formats we support 64 bytes represents an even number
359 BEGIN_RING(chan, celcius, NV10TCL_TX_NPOT_PITCH(unit), 1);
360 OUT_RING (chan, exaGetPixmapPitch(pixmap) << 16);
362 BEGIN_RING(chan, celcius, NV10TCL_TX_NPOT_SIZE(unit), 1);
363 OUT_RING (chan, (w<<16) | Pict->pDrawable->height);
366 BEGIN_RING(chan, celcius, NV10TCL_TX_FORMAT(unit), 1 );
367 OUT_RING (chan, txfmt);
369 BEGIN_RING(chan, celcius, NV10TCL_TX_ENABLE(unit), 1 );
370 OUT_RING (chan, NV10TCL_TX_ENABLE_ENABLE);
372 BEGIN_RING(chan, celcius, NV10TCL_TX_FILTER(unit), 1);
373 if (Pict->filter == PictFilterNearest)
374 OUT_RING (chan, (NV10TCL_TX_FILTER_MAGNIFY_NEAREST) |
375 (NV10TCL_TX_FILTER_MINIFY_NEAREST));
377 OUT_RING (chan, (NV10TCL_TX_FILTER_MAGNIFY_LINEAR) |
378 (NV10TCL_TX_FILTER_MINIFY_LINEAR));
380 state.unit[unit].width = (float)pixmap->drawable.width;
381 state.unit[unit].height = (float)pixmap->drawable.height;
382 state.unit[unit].transform = Pict->transform;
/*
 * Point the 3D object's render target at `pixmap` and set up the matching
 * viewport state.
 *
 * RT_FORMAT is a 4-word method. The visible words are:
 *   - the format: NV10DstFormat(PICT_a8r8g8b8) under the
 *     state.is_a8_plus_a8 test, NV10DstFormat(Pict->format) as the other
 *     visible value,
 *   - the pixmap pitch, placed in both the upper and lower 16 bits,
 *   - the pixmap address (VRAM, write access).
 * RT_HORIZ then receives (w << 16) | x and (h << 16) | y. The viewport clip
 * rectangles are derived from the same x, y, w, h with a 0x08000800 bias
 * OR-ed in. The remaining methods load the projection matrix, depth range
 * and viewport scale.
 *
 * NOTE(review): the declarations and values of x, y, w and h, the fourth
 * RT_FORMAT word and most of the matrix/depth/scale operands are not visible
 * in this view.
 */
385 static void NV10SetBuffer(NVPtr pNv,PicturePtr Pict,PixmapPtr pixmap)
387 struct nouveau_channel *chan = pNv->chan;
388 struct nouveau_grobj *celcius = pNv->Nv3D;
395 BEGIN_RING(chan, celcius, NV10TCL_RT_FORMAT, 4);
396 if ( state.is_a8_plus_a8 )
398 OUT_RING (chan, NV10DstFormat(PICT_a8r8g8b8));
401 OUT_RING (chan, NV10DstFormat(Pict->format));
404 OUT_RING (chan, ((uint32_t)exaGetPixmapPitch(pixmap) << 16) |(uint32_t)exaGetPixmapPitch(pixmap));
405 OUT_PIXMAPl(chan, pixmap, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
408 BEGIN_RING(chan, celcius, NV10TCL_RT_HORIZ, 2);
409 OUT_RING (chan, (w<<16)|x);
410 OUT_RING (chan, (h<<16)|y);
411 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_MODE, 1); /* clip_mode */
413 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
414 OUT_RING (chan, ((w-1+x)<<16)|x|0x08000800);
415 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
416 OUT_RING (chan, ((h-1+y)<<16)|y|0x08000800);
418 BEGIN_RING(chan, celcius, NV10TCL_PROJECTION_MATRIX(0), 16);
421 OUT_RINGf (chan, 1.0f);
423 OUT_RINGf (chan, 0.0f);
425 BEGIN_RING(chan, celcius, NV10TCL_DEPTH_RANGE_NEAR, 2);
428 OUT_RINGf (chan, 16777216.0);
430 OUT_RINGf (chan, 65536.0);
432 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_SCALE_X, 4);
433 OUT_RINGf (chan, -2048.0);
434 OUT_RINGf (chan, -2048.0);
/*
 * Program register combiner stage 0 for ordinary (non A8 + A8) compositing
 * and emit all six RC_IN words in one method: rc0_in_alpha, rc1_in_alpha,
 * rc0_in_rgb, rc1_in_rgb, color0, color1. Only the rc0 words are modified
 * here; the others keep their initial value of 0.
 *
 * Inputs C and D are always ZERO. Input A comes from the source texture,
 * with special cases keyed on the source format (PICT_x8r8g8b8 forces alpha
 * A to ONE, PICT_a8 forces RGB A to ZERO). Input B is ONE or the mask alpha
 * (0x00190000), and a PICT_x8r8g8b8 mask is treated as having no alpha.
 *
 * The A_/B_/C_/D_ *_ZERO and *_ONE helper macros defined inside this
 * function are also used by NV10SetRegCombs_A8plusA8().
 *
 * NOTE(review): the conditions selecting between some of the branches (for
 * example the test that distinguishes "no mask" from "mask present") are not
 * visible in this view.
 */
439 static void NV10SetRegCombs(NVPtr pNv, PicturePtr src, PicturePtr mask)
441 struct nouveau_channel *chan = pNv->chan;
442 struct nouveau_grobj *celcius = pNv->Nv3D;
444 /*This can be a bit difficult to understand at first glance.
445 Reg combiners are described here:
446 http://icps.u-strasbg.fr/~marchesin/perso/extensions/NV/register_combiners.html
448 Single texturing setup, without honoring vertex colors (non default setup) is:
449 Alpha RC 0 : a_0 * 1 + 0 * 0
450 RGB RC 0 : rgb_0 * 1 + 0 * 0
452 Final combiner uses default setup
454 Default setup uses vertex rgb/alpha in place of 1s above, but we don't need that in 2D.
456 Multi texturing setup, where we do TEX0 in TEX1 (masking) is:
457 Alpha RC 0 : a_0 * a_1 + 0 * 0
458 RGB RC0 : rgb_0 * a_1 + 0 * 0
460 Final combiner uses default setup
464 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
465 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
466 unsigned int color0 = 0, color1 = 0;
467 #define A_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA)
468 #define B_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA)
469 #define C_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA)
470 #define D_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA)
472 #define A_ALPHA_ONE (A_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV))
473 #define B_ALPHA_ONE (B_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV))
474 #define C_ALPHA_ONE (C_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV))
475 #define D_ALPHA_ONE (D_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV))
477 #define A_RGB_ZERO (NV10TCL_RC_IN_RGB_A_INPUT_ZERO | NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB)
478 #define B_RGB_ZERO (NV10TCL_RC_IN_RGB_B_INPUT_ZERO | NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB)
479 #define C_RGB_ZERO (NV10TCL_RC_IN_RGB_C_INPUT_ZERO | NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB)
480 #define D_RGB_ZERO (NV10TCL_RC_IN_RGB_D_INPUT_ZERO | NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB)
482 #define A_RGB_ONE (A_RGB_ZERO | NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV)
483 #define B_RGB_ONE (B_RGB_ZERO | NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV)
484 #define C_RGB_ONE (C_RGB_ZERO | NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV)
485 #define D_RGB_ONE (D_RGB_ZERO | NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV)
487 rc0_in_alpha |= C_ALPHA_ZERO | D_ALPHA_ZERO;
488 if (src->format == PICT_x8r8g8b8)
489 rc0_in_alpha |= A_ALPHA_ONE;
491 rc0_in_alpha |= 0x18000000;
494 rc0_in_alpha |= B_ALPHA_ONE;
496 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
497 rc0_in_alpha |= B_ALPHA_ONE;
499 rc0_in_alpha |= 0x00190000; /*B = a_1*/
501 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
502 if (src->format == PICT_a8 )
503 rc0_in_rgb |= A_RGB_ZERO;
505 rc0_in_rgb |= 0x08000000; /*A = rgb_0*/
508 rc0_in_rgb |= B_RGB_ONE;
510 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
511 rc0_in_rgb |= B_RGB_ONE;
513 rc0_in_rgb |= 0x00190000; /*B = a_1*/
515 BEGIN_RING(chan, celcius, NV10TCL_RC_IN_ALPHA(0), 6);
516 OUT_RING (chan, rc0_in_alpha);
517 OUT_RING (chan, rc1_in_alpha);
518 OUT_RING (chan, rc0_in_rgb);
519 OUT_RING (chan, rc1_in_rgb);
520 OUT_RING (chan, color0); /*COLOR 0*/
521 OUT_RING (chan, color1); /*COLOR 1*/
/*
 * Program register combiner stage 0 for one pass of the two-pass A8 + A8
 * path and emit the six RC_IN words (rc0/rc1 alpha, rc0/rc1 rgb, color0,
 * color1).
 *
 * mask_out_bytes is a 4-bit mask. A set bit replaces the corresponding
 * combiner inputs with ZERO so that byte of the 32-bit destination pixel is
 * left untouched:
 *   bit 0 -> alpha inputs A,B     bit 1 -> RGB inputs A,B
 *   bit 3 -> RGB inputs A,B       bit 2 -> RGB inputs C,D
 * where the first line belongs to the block that sets color0 = 0x00ff0000
 * and the second to the block that sets color1 = 0x0000ff00.
 *
 * NOTE(review): the test on `pass` that selects between the two blocks is
 * not visible in this view, and neither are some assignments in the second
 * block. The A_/B_/C_/D_ macros used here are the ones #defined inside
 * NV10SetRegCombs().
 */
524 static void NV10SetRegCombs_A8plusA8(NVPtr pNv, int pass, int mask_out_bytes)
526 struct nouveau_channel *chan = pNv->chan;
527 struct nouveau_grobj *celcius = pNv->Nv3D;
528 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
529 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
530 unsigned int color0 = 0, color1 = 0;
534 if ( mask_out_bytes & 1 )
535 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
536 else rc0_in_alpha = 0x19000000 | B_ALPHA_ONE | C_ALPHA_ZERO | D_ALPHA_ZERO;
538 rc0_in_rgb = C_RGB_ZERO | D_RGB_ZERO;
540 if ( mask_out_bytes & 2 )
541 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
542 else rc0_in_rgb |= 0x18000000 | 0x00010000;
544 color0 = 0x00ff0000; /*R = 1 G = 0 B = 0*/
547 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
553 if ( mask_out_bytes & 8 )
554 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
555 else rc0_in_rgb |= 0x18000000 | 0x00010000; /*A = a_0, B= cst color 0*/
559 if ( mask_out_bytes & 4)
560 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
561 else rc0_in_rgb |= 0x1900 | 0x02; /*C = a_1, D = cst color 1*/
563 color1 = 0x0000ff00; /*R = 0, G = 1, B = 0*/
566 BEGIN_RING(chan, celcius, NV10TCL_RC_IN_ALPHA(0), 6);
567 OUT_RING (chan, rc0_in_alpha);
568 OUT_RING (chan, rc1_in_alpha);
569 OUT_RING (chan, rc0_in_rgb);
570 OUT_RING (chan, rc1_in_rgb);
571 OUT_RING (chan, color0); /*COLOR 0*/
572 OUT_RING (chan, color1); /*COLOR 1*/
/*
 * Load the blend function for Render operator `op`.
 *
 * `pictops` holds one {src, dst} blend-factor pair per operator, in operator
 * order from PictOpClear through PictOpAdd (13 entries). The pair selected by
 * `op` is written to BLEND_FUNC_SRC (a 2-word method covering src and dst),
 * followed by a BLEND_FUNC_ENABLE method.
 *
 * NOTE(review): `op` indexes the table directly with no range check in this
 * function. It relies on callers having filtered the operator beforehand
 * (NV10CheckPictOp singles out op >= PictOpSaturate) -- confirm that every
 * path into here is covered.
 * NOTE(review): the operand written to BLEND_FUNC_ENABLE is not visible in
 * this view.
 */
575 static void NV10SetPictOp(NVPtr pNv,int op)
577 struct nouveau_channel *chan = pNv->chan;
578 struct nouveau_grobj *celcius = pNv->Nv3D;
579 struct {int src;int dst;} pictops[] =
581 {0x0000,0x0000}, /* PictOpClear */
582 {0x0001,0x0000}, /* PictOpSrc */
583 {0x0000,0x0001}, /* PictOpDst */
584 {0x0001,0x0303}, /* PictOpOver */
585 {0x0305,0x0001}, /* PictOpOverReverse */
586 {0x0304,0x0000}, /* PictOpIn */
587 {0x0000,0x0302}, /* PictOpInReverse */
588 {0x0305,0x0000}, /* PictOpOut */
589 {0x0000,0x0303}, /* PictOpOutReverse */
590 {0x0304,0x0303}, /* PictOpAtop */
591 {0x0305,0x0302}, /* PictOpAtopReverse - DOES NOT WORK*/
592 {0x0305,0x0303}, /* PictOpXor */
593 {0x0001,0x0001}, /* PictOpAdd */
596 BEGIN_RING(chan, celcius, NV10TCL_BLEND_FUNC_SRC, 2);
597 OUT_RING (chan, pictops[op].src);
598 OUT_RING (chan, pictops[op].dst);
599 BEGIN_RING(chan, celcius, NV10TCL_BLEND_FUNC_ENABLE, 1);
/*
 * Set up hardware state for a run of composite operations.
 *
 * A8 + A8 special case (NV10Check_A8plusA8_Feasability() true):
 *   state.have_mask is cleared and state.is_a8_plus_a8 set, then the render
 *   target, blend function and BOTH texture units are programmed -- the
 *   source pixmap is bound to unit 0 and again to unit 1.
 *
 * General case:
 *   state.is_a8_plus_a8 is cleared; the render target, source texture
 *   (unit 0), mask texture (unit 1), register combiners and blend function
 *   are programmed; a QUADS primitive is opened with VERTEX_BEGIN_END; and
 *   state.have_mask records whether a mask picture was supplied.
 *
 * NOTE(review): the pixmap parameters (pSrc, pMask, pDst are used in the
 * body), the condition guarding the mask texture setup and the return
 * statements are not visible in this view.
 */
603 Bool NV10PrepareComposite(int op,
604 PicturePtr pSrcPicture,
605 PicturePtr pMaskPicture,
606 PicturePtr pDstPicture,
611 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
612 NVPtr pNv = NVPTR(pScrn);
613 struct nouveau_channel *chan = pNv->chan;
614 struct nouveau_grobj *celcius = pNv->Nv3D;
616 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
618 state.have_mask = FALSE;
619 state.is_a8_plus_a8 = TRUE;
620 NV10SetBuffer(pNv,pDstPicture,pDst);
621 NV10SetPictOp(pNv, op);
622 NV10SetTexture(pNv, 0, pSrcPicture, pSrc);
623 NV10SetTexture(pNv, 1, pSrcPicture, pSrc);
627 state.is_a8_plus_a8 = FALSE;
630 NV10SetBuffer(pNv,pDstPicture,pDst);
633 NV10SetTexture(pNv,0,pSrcPicture,pSrc);
635 /* Set mask format */
637 NV10SetTexture(pNv,1,pMaskPicture,pMask);
639 NV10SetRegCombs(pNv, pSrcPicture, pMaskPicture);
642 NV10SetPictOp(pNv, op);
644 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
645 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_QUADS);
647 state.have_mask=(pMaskPicture!=NULL);
/*
 * Emit one single-textured vertex: texture unit 0 coordinates (tx, ty)
 * first, then the position (vx, vy, 0). The position method is written
 * last.
 */
651 static inline void NV10Vertex(NVPtr pNv,float vx,float vy,float tx,float ty)
653 struct nouveau_channel *chan = pNv->chan;
654 struct nouveau_grobj *celcius = pNv->Nv3D;
656 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX0_2F_S, 2);
657 OUT_RINGf (chan, tx);
658 OUT_RINGf (chan, ty);
659 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_POS_3F_X, 3);
660 OUT_RINGf (chan, vx);
661 OUT_RINGf (chan, vy);
662 OUT_RINGf (chan, 0.f);
/*
 * Emit one dual-textured vertex: texture unit 0 coordinates (t0x, t0y),
 * texture unit 1 coordinates (t1x, t1y), then the position (vx, vy, 0).
 * The position method is written last.
 */
665 static inline void NV10MVertex(NVPtr pNv,float vx,float vy,float t0x,float t0y,float t1x,float t1y)
667 struct nouveau_channel *chan = pNv->chan;
668 struct nouveau_grobj *celcius = pNv->Nv3D;
670 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX0_2F_S, 2);
671 OUT_RINGf (chan, t0x);
672 OUT_RINGf (chan, t0y);
673 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX1_2F_S, 2);
674 OUT_RINGf (chan, t1x);
675 OUT_RINGf (chan, t1y);
676 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_POS_3F_X, 3);
677 OUT_RINGf (chan, vx);
678 OUT_RINGf (chan, vy);
679 OUT_RINGf (chan, 0.f);
/*
 * Convert a fixed-point value to floating point: the integer part from
 * xFixedToInt() plus the fractional part from xFixedFrac() scaled by
 * 1/65536. The argument `v` is evaluated twice, so it must be free of side
 * effects.
 */
682 #define xFixedToFloat(v) \
683 ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
/*
 * Map an integer point (x, y) through picture transform `t` and store the
 * result as floats in *x_ret and *y_ret.
 *
 * The point is converted to a fixed-point vector (x, y, 1), passed to
 * PictureTransformPoint(), and the first two components are converted back
 * with xFixedToFloat().
 *
 * NOTE(review): the return type, the declaration of `v`, any handling of a
 * NULL `t` and any use of sx / sy are not visible in this view -- confirm
 * against the complete file.
 */
686 NV10EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
687 float *x_ret, float *y_ret)
692 v.vector[0] = IntToxFixed(x);
693 v.vector[1] = IntToxFixed(y);
694 v.vector[2] = xFixed1;
695 PictureTransformPoint(t, &v);
696 *x_ret = xFixedToFloat(v.vector[0]);
697 *y_ret = xFixedToFloat(v.vector[1]);
/*
 * Draw one composite rectangle using the state set up by
 * NV10PrepareComposite().
 *
 * The four corners of the source rectangle are first mapped through
 * state.unit[0].transform into (sX0,sY0) .. (sX3,sY3). One of three paths
 * then runs:
 *
 * 1. state.is_a8_plus_a8: the A8 destination is drawn as if it were 32-bit,
 *    so the loop walks destination columns in units of four bytes, from
 *    (dstX & ~3) >> 2 up to and including ((dstX + width) & ~3) >> 2, while
 *    the source X advances by 4 per step. On the left-most and right-most
 *    columns `mask_out_bytes` gets a bit for each byte that lies outside the
 *    requested rectangle. Each column is drawn in two passes; a pass
 *    programs the combiners with NV10SetRegCombs_A8plusA8(), opens a QUADS
 *    primitive, emits four NV10MVertex() corners whose two texture
 *    coordinates address neighbouring source bytes (+0/+1 on pass 0, +2/+3
 *    on pass 1), and closes the primitive.
 *
 * 2. state.have_mask: the mask rectangle corners are mapped through
 *    state.unit[1].transform and four NV10MVertex() corners are emitted.
 *
 * 3. Remaining path: four NV10Vertex() corners are emitted.
 *
 * NOTE(review): the parameter list after pDst (srcX, srcY, maskX, maskY,
 * dstX, dstY, width and height are used in the body), several
 * NV10EXATransformCoord() arguments, the case labels of the border switches
 * and the point at which mask_out_bytes is reset between columns are not
 * visible in this view.
 */
705 void NV10Composite(PixmapPtr pDst,
715 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
716 NVPtr pNv = NVPTR(pScrn);
717 struct nouveau_channel *chan = pNv->chan;
718 struct nouveau_grobj *celcius = pNv->Nv3D;
719 float sX0, sX1, sX2, sY0, sY1, sY2, sX3, sY3;
720 float mX0, mX1, mX2, mY0, mY1, mY2, mX3, mY3;
722 NV10EXATransformCoord(state.unit[0].transform, srcX, srcY,
724 state.unit[0].height, &sX0, &sY0);
725 NV10EXATransformCoord(state.unit[0].transform,
728 state.unit[0].height, &sX1, &sY1);
729 NV10EXATransformCoord(state.unit[0].transform,
730 srcX + width, srcY + height,
732 state.unit[0].height, &sX2, &sY2);
733 NV10EXATransformCoord(state.unit[0].transform,
736 state.unit[0].height, &sX3, &sY3);
738 if ( state.is_a8_plus_a8 )
740 /*We do A8 + A8 in 2-pass : setup the source texture as A8 twice,
741 with different tex coords, do B and G on first pass
742 Then setup again and do R and A on second pass
746 int mask_out_bytes = 0;
748 part_pos_dX = (dstX &~ 3) >> 2; /*we start at the 4byte boundary to the left of the image*/
749 part_pos_sX = sX0 + (dstX &~ 3) - dstX;
751 /*xf86DrvMsg(0, X_INFO, "drawing - srcX %f dstX %d w %d\n", sX0, dstX, width);*/
752 for ( ; part_pos_dX <= (((dstX + width) &~ 3) >> 2); part_pos_sX += 4, part_pos_dX ++ )
755 if ( part_pos_dX == (dstX &~ 3) >> 2 ) /*then we're slightly on the left of the image, bytes to mask out*/
757 /*xf86DrvMsg(0, X_INFO, "on left border...\n");*/
758 switch ( dstX - (dstX &~ 3) ) /*mask out the extra pixels on the left*/
761 mask_out_bytes |= 1 << 0;
763 mask_out_bytes |= 1 << 1;
765 mask_out_bytes |= 1 << 2;
767 mask_out_bytes |= 1 << 3;
772 /*mask out extra pixels on the right, in case the picture never touches an alignment marker*/
773 switch ( width + (dstX & 3) )
776 mask_out_bytes |= 1 << 3;
778 mask_out_bytes |= 1 << 2;
780 mask_out_bytes |= 1 << 1;
782 mask_out_bytes |= 1 << 0;
786 else if ( part_pos_dX == (((dstX + width) &~ 3) >> 2) )
788 /*xf86DrvMsg(0, X_INFO, "on right border...\n");*/
789 switch (4 - ((dstX + width) & 3))
792 mask_out_bytes |= 1 << 3;
794 mask_out_bytes |= 1 << 2;
796 mask_out_bytes |= 1 << 1;
798 mask_out_bytes |= 1 << 0;
806 NV10SetRegCombs_A8plusA8(pNv, 0, mask_out_bytes);
807 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
808 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_QUADS);
810 NV10MVertex(pNv , part_pos_dX , dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
811 NV10MVertex(pNv , part_pos_dX + 1, dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
812 NV10MVertex(pNv , part_pos_dX + 1, dstY + height, part_pos_sX, sY2, part_pos_sX + 1, sY2);
813 NV10MVertex(pNv , part_pos_dX , dstY + height, part_pos_sX, sY2, part_pos_sX + 1, sY2);
815 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
816 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_STOP);
820 NV10SetRegCombs_A8plusA8(pNv, 1, mask_out_bytes);
821 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
822 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_QUADS);
824 NV10MVertex(pNv , part_pos_dX, dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
825 NV10MVertex(pNv , part_pos_dX + 1 , dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
826 NV10MVertex(pNv , part_pos_dX + 1 , dstY + height, part_pos_sX + 2, sY2, part_pos_sX + 3, sY2);
827 NV10MVertex(pNv , part_pos_dX, dstY + height, part_pos_sX + 2, sY2, part_pos_sX + 3, sY2);
829 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
830 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_STOP);
834 else if (state.have_mask) {
835 NV10EXATransformCoord(state.unit[1].transform, maskX, maskY,
837 state.unit[1].height, &mX0, &mY0);
838 NV10EXATransformCoord(state.unit[1].transform,
839 maskX + width, maskY,
841 state.unit[1].height, &mX1, &mY1);
842 NV10EXATransformCoord(state.unit[1].transform,
843 maskX + width, maskY + height,
845 state.unit[1].height, &mX2, &mY2);
846 NV10EXATransformCoord(state.unit[1].transform,
847 maskX, maskY + height,
849 state.unit[1].height, &mX3, &mY3);
850 NV10MVertex(pNv , dstX , dstY,sX0 , sY0 , mX0 , mY0);
851 NV10MVertex(pNv , dstX + width , dstY,sX1 , sY1 , mX1 , mY1);
852 NV10MVertex(pNv , dstX + width , dstY + height,sX2 , sY2 , mX2 , mY2);
853 NV10MVertex(pNv , dstX , dstY + height,sX3 , sY3 , mX3 , mY3);
855 NV10Vertex(pNv , dstX , dstY , sX0 , sY0);
856 NV10Vertex(pNv , dstX + width , dstY , sX1 , sY1);
857 NV10Vertex(pNv , dstX + width , dstY + height , sX2 , sY2);
858 NV10Vertex(pNv , dstX , dstY + height , sX3 , sY3);
/*
 * Finish a run of composite operations by writing
 * NV10TCL_VERTEX_BEGIN_END_STOP to VERTEX_BEGIN_END on the 3D object,
 * closing the primitive opened in NV10PrepareComposite().
 *
 * NOTE(review): anything that follows the STOP method (for example a flush)
 * is not visible in this view.
 */
862 void NV10DoneComposite (PixmapPtr pDst)
864 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
865 NVPtr pNv = NVPTR(pScrn);
866 struct nouveau_channel *chan = pNv->chan;
867 struct nouveau_grobj *celcius = pNv->Nv3D;
869 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_BEGIN_END, 1);
870 OUT_RING (chan, NV10TCL_VERTEX_BEGIN_END_STOP);
/*
 * One-time initialisation of the 3D object used by the NV10 EXA path.
 *
 * Steps visible here:
 *   1. Read the chipset id from bits 20..27 of NV_PMC_BOOT_0 and test that
 *      its upper nibble is NV_ARCH_10 or NV_ARCH_20.
 *   2. Choose the object class from chipset thresholds (>= 0x17, >= 0x11,
 *      and at least one further case).
 *   3. Allocate the object with nouveau_grobj_alloc() into pNv->Nv3D.
 *   4. Bind DMA objects: the notifier to pNv->NvNull, IN_MEMORY0/1 to the
 *      channel's VRAM and GART handles, IN_MEMORY2/3 both to VRAM.
 *   5. Emit a long sequence of default render state (clip rectangles,
 *      register-combiner outputs, blend/stencil/depth/fog/lighting/polygon
 *      state, colour mask, default vertex attributes).
 *
 * NOTE(review): the return type, the assignment of the local `celcius`, the
 * class constants, the error returns and a large share of the method
 * operands are not visible in this view, and the function body continues
 * past the last visible line. Treat this summary as a map of the visible
 * methods only.
 */
877 NVAccelInitNV10TCL(ScrnInfoPtr pScrn)
879 NVPtr pNv = NVPTR(pScrn);
880 struct nouveau_channel *chan = pNv->chan;
881 struct nouveau_grobj *celcius;
882 uint32_t class = 0, chipset;
885 chipset = (nvReadMC(pNv, NV_PMC_BOOT_0) >> 20) & 0xff;
886 if ( ((chipset & 0xf0) != NV_ARCH_10) &&
887 ((chipset & 0xf0) != NV_ARCH_20) )
892 else if (chipset>=0x17)
894 else if (chipset>=0x11)
900 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
905 BEGIN_RING(chan, celcius, NV10TCL_DMA_NOTIFY, 1);
906 OUT_RING (chan, pNv->NvNull->handle);
908 BEGIN_RING(chan, celcius, NV10TCL_DMA_IN_MEMORY0, 2);
909 OUT_RING (chan, pNv->chan->vram->handle);
910 OUT_RING (chan, pNv->chan->gart->handle);
912 BEGIN_RING(chan, celcius, NV10TCL_DMA_IN_MEMORY2, 2);
913 OUT_RING (chan, pNv->chan->vram->handle);
914 OUT_RING (chan, pNv->chan->vram->handle);
916 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
919 BEGIN_RING(chan, celcius, NV10TCL_RT_HORIZ, 2);
923 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
924 OUT_RING (chan, (0x7ff<<16)|0x800);
925 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
926 OUT_RING (chan, (0x7ff<<16)|0x800);
929 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
931 BEGIN_RING(chan, celcius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
935 BEGIN_RING(chan, celcius, 0x290, 1);
936 OUT_RING (chan, (0x10<<16)|1);
937 BEGIN_RING(chan, celcius, 0x3f4, 1);
940 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
943 if (class != NV10TCL) {
945 BEGIN_RING(chan, celcius, 0x120, 3);
950 BEGIN_RING(chan, pNv->NvImageBlit, 0x120, 3);
955 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
959 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
963 BEGIN_RING(chan, celcius, NV10TCL_FOG_ENABLE, 1);
965 BEGIN_RING(chan, celcius, NV10TCL_ALPHA_FUNC_ENABLE, 1);
967 BEGIN_RING(chan, celcius, NV10TCL_ALPHA_FUNC_FUNC, 2);
968 OUT_RING (chan, 0x207);
970 BEGIN_RING(chan, celcius, NV10TCL_TX_ENABLE(0), 2);
973 BEGIN_RING(chan, celcius, NV10TCL_RC_OUT_ALPHA(0), 6);
974 OUT_RING (chan, 0x00000c00);
976 OUT_RING (chan, 0x00000c00);
977 OUT_RING (chan, 0x18000000);
978 OUT_RING (chan, 0x300c0000);
979 OUT_RING (chan, 0x00001c80);
980 BEGIN_RING(chan, celcius, NV10TCL_BLEND_FUNC_ENABLE, 1);
982 BEGIN_RING(chan, celcius, NV10TCL_DITHER_ENABLE, 2);
985 BEGIN_RING(chan, celcius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
987 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
990 BEGIN_RING(chan, celcius, NV10TCL_BLEND_FUNC_SRC, 4);
994 OUT_RING (chan, 0x8006);
995 BEGIN_RING(chan, celcius, NV10TCL_STENCIL_MASK, 8);
996 OUT_RING (chan, 0xff);
997 OUT_RING (chan, 0x207);
999 OUT_RING (chan, 0xff);
1000 OUT_RING (chan, 0x1e00);
1001 OUT_RING (chan, 0x1e00);
1002 OUT_RING (chan, 0x1e00);
1003 OUT_RING (chan, 0x1d01);
1004 BEGIN_RING(chan, celcius, NV10TCL_NORMALIZE_ENABLE, 1);
1006 BEGIN_RING(chan, celcius, NV10TCL_FOG_ENABLE, 2);
1009 BEGIN_RING(chan, celcius, NV10TCL_LIGHT_MODEL, 1);
1011 BEGIN_RING(chan, celcius, NV10TCL_COLOR_CONTROL, 1);
1013 BEGIN_RING(chan, celcius, NV10TCL_ENABLED_LIGHTS, 1);
1015 BEGIN_RING(chan, celcius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
1019 BEGIN_RING(chan, celcius, NV10TCL_DEPTH_FUNC, 1);
1020 OUT_RING (chan, 0x201);
1021 BEGIN_RING(chan, celcius, NV10TCL_DEPTH_WRITE_ENABLE, 1);
1023 BEGIN_RING(chan, celcius, NV10TCL_DEPTH_TEST_ENABLE, 1);
1025 BEGIN_RING(chan, celcius, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
1028 BEGIN_RING(chan, celcius, NV10TCL_POINT_SIZE, 1);
1030 BEGIN_RING(chan, celcius, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
1033 BEGIN_RING(chan, celcius, NV10TCL_LINE_WIDTH, 1);
1035 BEGIN_RING(chan, celcius, NV10TCL_LINE_SMOOTH_ENABLE, 1);
1037 BEGIN_RING(chan, celcius, NV10TCL_POLYGON_MODE_FRONT, 2);
1038 OUT_RING (chan, 0x1b02);
1039 OUT_RING (chan, 0x1b02);
1040 BEGIN_RING(chan, celcius, NV10TCL_CULL_FACE, 2);
1041 OUT_RING (chan, 0x405);
1042 OUT_RING (chan, 0x901);
1043 BEGIN_RING(chan, celcius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
1045 BEGIN_RING(chan, celcius, NV10TCL_CULL_FACE_ENABLE, 1);
1047 BEGIN_RING(chan, celcius, NV10TCL_CLIP_PLANE_ENABLE(0), 8);
1051 BEGIN_RING(chan, celcius, NV10TCL_FOG_EQUATION_CONSTANT, 3);
1052 OUT_RING (chan, 0x3fc00000); /* 1.50 (0x3fc00000 read as an IEEE-754 single is +1.5) */
1053 OUT_RING (chan, 0xbdb8aa0a); /* -0.09 */
1054 OUT_RING (chan, 0); /* 0.00 */
1056 BEGIN_RING(chan, celcius, NV10TCL_NOP, 1);
1059 BEGIN_RING(chan, celcius, NV10TCL_FOG_MODE, 2);
1060 OUT_RING (chan, 0x802);
1062 /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
1063 * using texturing, except when using the texture matrix
1065 BEGIN_RING(chan, celcius, NV10TCL_VIEW_MATRIX_ENABLE, 1);
1067 BEGIN_RING(chan, celcius, NV10TCL_COLOR_MASK, 1);
1068 OUT_RING (chan, 0x01010101);
1070 /* Set vertex component */
1071 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_COL_4F_R, 4);
1072 OUT_RINGf (chan, 1.0);
1073 OUT_RINGf (chan, 1.0);
1074 OUT_RINGf (chan, 1.0);
1075 OUT_RINGf (chan, 1.0);
1076 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_COL2_3F_R, 3);
1080 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_NOR_3F_X, 3);
1083 OUT_RINGf (chan, 1.0);
1084 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX0_4F_S, 4);
1085 OUT_RINGf (chan, 0.0);
1086 OUT_RINGf (chan, 0.0);
1087 OUT_RINGf (chan, 0.0);
1088 OUT_RINGf (chan, 1.0);
1089 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_TX1_4F_S, 4);
1090 OUT_RINGf (chan, 0.0);
1091 OUT_RINGf (chan, 0.0);
1092 OUT_RINGf (chan, 0.0);
1093 OUT_RINGf (chan, 1.0);
1094 BEGIN_RING(chan, celcius, NV10TCL_VERTEX_FOG_1F, 1);
1095 OUT_RINGf (chan, 0.0);
1096 BEGIN_RING(chan, celcius, NV10TCL_EDGEFLAG_ENABLE, 1);