2 * Copyright 2007 Stephane Marchesin
3 * Copyright 2007 Arthur Huillet
4 * Copyright 2007 Peter Winters
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
21 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include "nv_include.h"
/*
 * File-wide EXA composite state; a single static instance named state is
 * shared by the Prepare/Composite helpers in this file.
 * is_a8_plus_a8 is set to TRUE by NV10PrepareComposite when the A8 + A8
 * special case is selected and FALSE otherwise.
 * NOTE(review): only part of the declaration is visible in this extract.
 * Other code in this file also accesses state.have_mask and
 * state.unit[n].width, .height and .transform, so those members are
 * presumably declared on lines that are missing here - confirm against the
 * full file.
 */
31 typedef struct nv10_exa_state {
33 Bool is_a8_plus_a8; /* also known as is_extremely_dirty :) */
35 PictTransformPtr transform;
40 static nv10_exa_state_t state;
42 static int NV10TexFormat(int ExaFormat)
44 struct {int exa;int hw;} tex_format[] =
46 {PICT_a8r8g8b8, 0x900},
47 {PICT_x8r8g8b8, 0x900},
48 {PICT_r5g6b5, 0x880}, /*this one was only tested with rendercheck*/
49 /*{PICT_a1r5g5b5, NV10_TCL_PRIMITIVE_3D_TX_FORMAT_FORMAT_R5G5B5A1},
50 {PICT_a4r4g4b4, NV10_TCL_PRIMITIVE_3D_TX_FORMAT_FORMAT_R4G4B4A4},*/
51 {PICT_a8, 0x980}, /*this is a NV1x only format, corresponding NV2x is 0xD80, we hack it in below*/
55 for(i=0;i<sizeof(tex_format)/sizeof(tex_format[0]);i++)
57 if(tex_format[i].exa==ExaFormat)
58 return tex_format[i].hw;
/*
 * Map a Render picture format to the value emitted as the render target
 * format. NV10CheckBuffer tests the result for zero and NV10SetBuffer writes
 * it right after the BUFFER_FORMAT method.
 * NOTE(review): some lines of this function are missing from this extract,
 * including the value returned when no table entry matches and possibly
 * further table entries - confirm against the full file before changing it.
 */
64 static int NV10DstFormat(int ExaFormat)
66 struct {int exa;int hw;} dst_format[] =
68 {PICT_a8r8g8b8, 0x108},
69 {PICT_x8r8g8b8, 0x108},
74 for(i=0;i<sizeof(dst_format)/sizeof(dst_format[0]);i++)
76 if(dst_format[i].exa==ExaFormat)
77 return dst_format[i].hw;
83 static Bool NV10CheckTexture(PicturePtr Picture)
85 int w = Picture->pDrawable->width;
86 int h = Picture->pDrawable->height;
88 if ((w > 2046) || (h>2046))
90 if (!NV10TexFormat(Picture->format))
92 if (Picture->filter != PictFilterNearest && Picture->filter != PictFilterBilinear)
94 if (Picture->componentAlpha)
96 /* we cannot repeat on NV10 because NPOT textures do not support this. unfortunately. */
97 if (Picture->repeat != RepeatNone)
98 /* we can repeat 1x1 textures */
99 if (!(w == 1 && h == 1))
104 static Bool NV10CheckBuffer(PicturePtr Picture)
106 int w = Picture->pDrawable->width;
107 int h = Picture->pDrawable->height;
109 if ((w > 4096) || (h>4096))
111 if (Picture->componentAlpha)
113 if (!NV10DstFormat(Picture->format))
118 static Bool NV10CheckPictOp(int op)
120 if ( op == PictOpAtopReverse ) /*this op doesn't work*/
124 if ( op >= PictOpSaturate )
125 { /*we do no saturate, disjoint, conjoint, though we could do e.g. DisjointClear which really is Clear*/
131 /* Check if the current operation is a doable A8 + A8 */
132 /* A8 destination is a special case, because we do it by having the card think
133 it's ARGB. For now we support PictOpAdd which is the only important op for this dst format,
134 and without transformation or funny things.*/
135 static Bool NV10Check_A8plusA8_Feasability(PicturePtr src, PicturePtr msk, PicturePtr dst, int op)
137 if ((!msk) && (src->format == PICT_a8) && (dst->format == PICT_a8) && (!src->transform) &&
138 (op == PictOpAdd) && (src->repeat == RepeatNone))
/*
 * Two alternative definitions of the NV10EXAFallbackInfo logging macro: the
 * first forwards its five arguments to NV10EXAFallbackInfo_real, the second
 * expands to an empty statement.
 * NOTE(review): the preprocessor conditional that selects between the two is
 * not visible in this extract.
 */
146 #define NV10EXAFallbackInfo(X,Y,Z,S,T) NV10EXAFallbackInfo_real(X,Y,Z,S,T)
148 #define NV10EXAFallbackInfo(X,Y,Z,S,T) do { ; } while (0)
/*
 * Debug helper: build a one-line description of a composite request (reason,
 * operator name, then format and size of the source, destination and mask
 * pictures) and log it with xf86DrvMsg at X_INFO level.
 * The text is assembled piecewise with sprintf; out is advanced past what
 * has been written so far, while out2 names the start of the buffer.
 * When there is no mask the text NONE is logged in place of a mask format.
 *
 * NOTE(review): the assembled text is handed to xf86DrvMsg as the format
 * argument itself, so a percent sign in reason (or in any appended text)
 * would be interpreted as a conversion. Passing a literal %s format with
 * out2 as its argument would avoid that.
 * NOTE(review): every sprintf here is unbounded; the declaration and size of
 * the buffer are not visible in this extract.
 * NOTE(review): the pDrawable members are dereferenced without a NULL check.
 * NOTE(review): the case labels, several short sprintf/strcat lines and the
 * braces are missing from this extract.
 */
151 static void NV10EXAFallbackInfo_real(char * reason, int op, PicturePtr pSrcPicture,
152 PicturePtr pMaskPicture,
153 PicturePtr pDstPicture)
157 sprintf(out, "%s ", reason);
158 out = out + strlen(out);
162 sprintf(out, "PictOpClear ");
165 sprintf(out, "PictOpSrc ");
168 sprintf(out, "PictOpDst ");
171 sprintf(out, "PictOpOver ");
173 case PictOpOutReverse:
174 sprintf(out, "PictOpOutReverse ");
177 sprintf(out, "PictOpAdd ");
180 sprintf(out, "PictOp%d ", op);
182 out = out + strlen(out);
/* Source picture: format name (or hex code), then size. */
183 switch ( pSrcPicture->format )
186 sprintf(out, "A8R8G8B8 ");
189 sprintf(out, "X8R8G8B8 ");
192 sprintf(out, "X8B8G8R8 ");
195 sprintf(out, "R5G6B5 ");
204 sprintf(out, "%x ", pSrcPicture->format);
207 sprintf(out, "(%dx%d) ", pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
208 if ( pSrcPicture->repeat != RepeatNone )
/* Destination picture: same layout as the source part. */
213 switch ( pDstPicture->format )
216 sprintf(out, "A8R8G8B8 ");
219 sprintf(out, "X8R8G8B8 ");
222 sprintf(out, "X8B8G8R8 ");
225 sprintf(out, "R5G6B5 ");
234 sprintf(out, "%x ", pDstPicture->format);
237 sprintf(out, "(%dx%d) ", pDstPicture->pDrawable->width, pDstPicture->pDrawable->height);
238 if ( pDstPicture->repeat != RepeatNone )
/* Mask picture, if any. */
242 sprintf(out, "& NONE");
244 switch ( pMaskPicture->format )
247 sprintf(out, "& A8R8G8B8 ");
250 sprintf(out, "& X8R8G8B8 ");
253 sprintf(out, "& X8B8G8R8 ");
256 sprintf(out, "& A8 ");
259 sprintf(out, "& A1 ");
262 sprintf(out, "& %x ", pMaskPicture->format);
265 sprintf(out, "(%dx%d) ", pMaskPicture->pDrawable->width, pMaskPicture->pDrawable->height);
266 if ( pMaskPicture->repeat != RepeatNone )
271 xf86DrvMsg(0, X_INFO, out2);
275 Bool NV10CheckComposite(int op,
276 PicturePtr pSrcPicture,
277 PicturePtr pMaskPicture,
278 PicturePtr pDstPicture)
281 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
283 NV10EXAFallbackInfo("Hackelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
287 if (!NV10CheckPictOp(op))
289 NV10EXAFallbackInfo("pictop", op, pSrcPicture, pMaskPicture, pDstPicture);
292 if (!NV10CheckBuffer(pDstPicture))
294 NV10EXAFallbackInfo("dst", op, pSrcPicture, pMaskPicture, pDstPicture);
298 if (!NV10CheckTexture(pSrcPicture))
300 NV10EXAFallbackInfo("src", op, pSrcPicture, pMaskPicture, pDstPicture);
304 if ((pMaskPicture) &&(!NV10CheckTexture(pMaskPicture)))
306 NV10EXAFallbackInfo("mask", op, pSrcPicture, pMaskPicture, pDstPicture);
310 NV10EXAFallbackInfo("Accelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
/*
 * Program texture unit number unit to sample from pixmap, using the format,
 * repeat and filter attributes of Pict.
 * Emits, in order: the texture offset, the NPOT pitch, the NPOT size, the
 * texture format, the enable bit and the min/mag filter. Wrapping is clamp
 * to edge in both directions. When repeat is set a different set of format
 * bits is used (0x80, 0x280 or 0x300 depending on the picture format);
 * otherwise the bits come from NV10TexFormat, with a special case for
 * PICT_a8 on NV_ARCH_20.
 * Finally the pixmap size and the picture transform are stored in
 * state.unit[unit]; NV10Composite reads them back when it generates texture
 * coordinates.
 * NOTE(review): several statements are missing from this extract, among them
 * the declarations of txfmt and w, the rounding applied to w, the value used
 * for PICT_a8 on NV_ARCH_20 and the payload written after TX_FORMAT.
 */
314 static void NV10SetTexture(NVPtr pNv,int unit,PicturePtr Pict,PixmapPtr pixmap)
316 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_OFFSET(unit), 1 );
317 OUT_PIXMAPl(pixmap, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
318 int log2w = log2i(Pict->pDrawable->width);
319 int log2h = log2i(Pict->pDrawable->height);
322 (NV10_TCL_PRIMITIVE_3D_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE) |
323 (NV10_TCL_PRIMITIVE_3D_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE) |
326 (1<<12) | /* lod == 1 */
329 /* if repeat is set we're always handling a 1x1 texture with ARGB/XRGB destination,
330 in that case we change the format to use the POT (swizzled) matching format */
331 if (Pict->repeat != RepeatNone)
333 if (Pict->format == PICT_a8)
334 txfmt |= 0x80; /* A8 */
335 else if (Pict->format == PICT_r5g6b5 )
336 txfmt |= 0x280; /* R5G6B5 */
338 txfmt |= 0x300; /* ARGB format */
342 if (pNv->Architecture == NV_ARCH_20 && Pict->format == PICT_a8 )
344 else txfmt |= NV10TexFormat(Pict->format);
345 w = Pict->pDrawable->width;
346 /* NPOT_SIZE expects an even number for width, we can round up uneven
347 * numbers here because EXA always gives 64 byte aligned pixmaps
348 * and for all formats we support 64 bytes represents an even number
353 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_NPOT_PITCH(unit), 1);
354 OUT_RING (exaGetPixmapPitch(pixmap) << 16);
356 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_NPOT_SIZE(unit), 1);
357 OUT_RING ((w<<16) | Pict->pDrawable->height);
360 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_FORMAT(unit), 1 );
363 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_ENABLE(unit), 1 );
364 OUT_RING (NV10_TCL_PRIMITIVE_3D_TX_ENABLE_ENABLE);
366 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_FILTER(unit), 1);
367 if (Pict->filter == PictFilterNearest)
368 OUT_RING ((NV10_TCL_PRIMITIVE_3D_TX_FILTER_MAGNIFY_NEAREST) |
369 (NV10_TCL_PRIMITIVE_3D_TX_FILTER_MINIFY_NEAREST));
371 OUT_RING ((NV10_TCL_PRIMITIVE_3D_TX_FILTER_MAGNIFY_LINEAR) |
372 (NV10_TCL_PRIMITIVE_3D_TX_FILTER_MINIFY_LINEAR));
374 state.unit[unit].width = (float)pixmap->drawable.width;
375 state.unit[unit].height = (float)pixmap->drawable.height;
376 state.unit[unit].transform = Pict->transform;
// Set up pixmap as the render target for the composite.
// Emits the buffer format (forced to the PICT_a8r8g8b8 format when
// state.is_a8_plus_a8 is set, otherwise derived from Pict->format), the
// pitch (the pixmap pitch in both halves of the word) and the pixmap offset,
// followed by the viewport, the clip mode, one horizontal and one vertical
// clip range, a 16-entry projection matrix, the depth range and the viewport
// scale.
// NOTE(review): the declarations and values of x, y, w and h, and most of
// the payload words after the BEGIN_RING calls, are missing from this
// extract.
379 static void NV10SetBuffer(NVPtr pNv,PicturePtr Pict,PixmapPtr pixmap)
387 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BUFFER_FORMAT, 4);
388 if ( state.is_a8_plus_a8 )
390 OUT_RING (NV10DstFormat(PICT_a8r8g8b8));
393 OUT_RING (NV10DstFormat(Pict->format));
396 OUT_RING (((uint32_t)exaGetPixmapPitch(pixmap) << 16) |(uint32_t)exaGetPixmapPitch(pixmap));
397 OUT_PIXMAPl(pixmap, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
400 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 2);
401 OUT_RING ((w<<16)|x);
402 OUT_RING ((h<<16)|y);
403 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_MODE, 1); /* clip_mode */
405 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(0), 1);
406 OUT_RING (((w-1+x)<<16)|x|0x08000800);
407 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_VERT(0), 1);
408 OUT_RING (((h-1+y)<<16)|y|0x08000800);
410 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_PROJECTION_MATRIX(0), 16);
417 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DEPTH_RANGE_NEAR, 2);
420 OUT_RINGf (16777216.0);
424 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_SCALE_X, 4);
/*
 * Program the register combiners for the generic composite path.
 * Builds the alpha and RGB input words of combiner 0 from the source and
 * mask formats, then emits a burst of 12 words starting at RC_IN_ALPHA(0).
 * Combiner 1 inputs and both constant colours are left at zero.
 *   - alpha, input A: forced to one for a PICT_x8r8g8b8 source, otherwise
 *     0x18000000 is OR-ed in.
 *   - RGB, input A: forced to zero for a PICT_a8 source, otherwise
 *     0x08000000 is OR-ed in (rgb_0 per the inline comment).
 *   - input B (alpha and RGB): one when the mask is PICT_x8r8g8b8,
 *     otherwise 0x00190000 (a_1 per the inline comment).
 *   - inputs C and D are always zero.
 * The A_/B_/C_/D_ helper macros defined inside this function are also used
 * by NV10SetRegCombs_A8plusA8 further down.
 * NOTE(review): the conditions that test whether mask is NULL, the else
 * keywords and one payload word of the burst are missing from this extract.
 */
431 static void NV10SetRegCombs(NVPtr pNv, PicturePtr src, PicturePtr mask)
433 /*This can be a bit difficult to understand at first glance.
434 Reg combiners are described here:
435 http://icps.u-strasbg.fr/~marchesin/perso/extensions/NV/register_combiners.html
437 Single texturing setup, without honoring vertex colors (non default setup) is:
438 Alpha RC 0 : a_0 * 1 + 0 * 0
439 RGB RC 0 : rgb_0 * 1 + 0 * 0
441 Final combiner uses default setup
443 Default setup uses vertex rgb/alpha in place of 1s above, but we don't need that in 2D.
445 Multi texturing setup, where we do TEX0 in TEX1 (masking) is:
446 Alpha RC 0 : a_0 * a_1 + 0 * 0
447 RGB RC0 : rgb_0 * a_1 + 0 * 0
449 Final combiner uses default setup
453 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
454 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
455 unsigned int color0 = 0, color1 = 0;
456 #define A_ALPHA_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_A_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA)
457 #define B_ALPHA_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_B_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA)
458 #define C_ALPHA_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_C_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA)
459 #define D_ALPHA_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_D_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA)
461 #define A_ALPHA_ONE (A_ALPHA_ZERO | (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV))
462 #define B_ALPHA_ONE (B_ALPHA_ZERO | (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV))
463 #define C_ALPHA_ONE (C_ALPHA_ZERO | (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV))
464 #define D_ALPHA_ONE (D_ALPHA_ZERO | (NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV))
466 #define A_RGB_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_A_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_A_COMPONENT_USAGE_RGB)
467 #define B_RGB_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_B_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_B_COMPONENT_USAGE_RGB)
468 #define C_RGB_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_C_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_C_COMPONENT_USAGE_RGB)
469 #define D_RGB_ZERO (NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_D_INPUT_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_D_COMPONENT_USAGE_RGB)
471 #define A_RGB_ONE (A_RGB_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV)
472 #define B_RGB_ONE (B_RGB_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV)
473 #define C_RGB_ONE (C_RGB_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV)
474 #define D_RGB_ONE (D_RGB_ZERO | NV10_TCL_PRIMITIVE_3D_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV)
476 rc0_in_alpha |= C_ALPHA_ZERO | D_ALPHA_ZERO;
477 if (src->format == PICT_x8r8g8b8)
478 rc0_in_alpha |= A_ALPHA_ONE;
480 rc0_in_alpha |= 0x18000000;
483 rc0_in_alpha |= B_ALPHA_ONE;
485 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
486 rc0_in_alpha |= B_ALPHA_ONE;
488 rc0_in_alpha |= 0x00190000; /*B = a_1*/
490 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
491 if (src->format == PICT_a8 )
492 rc0_in_rgb |= A_RGB_ZERO;
494 rc0_in_rgb |= 0x08000000; /*A = rgb_0*/
497 rc0_in_rgb |= B_RGB_ONE;
499 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
500 rc0_in_rgb |= B_RGB_ONE;
502 rc0_in_rgb |= 0x00190000; /*B = a_1*/
504 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA(0), 12);
505 OUT_RING(rc0_in_alpha);
506 OUT_RING (rc1_in_alpha);
507 OUT_RING (rc0_in_rgb);
508 OUT_RING (rc1_in_rgb);
509 OUT_RING (color0); /*COLOR 0*/
510 OUT_RING (color1); /*COLOR 1*/
511 OUT_RING (0x00000c00);
513 OUT_RING (0x000010cd);
514 OUT_RING (0x18000000);
515 OUT_RING (0x300e0300);
516 OUT_RING (0x0c091c80);
/*
 * Register combiner setup for the A8 + A8 special case; NV10Composite calls
 * it with pass 0 and then pass 1 for every 4-pixel column it draws.
 * mask_out_bytes is a bit mask: when a bit is set, the matching combiner
 * inputs are replaced by the zero inputs so that byte contributes nothing.
 * Bits 1 and 2 are tested in the first group of statements, bits 8 and 4 in
 * the second. Constant colour 0 is set to 0x00ff0000 and constant colour 1
 * to 0x0000ff00 on the visible paths.
 * Ends with a 12-word burst at RC_IN_ALPHA(0), like NV10SetRegCombs but with
 * different trailing words.
 * Relies on the ZERO and ONE input macros that are defined inside
 * NV10SetRegCombs earlier in this file.
 * NOTE(review): the statements that branch on pass, and one payload word of
 * the burst, are missing from this extract.
 */
519 static void NV10SetRegCombs_A8plusA8(NVPtr pNv, int pass, int mask_out_bytes)
521 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
522 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
523 unsigned int color0 = 0, color1 = 0;
527 if ( mask_out_bytes & 1 )
528 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
529 else rc0_in_alpha = 0x19000000 | B_ALPHA_ONE | C_ALPHA_ZERO | D_ALPHA_ZERO;
531 rc0_in_rgb = C_RGB_ZERO | D_RGB_ZERO;
533 if ( mask_out_bytes & 2 )
534 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
535 else rc0_in_rgb |= 0x18000000 | 0x00010000;
537 color0 = 0x00ff0000; /*R = 1 G = 0 B = 0*/
540 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
546 if ( mask_out_bytes & 8 )
547 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
548 else rc0_in_rgb |= 0x18000000 | 0x00010000; /*A = a_0, B= cst color 0*/
552 if ( mask_out_bytes & 4)
553 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
554 else rc0_in_rgb |= 0x1900 | 0x02; /*C = a_1, D = cst color 1*/
556 color1 = 0x0000ff00; /*R = 0, G = 1, B = 0*/
559 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA(0), 12);
560 OUT_RING(rc0_in_alpha);
561 OUT_RING (rc1_in_alpha);
562 OUT_RING (rc0_in_rgb);
563 OUT_RING (rc1_in_rgb);
564 OUT_RING (color0); /*COLOR 0*/
565 OUT_RING (color1); /*COLOR 1*/
566 OUT_RING (0x00000c00);
568 OUT_RING (0x00000c00);
569 OUT_RING (0x18000000);
570 OUT_RING (0x300c0000);
571 OUT_RING (0x00001c80);
/*
 * Program the blend function for a Render operator.
 * pictops holds one source/destination blend factor pair per operator, in
 * operator order from PictOpClear to PictOpAdd (13 entries); the pair for op
 * is written to BLEND_FUNC_SRC and the following method, after which the
 * blend enable method is written.
 * op is used as a direct index into pictops without a range check here, so
 * callers must only pass operators that NV10CheckPictOp accepted.
 * NOTE(review): the payload written to BLEND_FUNC_ENABLE is missing from this
 * extract.
 */
574 static void NV10SetPictOp(NVPtr pNv,int op)
576 struct {int src;int dst;} pictops[] =
578 {0x0000,0x0000}, /* PictOpClear */
579 {0x0001,0x0000}, /* PictOpSrc */
580 {0x0000,0x0001}, /* PictOpDst */
581 {0x0001,0x0303}, /* PictOpOver */
582 {0x0305,0x0001}, /* PictOpOverReverse */
583 {0x0304,0x0000}, /* PictOpIn */
584 {0x0000,0x0302}, /* PictOpInReverse */
585 {0x0305,0x0000}, /* PictOpOut */
586 {0x0000,0x0303}, /* PictOpOutReverse */
587 {0x0304,0x0303}, /* PictOpAtop */
588 {0x0305,0x0302}, /* PictOpAtopReverse - DOES NOT WORK*/
589 {0x0305,0x0303}, /* PictOpXor */
590 {0x0001,0x0001}, /* PictOpAdd */
593 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BLEND_FUNC_SRC, 2);
594 OUT_RING (pictops[op].src);
595 OUT_RING (pictops[op].dst);
596 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
/*
 * EXA PrepareComposite hook: program the 3D engine for a composite request
 * that NV10CheckComposite accepted.
 * A8 + A8 case: clears state.have_mask, sets state.is_a8_plus_a8, programs
 * the destination buffer and the blend operator, and binds the source pixmap
 * to both texture unit 0 and texture unit 1.
 * Generic case: clears state.is_a8_plus_a8, programs the destination buffer,
 * the source texture on unit 0, the mask texture on unit 1, the register
 * combiners and the blend operator, opens a QUADS primitive and records in
 * state.have_mask whether a mask picture was given.
 * NOTE(review): the parameter lines declaring pSrc, pMask and pDst, the
 * return statements and the condition guarding the mask texture setup are
 * missing from this extract.
 */
600 Bool NV10PrepareComposite(int op,
601 PicturePtr pSrcPicture,
602 PicturePtr pMaskPicture,
603 PicturePtr pDstPicture,
608 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
609 NVPtr pNv = NVPTR(pScrn);
611 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
613 state.have_mask = FALSE;
614 state.is_a8_plus_a8 = TRUE;
615 NV10SetBuffer(pNv,pDstPicture,pDst);
616 NV10SetPictOp(pNv, op);
/* The same A8 source is bound twice; NV10Composite samples it at two
 * different x offsets per pass. */
617 NV10SetTexture(pNv, 0, pSrcPicture, pSrc);
618 NV10SetTexture(pNv, 1, pSrcPicture, pSrc);
622 state.is_a8_plus_a8 = FALSE;
625 NV10SetBuffer(pNv,pDstPicture,pDst);
628 NV10SetTexture(pNv,0,pSrcPicture,pSrc);
630 /* Set mask format */
632 NV10SetTexture(pNv,1,pMaskPicture,pMask);
634 NV10SetRegCombs(pNv, pSrcPicture, pMaskPicture);
637 NV10SetPictOp(pNv, op);
/* The primitive opened here is closed by NV10DoneComposite. */
639 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
640 OUT_RING (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_QUADS);
642 state.have_mask=(pMaskPicture!=NULL);
/*
 * Emit one vertex with a single texture coordinate pair: a 2-word burst at
 * VERTEX_TX0_2F_S followed by a 3-word burst at VERTEX_POS_3F_X.
 * NOTE(review): the payload words are missing from this extract; presumably
 * tx, ty and then vx, vy plus a third position component - confirm.
 */
646 static inline void NV10Vertex(NVPtr pNv,float vx,float vy,float tx,float ty)
648 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX0_2F_S, 2);
651 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_POS_3F_X, 3);
/*
 * Emit one vertex with two texture coordinate pairs: 2-word bursts at
 * VERTEX_TX0_2F_S and VERTEX_TX1_2F_S followed by a 3-word burst at
 * VERTEX_POS_3F_X.
 * NOTE(review): the payload words are missing from this extract; presumably
 * t0x, t0y, then t1x, t1y, then vx, vy plus a third position component -
 * confirm.
 */
657 static inline void NV10MVertex(NVPtr pNv,float vx,float vy,float t0x,float t0y,float t1x,float t1y)
659 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX0_2F_S, 2);
662 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX1_2F_S, 2);
665 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_POS_3F_X, 3);
/*
 * xFixedToFloat converts a 16.16 fixed point value to float by adding the
 * integer part and the fractional part divided by 65536.
 */
671 #define xFixedToFloat(v) \
672 ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
/*
 * Map the integer point (x, y) through the picture transform t and store the
 * result as floats in x_ret and y_ret. The point is converted to a fixed
 * point vector with a third component of xFixed1, transformed in place by
 * PictureTransformPoint, and converted back with xFixedToFloat.
 * NOTE(review): sx and sy are not referenced on any visible line.
 * NOTE(review): the return type line, the declaration of v and whatever
 * handles a NULL transform are missing from this extract.
 */
675 NV10EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
676 float *x_ret, float *y_ret)
681 v.vector[0] = IntToxFixed(x);
682 v.vector[1] = IntToxFixed(y);
683 v.vector[2] = xFixed1;
684 PictureTransformPoint(t, &v);
685 *x_ret = xFixedToFloat(v.vector[0]);
686 *y_ret = xFixedToFloat(v.vector[1]);
/*
 * EXA Composite hook: draw one rectangle of the composite that was set up by
 * NV10PrepareComposite.
 * The four source corners are first mapped through the unit 0 transform with
 * NV10EXATransformCoord, giving (sX0, sY0) to (sX3, sY3).
 *
 * A8 + A8 path (state.is_a8_plus_a8): the destination is addressed in units
 * of four A8 pixels (destination x divided by 4, starting at the 4-byte
 * boundary at or left of dstX). For each such column two quads are drawn,
 * one per register combiner pass; texture unit 0 and unit 1 sample source
 * pixels at offsets +0/+1 in the first pass and +2/+3 in the second.
 * mask_out_bytes marks the bytes of a column that lie outside the requested
 * rectangle on its left or right border.
 *
 * Generic path: the mask corners are mapped through the unit 1 transform
 * when state.have_mask is set and four NV10MVertex calls are issued;
 * the four NV10Vertex calls are presumably the no-mask alternative.
 *
 * NOTE(review): most of the parameter list, the declarations of part_pos_dX
 * and part_pos_sX, all case labels of the switch statements, some arguments
 * of the NV10EXATransformCoord calls and the else keywords are missing from
 * this extract. Whether mask_out_bytes is reset for every column cannot be
 * determined from the visible lines.
 */
694 void NV10Composite(PixmapPtr pDst,
704 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
705 NVPtr pNv = NVPTR(pScrn);
706 float sX0, sX1, sX2, sY0, sY1, sY2, sX3, sY3;
707 float mX0, mX1, mX2, mY0, mY1, mY2, mX3, mY3;
709 NV10EXATransformCoord(state.unit[0].transform, srcX, srcY,
711 state.unit[0].height, &sX0, &sY0);
712 NV10EXATransformCoord(state.unit[0].transform,
715 state.unit[0].height, &sX1, &sY1);
716 NV10EXATransformCoord(state.unit[0].transform,
717 srcX + width, srcY + height,
719 state.unit[0].height, &sX2, &sY2);
720 NV10EXATransformCoord(state.unit[0].transform,
723 state.unit[0].height, &sX3, &sY3);
725 if ( state.is_a8_plus_a8 )
727 /*We do A8 + A8 in 2-pass : setup the source texture as A8 twice,
728 with different tex coords, do B and G on first pass
729 Then setup again and do R and A on second pass
733 int mask_out_bytes = 0;
735 part_pos_dX = (dstX &~ 3) >> 2; /*we start at the 4byte boundary to the left of the image*/
736 part_pos_sX = sX0 + (dstX &~ 3) - dstX;
738 /*xf86DrvMsg(0, X_INFO, "drawing - srcX %f dstX %d w %d\n", sX0, dstX, width);*/
739 for ( ; part_pos_dX <= (((dstX + width) &~ 3) >> 2); part_pos_sX += 4, part_pos_dX ++ )
742 if ( part_pos_dX == (dstX &~ 3) >> 2 ) /*then we're slightly on the left of the image, bytes to mask out*/
744 /*xf86DrvMsg(0, X_INFO, "on left border...\n");*/
745 switch ( dstX - (dstX &~ 3) ) /*mask out the extra pixels on the left*/
748 mask_out_bytes |= 1 << 0;
750 mask_out_bytes |= 1 << 1;
752 mask_out_bytes |= 1 << 2;
754 mask_out_bytes |= 1 << 3;
759 /*mask out extra pixels on the right, in case the picture never touches an alignment marker*/
760 switch ( width + (dstX & 3) )
763 mask_out_bytes |= 1 << 3;
765 mask_out_bytes |= 1 << 2;
767 mask_out_bytes |= 1 << 1;
769 mask_out_bytes |= 1 << 0;
773 else if ( part_pos_dX == (((dstX + width) &~ 3) >> 2) )
775 /*xf86DrvMsg(0, X_INFO, "on right border...\n");*/
776 switch (4 - ((dstX + width) & 3))
779 mask_out_bytes |= 1 << 3;
781 mask_out_bytes |= 1 << 2;
783 mask_out_bytes |= 1 << 1;
785 mask_out_bytes |= 1 << 0;
/* First pass for this column: source pixels +0 and +1. */
793 NV10SetRegCombs_A8plusA8(pNv, 0, mask_out_bytes);
794 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
795 OUT_RING (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_QUADS);
797 NV10MVertex(pNv , part_pos_dX , dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
798 NV10MVertex(pNv , part_pos_dX + 1, dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
799 NV10MVertex(pNv , part_pos_dX + 1, dstY + height, part_pos_sX, sY1, part_pos_sX + 1, sY1);
800 NV10MVertex(pNv , part_pos_dX , dstY + height, part_pos_sX, sY1, part_pos_sX + 1, sY1);
802 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
803 OUT_RING (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_STOP);
/* Second pass for this column: source pixels +2 and +3. */
807 NV10SetRegCombs_A8plusA8(pNv, 1, mask_out_bytes);
808 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
809 OUT_RING (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_QUADS);
811 NV10MVertex(pNv , part_pos_dX, dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
812 NV10MVertex(pNv , part_pos_dX + 1 , dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
813 NV10MVertex(pNv , part_pos_dX + 1 , dstY + height, part_pos_sX + 2, sY1, part_pos_sX + 3, sY1);
814 NV10MVertex(pNv , part_pos_dX, dstY + height, part_pos_sX + 2, sY1, part_pos_sX + 3, sY1);
816 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
817 OUT_RING (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_STOP);
/* Generic path: one quad, with or without mask coordinates. */
822 if (state.have_mask) {
823 NV10EXATransformCoord(state.unit[1].transform, maskX, maskY,
825 state.unit[1].height, &mX0, &mY0);
826 NV10EXATransformCoord(state.unit[1].transform,
827 maskX + width, maskY,
829 state.unit[1].height, &mX1, &mY1);
830 NV10EXATransformCoord(state.unit[1].transform,
831 maskX + width, maskY + height,
833 state.unit[1].height, &mX2, &mY2);
834 NV10EXATransformCoord(state.unit[1].transform,
835 maskX, maskY + height,
837 state.unit[1].height, &mX3, &mY3);
838 NV10MVertex(pNv , dstX , dstY,sX0 , sY0 , mX0 , mY0);
839 NV10MVertex(pNv , dstX + width , dstY,sX1 , sY1 , mX1 , mY1);
840 NV10MVertex(pNv , dstX + width , dstY + height,sX2 , sY2 , mX2 , mY2);
841 NV10MVertex(pNv , dstX , dstY + height,sX3 , sY3 , mX3 , mY3);
843 NV10Vertex(pNv , dstX , dstY , sX0 , sY0);
844 NV10Vertex(pNv , dstX + width , dstY , sX1 , sY1);
845 NV10Vertex(pNv , dstX + width , dstY + height , sX2 , sY2);
846 NV10Vertex(pNv , dstX , dstY + height , sX3 , sY3);
/*
 * EXA DoneComposite hook: write the STOP value to VERTEX_BEGIN_END, which
 * closes the primitive, and mark the screen of pDst as needing
 * synchronisation through exaMarkSync.
 * pNv is looked up from the screen that owns pDst.
 */
852 void NV10DoneComposite (PixmapPtr pDst)
854 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
855 NVPtr pNv = NVPTR(pScrn);
857 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END, 1);
858 OUT_RING (NV10_TCL_PRIMITIVE_3D_VERTEX_BEGIN_END_STOP);
860 exaMarkSync(pDst->drawable.pScreen);
/*
 * Create the 3D object used by the composite code above and emit its initial
 * state.
 * Steps visible here: read the chipset id from bits 20-27 of MC register 0;
 * test that its high nibble is NV_ARCH_10 or NV_ARCH_20; choose the object
 * class from the chipset id (NV17 class from 0x17 up, NV11 class from 0x11
 * up, NV10 class otherwise, with one more NV11 assignment whose condition is
 * not visible); allocate the object with nouveau_grobj_alloc; bind the DMA
 * objects; then write default values for viewport and clipping, texturing,
 * register combiners, blending, depth, stencil, fog, lighting, polygon state
 * and the per-vertex attribute defaults.
 * NOTE(review): the return type, the return statements, what happens when
 * the architecture test or the allocation fails, the declaration of the loop
 * variable i and most payload words are missing from this extract.
 */
865 NVAccelInitNV10TCL(ScrnInfoPtr pScrn)
867 NVPtr pNv = NVPTR(pScrn);
868 uint32_t class = 0, chipset;
871 chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
872 if ( ((chipset & 0xf0) != NV_ARCH_10) &&
873 ((chipset & 0xf0) != NV_ARCH_20) )
877 class = NV11_TCL_PRIMITIVE_3D;
878 else if (chipset>=0x17)
879 class = NV17_TCL_PRIMITIVE_3D;
880 else if (chipset>=0x11)
881 class = NV11_TCL_PRIMITIVE_3D;
883 class = NV10_TCL_PRIMITIVE_3D;
886 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
890 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DMA_NOTIFY, 1);
891 OUT_RING (pNv->NvNull->handle);
893 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DMA_IN_MEMORY0, 2);
894 OUT_RING (pNv->chan->vram->handle);
895 OUT_RING (pNv->chan->gart->handle);
897 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DMA_IN_MEMORY2, 2);
898 OUT_RING (pNv->chan->vram->handle);
899 OUT_RING (pNv->chan->vram->handle);
901 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
904 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_HORIZ, 2);
908 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(0), 1);
909 OUT_RING ((0x7ff<<16)|0x800);
910 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_VERT(0), 1);
911 OUT_RING ((0x7ff<<16)|0x800);
914 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_HORIZ(i), 1);
916 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEWPORT_CLIP_VERT(i), 1);
920 BEGIN_RING(Nv3D, 0x290, 1);
921 OUT_RING ((0x10<<16)|1);
922 BEGIN_RING(Nv3D, 0x3f4, 1);
925 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
/* Extra setup that only applies to the classes newer than NV10. */
928 if (class != NV10_TCL_PRIMITIVE_3D) {
930 BEGIN_RING(Nv3D, 0x120, 3);
935 BEGIN_RING(NvImageBlit, 0x120, 3);
940 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
944 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
948 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_FOG_ENABLE, 1);
950 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_ALPHA_FUNC_ENABLE, 1);
952 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_ALPHA_FUNC_FUNC, 2);
955 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_TX_ENABLE(0), 2);
/* Same 12-word register combiner burst that NV10SetRegCombs rewrites later. */
958 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_RC_IN_ALPHA(0), 12);
959 OUT_RING (0x30141010);
961 OUT_RING (0x20040000);
965 OUT_RING (0x00000c00);
967 OUT_RING (0x00000c00);
968 OUT_RING (0x18000000);
969 OUT_RING (0x300e0300);
970 OUT_RING (0x0c091c80);
971 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BLEND_FUNC_ENABLE, 1);
973 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DITHER_ENABLE, 2);
976 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_LINE_SMOOTH_ENABLE, 1);
978 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_WEIGHT_ENABLE, 2);
981 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_BLEND_FUNC_SRC, 4);
986 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_STENCIL_MASK, 8);
995 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NORMALIZE_ENABLE, 1);
997 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_FOG_ENABLE, 2);
1000 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_LIGHT_MODEL, 1);
1002 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_COLOR_CONTROL, 1);
1004 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_ENABLED_LIGHTS, 1);
1006 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POLYGON_OFFSET_POINT_ENABLE, 3);
1010 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DEPTH_FUNC, 1);
1012 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DEPTH_WRITE_ENABLE, 1);
1014 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_DEPTH_TEST_ENABLE, 1);
1016 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POLYGON_OFFSET_FACTOR, 2);
1019 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POINT_SIZE, 1);
1021 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POINT_PARAMETERS_ENABLE, 2);
1024 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_LINE_WIDTH, 1);
1026 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_LINE_SMOOTH_ENABLE, 1);
1028 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POLYGON_MODE_FRONT, 2);
1031 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_CULL_FACE, 2);
1034 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_POLYGON_SMOOTH_ENABLE, 1);
1036 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_CULL_FACE_ENABLE, 1);
1038 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_CLIP_PLANE_ENABLE(0), 8);
1042 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_FOG_EQUATION_CONSTANT, 3);
1043 OUT_RING (0x3fc00000); /* 1.50 - NOTE(review): this comment used to say -1.50, but 0x3fc00000 is +1.5 as an IEEE-754 single; confirm which sign is intended */
1044 OUT_RING (0xbdb8aa0a); /* -0.09 */
1045 OUT_RING (0); /* 0.00 */
1047 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_NOP, 1);
1050 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_FOG_MODE, 2);
1053 /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
1054 * using texturing, except when using the texture matrix
1056 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VIEW_MATRIX_ENABLE, 1);
1058 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_COLOR_MASK, 1);
1059 OUT_RING (0x01010101);
1061 /* Set vertex component */
1062 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_COL_4F_R, 4);
1067 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_COL2_3F_R, 3);
1071 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_NOR_3F_X, 3);
1075 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX0_4F_S, 4);
1080 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_TX1_4F_S, 4);
1085 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_VERTEX_FOG_1F, 1);
1087 BEGIN_RING(Nv3D, NV10_TCL_PRIMITIVE_3D_EDGEFLAG_ENABLE, 1);