2 * Copyright 2007 Stephane Marchesin
3 * Copyright 2007 Arthur Huillet
4 * Copyright 2007 Peter Winters
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
21 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include "nv_include.h"
/*
 * Composite state kept between the prepare and draw steps in this file.
 * Besides the fields visible here, code below also accesses state.have_mask
 * and state.unit[n].width / .height / .transform.
 */
31 typedef struct nv10_exa_state {
33 Bool is_a8_plus_a8; /*also known as is_extremely_dirty :)*/
35 PictTransformPtr transform; /* assigned from Pict->transform in NV10SetTexture */
40 static nv10_exa_state_t state;
/*
 * Translate a Render picture format into the hardware texture format value.
 *
 * Scans the small tex_format table for an entry whose .exa field equals
 * ExaFormat and returns that entry's .hw value. PICT_a8r8g8b8 and
 * PICT_x8r8g8b8 both map to 0x900.
 *
 * NOTE(review): the value returned when nothing matches is not visible in
 * this excerpt; NV10CheckTexture tests the result with '!', so zero
 * presumably means "unsupported" - confirm.
 */
42 static int NV10TexFormat(int ExaFormat)
44 struct {int exa;int hw;} tex_format[] =
46 {PICT_a8r8g8b8, 0x900},
47 {PICT_x8r8g8b8, 0x900},
48 {PICT_r5g6b5, 0x880}, /*this one was only tested with rendercheck*/
49 /*{PICT_a1r5g5b5, NV10TCL_TX_FORMAT_FORMAT_R5G5B5A1},
50 {PICT_a4r4g4b4, NV10TCL_TX_FORMAT_FORMAT_R4G4B4A4},*/
51 {PICT_a8, 0x980}, /*this is a NV1x only format, corresponding NV2x is 0xD80, we hack it in below*/
55 for(i=0;i<sizeof(tex_format)/sizeof(tex_format[0]);i++)
57 if(tex_format[i].exa==ExaFormat)
58 return tex_format[i].hw;
/*
 * Translate a Render picture format into the hardware render-target format.
 *
 * Same table-lookup shape as NV10TexFormat. The entries visible here are
 * PICT_a8r8g8b8 and PICT_x8r8g8b8, both mapped to 0x108.
 *
 * NOTE(review): the no-match return value is not visible in this excerpt;
 * NV10CheckBuffer tests the result with '!', so zero presumably means
 * "unsupported" - confirm.
 */
64 static int NV10DstFormat(int ExaFormat)
66 struct {int exa;int hw;} dst_format[] =
68 {PICT_a8r8g8b8, 0x108},
69 {PICT_x8r8g8b8, 0x108},
74 for(i=0;i<sizeof(dst_format)/sizeof(dst_format[0]);i++)
76 if(dst_format[i].exa==ExaFormat)
77 return dst_format[i].hw;
/*
 * Decide whether a picture can be sampled as a texture.
 *
 * Conditions tested, in order: either dimension larger than 2046, a format
 * NV10TexFormat does not know, a filter other than nearest or bilinear,
 * component alpha, and repeat on anything that is not exactly 1x1.
 *
 * NOTE(review): the statement taken when each test fires is not visible in
 * this excerpt (presumably a FALSE return) - confirm.
 * NOTE(review): Picture->pDrawable is dereferenced without a NULL check;
 * verify callers never pass a picture that has no backing drawable.
 */
83 static Bool NV10CheckTexture(PicturePtr Picture)
85 int w = Picture->pDrawable->width;
86 int h = Picture->pDrawable->height;
88 if ((w > 2046) || (h>2046))
90 if (!NV10TexFormat(Picture->format))
92 if (Picture->filter != PictFilterNearest && Picture->filter != PictFilterBilinear)
94 if (Picture->componentAlpha)
96 /* we cannot repeat on NV10 because NPOT textures do not support this. unfortunately. */
97 if (Picture->repeat != RepeatNone)
98 /* we can repeat 1x1 textures */
99 if (!(w == 1 && h == 1))
/*
 * Decide whether a picture can be used as the render target.
 *
 * Conditions tested, in order: either dimension larger than 4096, component
 * alpha, and a format NV10DstFormat does not know.
 *
 * NOTE(review): the statement taken when each test fires is not visible in
 * this excerpt (presumably a FALSE return) - confirm.
 */
104 static Bool NV10CheckBuffer(PicturePtr Picture)
106 int w = Picture->pDrawable->width;
107 int h = Picture->pDrawable->height;
109 if ((w > 4096) || (h>4096))
111 if (Picture->componentAlpha)
113 if (!NV10DstFormat(Picture->format))
/*
 * Filter the Render operator. Singles out PictOpAtopReverse and every
 * operator numbered PictOpSaturate or higher; the outcome of each test is
 * not visible in this excerpt (presumably a FALSE return - confirm).
 * NV10SetPictOp later indexes its blend table with the operator, so this
 * filter is what keeps that index inside the table.
 */
118 static Bool NV10CheckPictOp(int op)
120 if ( op == PictOpAtopReverse ) /*this op doesn't work*/
124 if ( op >= PictOpSaturate )
125 { /*we do no saturate, disjoint, conjoint, though we could do e.g. DisjointClear which really is Clear*/
131 /* Check if the current operation is a doable A8 + A8 */
132 /* A8 destination is a special case, because we do it by having the card think
133 it's ARGB. For now we support PictOpAdd which is the only important op for this dst format,
134 and without transformation or funny things.*/
/*
 * Recognise the special "A8 source added onto A8 destination" case.
 *
 * The visible test requires all of: no mask picture, PICT_a8 source and
 * destination, no source transform, operator PictOpAdd, and no source
 * repeat. There is also a big-endian preprocessor branch whose body is not
 * visible in this excerpt.
 */
135 static Bool NV10Check_A8plusA8_Feasability(PicturePtr src, PicturePtr msk, PicturePtr dst, int op)
137 #if X_BYTE_ORDER == X_BIG_ENDIAN
140 if ((!msk) && (src->format == PICT_a8) && (dst->format == PICT_a8) && (!src->transform) &&
141 (op == PictOpAdd) && (src->repeat == RepeatNone))
/*
 * Two alternative definitions of the logging macro; the preprocessor
 * conditional that selects between them is not visible in this excerpt.
 * The first forwards all five arguments to NV10EXAFallbackInfo_real, the
 * second expands to an empty do/while statement.
 */
149 #define NV10EXAFallbackInfo(X,Y,Z,S,T) NV10EXAFallbackInfo_real(X,Y,Z,S,T)
151 #define NV10EXAFallbackInfo(X,Y,Z,S,T) do { ; } while (0)
/*
 * Build a one-line description of a composite request and log it.
 *
 * The text is assembled piecewise with sprintf: the caller-supplied reason,
 * the operator name, then format and size of the source, the destination
 * and the mask (or "& NONE"). The finished buffer is handed to xf86DrvMsg
 * at X_INFO level.
 *
 * NOTE(review): every piece is written with unbounded sprintf; the buffer
 * declaration is not visible in this excerpt, so its capacity cannot be
 * checked here.
 * NOTE(review): several case labels and the guard that selects the
 * "& NONE" branch are not visible in this excerpt.
 */
154 static void NV10EXAFallbackInfo_real(char * reason, int op, PicturePtr pSrcPicture,
155 PicturePtr pMaskPicture,
156 PicturePtr pDstPicture)
160 sprintf(out, "%s ", reason);
161 out = out + strlen(out);
165 sprintf(out, "PictOpClear ");
168 sprintf(out, "PictOpSrc ");
171 sprintf(out, "PictOpDst ");
174 sprintf(out, "PictOpOver ");
176 case PictOpOutReverse:
177 sprintf(out, "PictOpOutReverse ");
180 sprintf(out, "PictOpAdd ");
183 sprintf(out, "PictOp%d ", op);
185 out = out + strlen(out);
186 switch ( pSrcPicture->format )
189 sprintf(out, "A8R8G8B8 ");
192 sprintf(out, "X8R8G8B8 ");
195 sprintf(out, "X8B8G8R8 ");
198 sprintf(out, "R5G6B5 ");
207 sprintf(out, "%x ", pSrcPicture->format);
210 sprintf(out, "(%dx%d) ", pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
211 if ( pSrcPicture->repeat != RepeatNone )
216 switch ( pDstPicture->format )
219 sprintf(out, "A8R8G8B8 ");
222 sprintf(out, "X8R8G8B8 ");
225 sprintf(out, "X8B8G8R8 ");
228 sprintf(out, "R5G6B5 ");
237 sprintf(out, "%x ", pDstPicture->format);
240 sprintf(out, "(%dx%d) ", pDstPicture->pDrawable->width, pDstPicture->pDrawable->height);
241 if ( pDstPicture->repeat != RepeatNone )
245 sprintf(out, "& NONE");
247 switch ( pMaskPicture->format )
250 sprintf(out, "& A8R8G8B8 ");
253 sprintf(out, "& X8R8G8B8 ");
256 sprintf(out, "& X8B8G8R8 ");
259 sprintf(out, "& A8 ");
262 sprintf(out, "& A1 ");
265 sprintf(out, "& %x ", pMaskPicture->format);
268 sprintf(out, "(%dx%d) ", pMaskPicture->pDrawable->width, pMaskPicture->pDrawable->height);
269 if ( pMaskPicture->repeat != RepeatNone )
/* NOTE(review): out2 is passed as the format string itself, so any '%' in
 * the assembled text (for instance from the reason argument) would be
 * interpreted as a conversion; passing it through "%s" would avoid that. */
274 xf86DrvMsg(0, X_INFO, out2);
/*
 * Decide whether a composite request can be handled by this file.
 *
 * The A8+A8 special case is tested first and logged as "Hackelerating".
 * Otherwise the operator, the destination, the source and (when present)
 * the mask are validated in that order; each failed check logs a short tag
 * ("pictop", "dst", "src", "mask"). A request that passes every check is
 * logged as "Accelerating".
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
278 Bool NV10CheckComposite(int op,
279 PicturePtr pSrcPicture,
280 PicturePtr pMaskPicture,
281 PicturePtr pDstPicture)
284 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
286 NV10EXAFallbackInfo("Hackelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
290 if (!NV10CheckPictOp(op))
292 NV10EXAFallbackInfo("pictop", op, pSrcPicture, pMaskPicture, pDstPicture);
295 if (!NV10CheckBuffer(pDstPicture))
297 NV10EXAFallbackInfo("dst", op, pSrcPicture, pMaskPicture, pDstPicture);
301 if (!NV10CheckTexture(pSrcPicture))
303 NV10EXAFallbackInfo("src", op, pSrcPicture, pMaskPicture, pDstPicture);
307 if ((pMaskPicture) &&(!NV10CheckTexture(pMaskPicture)))
309 NV10EXAFallbackInfo("mask", op, pSrcPicture, pMaskPicture, pDstPicture);
313 NV10EXAFallbackInfo("Accelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
/*
 * Program one texture unit for the given picture/pixmap pair.
 *
 * Emits, in order: the texture offset (pixmap placed in VRAM for reading),
 * the NPOT pitch and size, the texture format word, the enable bit and the
 * min/mag filter (nearest when the picture asks for PictFilterNearest,
 * linear otherwise). For repeating pictures the format bits are chosen by
 * hand per picture format instead of going through NV10TexFormat.
 *
 * Finally the pixmap's width and height (as floats) and the picture's
 * transform are stored in state.unit[unit].
 *
 * NOTE(review): the declarations of txfmt and w, and the value emitted for
 * the PICT_a8 case on NV_ARCH_20, are not visible in this excerpt.
 */
317 static void NV10SetTexture(NVPtr pNv,int unit,PicturePtr Pict,PixmapPtr pixmap)
319 BEGIN_RING(Nv3D, NV10TCL_TX_OFFSET(unit), 1 );
320 OUT_PIXMAPl(pixmap, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
321 int log2w = log2i(Pict->pDrawable->width);
322 int log2h = log2i(Pict->pDrawable->height);
325 (NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE) |
326 (NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE) |
329 (1<<12) | /* lod == 1 */
332 /* if repeat is set we're always handling a 1x1 texture with ARGB/XRGB destination,
333 in that case we change the format to use the POT (swizzled) matching format */
334 if (Pict->repeat != RepeatNone)
336 if (Pict->format == PICT_a8)
337 txfmt |= 0x80; /* A8 */
338 else if (Pict->format == PICT_r5g6b5 )
339 txfmt |= 0x280; /* R5G6B5 */
341 txfmt |= 0x300; /* ARGB format */
345 if (pNv->Architecture == NV_ARCH_20 && Pict->format == PICT_a8 )
347 else txfmt |= NV10TexFormat(Pict->format);
348 w = Pict->pDrawable->width;
349 /* NPOT_SIZE expects an even number for width, we can round up uneven
350 * numbers here because EXA always gives 64 byte aligned pixmaps
351 * and for all formats we support 64 bytes represents an even number
356 BEGIN_RING(Nv3D, NV10TCL_TX_NPOT_PITCH(unit), 1);
357 OUT_RING (exaGetPixmapPitch(pixmap) << 16);
359 BEGIN_RING(Nv3D, NV10TCL_TX_NPOT_SIZE(unit), 1);
360 OUT_RING ((w<<16) | Pict->pDrawable->height);
363 BEGIN_RING(Nv3D, NV10TCL_TX_FORMAT(unit), 1 );
366 BEGIN_RING(Nv3D, NV10TCL_TX_ENABLE(unit), 1 );
367 OUT_RING (NV10TCL_TX_ENABLE_ENABLE);
369 BEGIN_RING(Nv3D, NV10TCL_TX_FILTER(unit), 1);
370 if (Pict->filter == PictFilterNearest)
371 OUT_RING ((NV10TCL_TX_FILTER_MAGNIFY_NEAREST) |
372 (NV10TCL_TX_FILTER_MINIFY_NEAREST));
374 OUT_RING ((NV10TCL_TX_FILTER_MAGNIFY_LINEAR) |
375 (NV10TCL_TX_FILTER_MINIFY_LINEAR));
377 state.unit[unit].width = (float)pixmap->drawable.width;
378 state.unit[unit].height = (float)pixmap->drawable.height;
379 state.unit[unit].transform = Pict->transform;
/*
 * Program the render target for the given picture/pixmap pair.
 *
 * Emits the buffer format (forced to the PICT_a8r8g8b8 mapping while
 * state.is_a8_plus_a8 is set, otherwise the picture's own format), the
 * pitch (same value in both halves of the word) and the pixmap offset in
 * VRAM for writing. It then sets the viewport, the first clip rectangle,
 * the projection matrix, the depth range and the viewport scale.
 *
 * NOTE(review): the definitions of x, y, w and h and most of the emitted
 * matrix/scale values are not visible in this excerpt.
 */
382 static void NV10SetBuffer(NVPtr pNv,PicturePtr Pict,PixmapPtr pixmap)
390 BEGIN_RING(Nv3D, NV10TCL_BUFFER_FORMAT, 4);
391 if ( state.is_a8_plus_a8 )
393 OUT_RING (NV10DstFormat(PICT_a8r8g8b8));
396 OUT_RING (NV10DstFormat(Pict->format));
399 OUT_RING (((uint32_t)exaGetPixmapPitch(pixmap) << 16) |(uint32_t)exaGetPixmapPitch(pixmap));
400 OUT_PIXMAPl(pixmap, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
403 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_HORIZ, 2);
404 OUT_RING ((w<<16)|x);
405 OUT_RING ((h<<16)|y);
406 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_MODE, 1); /* clip_mode */
408 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
409 OUT_RING (((w-1+x)<<16)|x|0x08000800);
410 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
411 OUT_RING (((h-1+y)<<16)|y|0x08000800);
413 BEGIN_RING(Nv3D, NV10TCL_PROJECTION_MATRIX(0), 16);
420 BEGIN_RING(Nv3D, NV10TCL_DEPTH_RANGE_NEAR, 2);
423 OUT_RINGf (16777216.0);
427 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_SCALE_X, 4);
/*
 * Program register combiner stage 0 for a source with an optional mask.
 *
 * Builds the alpha and RGB input words for combiner 0 from the source and
 * mask picture formats, then emits six words starting at
 * NV10TCL_RC_IN_ALPHA(0): alpha inputs for combiners 0 and 1, RGB inputs
 * for combiners 0 and 1, and the two constant colours. In this function the
 * combiner-1 words and both colours stay at their initial value of 0.
 *
 * The A_/B_/C_/D_ *_ZERO and *_ONE helper macros defined in this body are
 * also used by NV10SetRegCombs_A8plusA8 further down.
 *
 * NOTE(review): the conditions that choose between the "no mask" and
 * "mask" settings of input B are not visible in this excerpt.
 */
434 static void NV10SetRegCombs(NVPtr pNv, PicturePtr src, PicturePtr mask)
436 /*This can be a bit difficult to understand at first glance.
437 Reg combiners are described here:
438 http://icps.u-strasbg.fr/~marchesin/perso/extensions/NV/register_combiners.html
440 Single texturing setup, without honoring vertex colors (non default setup) is:
441 Alpha RC 0 : a_0 * 1 + 0 * 0
442 RGB RC 0 : rgb_0 * 1 + 0 * 0
444 Final combiner uses default setup
446 Default setup uses vertex rgb/alpha in place of 1s above, but we don't need that in 2D.
448 Multi texturing setup, where we do TEX0 in TEX1 (masking) is:
449 Alpha RC 0 : a_0 * a_1 + 0 * 0
450 RGB RC0 : rgb_0 * a_1 + 0 * 0
452 Final combiner uses default setup
456 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
457 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
458 unsigned int color0 = 0, color1 = 0;
459 #define A_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA)
460 #define B_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA)
461 #define C_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA)
462 #define D_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA)
464 #define A_ALPHA_ONE (A_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV))
465 #define B_ALPHA_ONE (B_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV))
466 #define C_ALPHA_ONE (C_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV))
467 #define D_ALPHA_ONE (D_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV))
469 #define A_RGB_ZERO (NV10TCL_RC_IN_RGB_A_INPUT_ZERO | NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB)
470 #define B_RGB_ZERO (NV10TCL_RC_IN_RGB_B_INPUT_ZERO | NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB)
471 #define C_RGB_ZERO (NV10TCL_RC_IN_RGB_C_INPUT_ZERO | NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB)
472 #define D_RGB_ZERO (NV10TCL_RC_IN_RGB_D_INPUT_ZERO | NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB)
474 #define A_RGB_ONE (A_RGB_ZERO | NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV)
475 #define B_RGB_ONE (B_RGB_ZERO | NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV)
476 #define C_RGB_ONE (C_RGB_ZERO | NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV)
477 #define D_RGB_ONE (D_RGB_ZERO | NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV)
479 rc0_in_alpha |= C_ALPHA_ZERO | D_ALPHA_ZERO;
480 if (src->format == PICT_x8r8g8b8)
481 rc0_in_alpha |= A_ALPHA_ONE;
483 rc0_in_alpha |= 0x18000000;
486 rc0_in_alpha |= B_ALPHA_ONE;
488 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
489 rc0_in_alpha |= B_ALPHA_ONE;
491 rc0_in_alpha |= 0x00190000; /*B = a_1*/
493 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
494 if (src->format == PICT_a8 )
495 rc0_in_rgb |= A_RGB_ZERO;
497 rc0_in_rgb |= 0x08000000; /*A = rgb_0*/
500 rc0_in_rgb |= B_RGB_ONE;
502 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
503 rc0_in_rgb |= B_RGB_ONE;
505 rc0_in_rgb |= 0x00190000; /*B = a_1*/
507 BEGIN_RING(Nv3D, NV10TCL_RC_IN_ALPHA(0), 6);
508 OUT_RING(rc0_in_alpha);
509 OUT_RING (rc1_in_alpha);
510 OUT_RING (rc0_in_rgb);
511 OUT_RING (rc1_in_rgb);
512 OUT_RING (color0); /*COLOR 0*/
513 OUT_RING (color1); /*COLOR 1*/
/*
 * Program register combiner stage 0 for one pass of the A8+A8 path.
 *
 * mask_out_bytes is a bit mask: a set bit replaces the matching combiner
 * inputs with zero. Bits 1 and 2 are consulted in one branch, bits 8 and 4
 * in the other; constant colour 0 is set to 0x00ff0000 and constant colour
 * 1 to 0x0000ff00 on the paths shown. The six words are emitted in the same
 * order as in NV10SetRegCombs.
 *
 * NOTE(review): the test on `pass` that separates the two branches is not
 * visible in this excerpt; presumably the first branch is pass 0 - confirm.
 */
516 static void NV10SetRegCombs_A8plusA8(NVPtr pNv, int pass, int mask_out_bytes)
518 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
519 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
520 unsigned int color0 = 0, color1 = 0;
524 if ( mask_out_bytes & 1 )
525 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
526 else rc0_in_alpha = 0x19000000 | B_ALPHA_ONE | C_ALPHA_ZERO | D_ALPHA_ZERO;
528 rc0_in_rgb = C_RGB_ZERO | D_RGB_ZERO;
530 if ( mask_out_bytes & 2 )
531 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
532 else rc0_in_rgb |= 0x18000000 | 0x00010000;
534 color0 = 0x00ff0000; /*R = 1 G = 0 B = 0*/
537 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
543 if ( mask_out_bytes & 8 )
544 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
545 else rc0_in_rgb |= 0x18000000 | 0x00010000; /*A = a_0, B= cst color 0*/
549 if ( mask_out_bytes & 4)
550 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
551 else rc0_in_rgb |= 0x1900 | 0x02; /*C = a_1, D = cst color 1*/
553 color1 = 0x0000ff00; /*R = 0, G = 1, B = 0*/
556 BEGIN_RING(Nv3D, NV10TCL_RC_IN_ALPHA(0), 6);
557 OUT_RING(rc0_in_alpha);
558 OUT_RING (rc1_in_alpha);
559 OUT_RING (rc0_in_rgb);
560 OUT_RING (rc1_in_rgb);
561 OUT_RING (color0); /*COLOR 0*/
562 OUT_RING (color1); /*COLOR 1*/
/*
 * Program the blend function for a Render operator.
 *
 * pictops is indexed directly by the operator number and holds the source
 * and destination blend factors for PictOpClear through PictOpAdd (13
 * entries). The two factors are emitted at NV10TCL_BLEND_FUNC_SRC, followed
 * by a write to NV10TCL_BLEND_FUNC_ENABLE whose value is not visible in
 * this excerpt.
 *
 * NOTE(review): op is not range-checked here; the function relies on the
 * caller having filtered the operator (see NV10CheckPictOp).
 */
565 static void NV10SetPictOp(NVPtr pNv,int op)
567 struct {int src;int dst;} pictops[] =
569 {0x0000,0x0000}, /* PictOpClear */
570 {0x0001,0x0000}, /* PictOpSrc */
571 {0x0000,0x0001}, /* PictOpDst */
572 {0x0001,0x0303}, /* PictOpOver */
573 {0x0305,0x0001}, /* PictOpOverReverse */
574 {0x0304,0x0000}, /* PictOpIn */
575 {0x0000,0x0302}, /* PictOpInReverse */
576 {0x0305,0x0000}, /* PictOpOut */
577 {0x0000,0x0303}, /* PictOpOutReverse */
578 {0x0304,0x0303}, /* PictOpAtop */
579 {0x0305,0x0302}, /* PictOpAtopReverse - DOES NOT WORK*/
580 {0x0305,0x0303}, /* PictOpXor */
581 {0x0001,0x0001}, /* PictOpAdd */
584 BEGIN_RING(Nv3D, NV10TCL_BLEND_FUNC_SRC, 2);
585 OUT_RING (pictops[op].src);
586 OUT_RING (pictops[op].dst);
587 BEGIN_RING(Nv3D, NV10TCL_BLEND_FUNC_ENABLE, 1);
/*
 * Set up hardware state for a run of composite rectangles.
 *
 * A8+A8 case: clears state.have_mask, sets state.is_a8_plus_a8, programs
 * the render target and the blend function, and binds the source picture to
 * both texture unit 0 and texture unit 1.
 *
 * General case: clears state.is_a8_plus_a8, programs the render target,
 * binds the source to unit 0 and the mask to unit 1, programs the register
 * combiners and the blend function, opens a QUADS primitive and records
 * whether a mask picture was supplied in state.have_mask.
 *
 * NOTE(review): the pSrc/pMask/pDst parameter declarations, the guard
 * around the mask setup and the return statements are not visible in this
 * excerpt.
 */
591 Bool NV10PrepareComposite(int op,
592 PicturePtr pSrcPicture,
593 PicturePtr pMaskPicture,
594 PicturePtr pDstPicture,
599 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
600 NVPtr pNv = NVPTR(pScrn);
602 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
604 state.have_mask = FALSE;
605 state.is_a8_plus_a8 = TRUE;
606 NV10SetBuffer(pNv,pDstPicture,pDst);
607 NV10SetPictOp(pNv, op);
608 NV10SetTexture(pNv, 0, pSrcPicture, pSrc);
609 NV10SetTexture(pNv, 1, pSrcPicture, pSrc);
613 state.is_a8_plus_a8 = FALSE;
616 NV10SetBuffer(pNv,pDstPicture,pDst);
619 NV10SetTexture(pNv,0,pSrcPicture,pSrc);
621 /* Set mask format */
623 NV10SetTexture(pNv,1,pMaskPicture,pMask);
625 NV10SetRegCombs(pNv, pSrcPicture, pMaskPicture);
628 NV10SetPictOp(pNv, op);
630 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
631 OUT_RING (NV10TCL_VERTEX_BEGIN_END_QUADS);
633 state.have_mask=(pMaskPicture!=NULL);
/*
 * Emit one vertex with a single texture coordinate pair: a 2-word write at
 * NV10TCL_VERTEX_TX0_2F_S followed by a 3-word write at
 * NV10TCL_VERTEX_POS_3F_X. The emitted values themselves are not visible in
 * this excerpt (presumably tx/ty, then vx/vy and a constant z - confirm).
 */
637 static inline void NV10Vertex(NVPtr pNv,float vx,float vy,float tx,float ty)
639 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX0_2F_S, 2);
642 BEGIN_RING(Nv3D, NV10TCL_VERTEX_POS_3F_X, 3);
/*
 * Emit one vertex with two texture coordinate pairs: 2-word writes at
 * NV10TCL_VERTEX_TX0_2F_S and NV10TCL_VERTEX_TX1_2F_S, then a 3-word write
 * at NV10TCL_VERTEX_POS_3F_X. The emitted values themselves are not visible
 * in this excerpt (presumably t0x/t0y, t1x/t1y, then vx/vy and a constant
 * z - confirm).
 */
648 static inline void NV10MVertex(NVPtr pNv,float vx,float vy,float t0x,float t0y,float t1x,float t1y)
650 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX0_2F_S, 2);
653 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX1_2F_S, 2);
656 BEGIN_RING(Nv3D, NV10TCL_VERTEX_POS_3F_X, 3);
/*
 * Convert a fixed-point value to float: integer part plus fractional part
 * divided by 65536. Note that the argument is evaluated twice.
 */
662 #define xFixedToFloat(v) \
663 ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
/*
 * Map an integer (x, y) coordinate through a picture transform.
 *
 * The visible path loads x, y and a fixed-point 1 into a vector, applies
 * PictureTransformPoint and writes the first two components back as floats
 * through x_ret / y_ret.
 *
 * NOTE(review): the return type, the declaration of v, any handling of a
 * NULL transform and any use of sx / sy are not visible in this excerpt.
 */
666 NV10EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
667 float *x_ret, float *y_ret)
672 v.vector[0] = IntToxFixed(x);
673 v.vector[1] = IntToxFixed(y);
674 v.vector[2] = xFixed1;
675 PictureTransformPoint(t, &v);
676 *x_ret = xFixedToFloat(v.vector[0]);
677 *y_ret = xFixedToFloat(v.vector[1]);
/*
 * Emit the geometry for one composite rectangle.
 *
 * The four source corners are first mapped through texture unit 0's stored
 * transform. Then one of three paths is taken:
 *
 *  - state.is_a8_plus_a8: destination x positions are expressed in 4-byte
 *    columns ((dstX &~ 3) >> 2). For each column a mask_out_bytes value is
 *    built for the left and right borders, and two quads are drawn, one
 *    after NV10SetRegCombs_A8plusA8 pass 0 and one after pass 1, each
 *    wrapped in its own begin/stop pair.
 *  - state.have_mask: the four mask corners are mapped through unit 1's
 *    transform and four two-texture vertices are emitted.
 *  - otherwise: four single-texture vertices are emitted.
 *
 * NOTE(review): the remaining parameters, the case labels of the border
 * switches, the declarations of part_pos_dX / part_pos_sX and the place
 * where mask_out_bytes is reset between columns are not visible in this
 * excerpt.
 */
685 void NV10Composite(PixmapPtr pDst,
695 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
696 NVPtr pNv = NVPTR(pScrn);
697 float sX0, sX1, sX2, sY0, sY1, sY2, sX3, sY3;
698 float mX0, mX1, mX2, mY0, mY1, mY2, mX3, mY3;
700 NV10EXATransformCoord(state.unit[0].transform, srcX, srcY,
702 state.unit[0].height, &sX0, &sY0);
703 NV10EXATransformCoord(state.unit[0].transform,
706 state.unit[0].height, &sX1, &sY1);
707 NV10EXATransformCoord(state.unit[0].transform,
708 srcX + width, srcY + height,
710 state.unit[0].height, &sX2, &sY2);
711 NV10EXATransformCoord(state.unit[0].transform,
714 state.unit[0].height, &sX3, &sY3);
716 if ( state.is_a8_plus_a8 )
718 /*We do A8 + A8 in 2-pass : setup the source texture as A8 twice,
719 with different tex coords, do B and G on first pass
720 Then setup again and do R and A on second pass
724 int mask_out_bytes = 0;
726 part_pos_dX = (dstX &~ 3) >> 2; /*we start at the 4byte boundary to the left of the image*/
727 part_pos_sX = sX0 + (dstX &~ 3) - dstX;
729 /*xf86DrvMsg(0, X_INFO, "drawing - srcX %f dstX %d w %d\n", sX0, dstX, width);*/
730 for ( ; part_pos_dX <= (((dstX + width) &~ 3) >> 2); part_pos_sX += 4, part_pos_dX ++ )
733 if ( part_pos_dX == (dstX &~ 3) >> 2 ) /*then we're slightly on the left of the image, bytes to mask out*/
735 /*xf86DrvMsg(0, X_INFO, "on left border...\n");*/
736 switch ( dstX - (dstX &~ 3) ) /*mask out the extra pixels on the left*/
739 mask_out_bytes |= 1 << 0;
741 mask_out_bytes |= 1 << 1;
743 mask_out_bytes |= 1 << 2;
745 mask_out_bytes |= 1 << 3;
750 /*mask out extra pixels on the right, in case the picture never touches an alignment marker*/
751 switch ( width + (dstX & 3) )
754 mask_out_bytes |= 1 << 3;
756 mask_out_bytes |= 1 << 2;
758 mask_out_bytes |= 1 << 1;
760 mask_out_bytes |= 1 << 0;
764 else if ( part_pos_dX == (((dstX + width) &~ 3) >> 2) )
766 /*xf86DrvMsg(0, X_INFO, "on right border...\n");*/
767 switch (4 - ((dstX + width) & 3))
770 mask_out_bytes |= 1 << 3;
772 mask_out_bytes |= 1 << 2;
774 mask_out_bytes |= 1 << 1;
776 mask_out_bytes |= 1 << 0;
784 NV10SetRegCombs_A8plusA8(pNv, 0, mask_out_bytes);
785 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
786 OUT_RING (NV10TCL_VERTEX_BEGIN_END_QUADS);
788 NV10MVertex(pNv , part_pos_dX , dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
789 NV10MVertex(pNv , part_pos_dX + 1, dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
790 NV10MVertex(pNv , part_pos_dX + 1, dstY + height, part_pos_sX, sY2, part_pos_sX + 1, sY2);
791 NV10MVertex(pNv , part_pos_dX , dstY + height, part_pos_sX, sY2, part_pos_sX + 1, sY2);
793 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
794 OUT_RING (NV10TCL_VERTEX_BEGIN_END_STOP);
798 NV10SetRegCombs_A8plusA8(pNv, 1, mask_out_bytes);
799 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
800 OUT_RING (NV10TCL_VERTEX_BEGIN_END_QUADS);
802 NV10MVertex(pNv , part_pos_dX, dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
803 NV10MVertex(pNv , part_pos_dX + 1 , dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
804 NV10MVertex(pNv , part_pos_dX + 1 , dstY + height, part_pos_sX + 2, sY2, part_pos_sX + 3, sY2);
805 NV10MVertex(pNv , part_pos_dX, dstY + height, part_pos_sX + 2, sY2, part_pos_sX + 3, sY2);
807 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
808 OUT_RING (NV10TCL_VERTEX_BEGIN_END_STOP);
812 else if (state.have_mask) {
813 NV10EXATransformCoord(state.unit[1].transform, maskX, maskY,
815 state.unit[1].height, &mX0, &mY0);
816 NV10EXATransformCoord(state.unit[1].transform,
817 maskX + width, maskY,
819 state.unit[1].height, &mX1, &mY1);
820 NV10EXATransformCoord(state.unit[1].transform,
821 maskX + width, maskY + height,
823 state.unit[1].height, &mX2, &mY2);
824 NV10EXATransformCoord(state.unit[1].transform,
825 maskX, maskY + height,
827 state.unit[1].height, &mX3, &mY3);
828 NV10MVertex(pNv , dstX , dstY,sX0 , sY0 , mX0 , mY0);
829 NV10MVertex(pNv , dstX + width , dstY,sX1 , sY1 , mX1 , mY1);
830 NV10MVertex(pNv , dstX + width , dstY + height,sX2 , sY2 , mX2 , mY2);
831 NV10MVertex(pNv , dstX , dstY + height,sX3 , sY3 , mX3 , mY3);
833 NV10Vertex(pNv , dstX , dstY , sX0 , sY0);
834 NV10Vertex(pNv , dstX + width , dstY , sX1 , sY1);
835 NV10Vertex(pNv , dstX + width , dstY + height , sX2 , sY2);
836 NV10Vertex(pNv , dstX , dstY + height , sX3 , sY3);
/*
 * Finish a run of composite rectangles: emit the STOP value for
 * NV10TCL_VERTEX_BEGIN_END and call exaMarkSync on the destination's
 * screen.
 */
842 void NV10DoneComposite (PixmapPtr pDst)
844 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
845 NVPtr pNv = NVPTR(pScrn);
847 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
848 OUT_RING (NV10TCL_VERTEX_BEGIN_END_STOP);
850 exaMarkSync(pDst->drawable.pScreen);
855 NVAccelInitNV10TCL(ScrnInfoPtr pScrn)
857 NVPtr pNv = NVPTR(pScrn);
858 uint32_t class = 0, chipset;
861 chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
862 if ( ((chipset & 0xf0) != NV_ARCH_10) &&
863 ((chipset & 0xf0) != NV_ARCH_20) )
868 else if (chipset>=0x17)
870 else if (chipset>=0x11)
876 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
880 BEGIN_RING(Nv3D, NV10TCL_DMA_NOTIFY, 1);
881 OUT_RING (pNv->NvNull->handle);
883 BEGIN_RING(Nv3D, NV10TCL_DMA_IN_MEMORY0, 2);
884 OUT_RING (pNv->chan->vram->handle);
885 OUT_RING (pNv->chan->gart->handle);
887 BEGIN_RING(Nv3D, NV10TCL_DMA_IN_MEMORY2, 2);
888 OUT_RING (pNv->chan->vram->handle);
889 OUT_RING (pNv->chan->vram->handle);
891 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
894 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_HORIZ, 2);
898 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
899 OUT_RING ((0x7ff<<16)|0x800);
900 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
901 OUT_RING ((0x7ff<<16)|0x800);
904 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
906 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
910 BEGIN_RING(Nv3D, 0x290, 1);
911 OUT_RING ((0x10<<16)|1);
912 BEGIN_RING(Nv3D, 0x3f4, 1);
915 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
918 if (class != NV10TCL) {
920 BEGIN_RING(Nv3D, 0x120, 3);
925 BEGIN_RING(NvImageBlit, 0x120, 3);
930 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
934 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
938 BEGIN_RING(Nv3D, NV10TCL_FOG_ENABLE, 1);
940 BEGIN_RING(Nv3D, NV10TCL_ALPHA_FUNC_ENABLE, 1);
942 BEGIN_RING(Nv3D, NV10TCL_ALPHA_FUNC_FUNC, 2);
945 BEGIN_RING(Nv3D, NV10TCL_TX_ENABLE(0), 2);
948 BEGIN_RING(Nv3D, NV10TCL_RC_OUT_ALPHA(0), 6);
949 OUT_RING (0x00000c00);
951 OUT_RING (0x00000c00);
952 OUT_RING (0x18000000);
953 OUT_RING (0x300c0000);
954 OUT_RING (0x00001c80);
955 BEGIN_RING(Nv3D, NV10TCL_BLEND_FUNC_ENABLE, 1);
957 BEGIN_RING(Nv3D, NV10TCL_DITHER_ENABLE, 2);
960 BEGIN_RING(Nv3D, NV10TCL_LINE_SMOOTH_ENABLE, 1);
962 BEGIN_RING(Nv3D, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
965 BEGIN_RING(Nv3D, NV10TCL_BLEND_FUNC_SRC, 4);
970 BEGIN_RING(Nv3D, NV10TCL_STENCIL_MASK, 8);
979 BEGIN_RING(Nv3D, NV10TCL_NORMALIZE_ENABLE, 1);
981 BEGIN_RING(Nv3D, NV10TCL_FOG_ENABLE, 2);
984 BEGIN_RING(Nv3D, NV10TCL_LIGHT_MODEL, 1);
986 BEGIN_RING(Nv3D, NV10TCL_COLOR_CONTROL, 1);
988 BEGIN_RING(Nv3D, NV10TCL_ENABLED_LIGHTS, 1);
990 BEGIN_RING(Nv3D, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
994 BEGIN_RING(Nv3D, NV10TCL_DEPTH_FUNC, 1);
996 BEGIN_RING(Nv3D, NV10TCL_DEPTH_WRITE_ENABLE, 1);
998 BEGIN_RING(Nv3D, NV10TCL_DEPTH_TEST_ENABLE, 1);
1000 BEGIN_RING(Nv3D, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
1003 BEGIN_RING(Nv3D, NV10TCL_POINT_SIZE, 1);
1005 BEGIN_RING(Nv3D, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
1008 BEGIN_RING(Nv3D, NV10TCL_LINE_WIDTH, 1);
1010 BEGIN_RING(Nv3D, NV10TCL_LINE_SMOOTH_ENABLE, 1);
1012 BEGIN_RING(Nv3D, NV10TCL_POLYGON_MODE_FRONT, 2);
1015 BEGIN_RING(Nv3D, NV10TCL_CULL_FACE, 2);
1018 BEGIN_RING(Nv3D, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
1020 BEGIN_RING(Nv3D, NV10TCL_CULL_FACE_ENABLE, 1);
1022 BEGIN_RING(Nv3D, NV10TCL_CLIP_PLANE_ENABLE(0), 8);
1026 BEGIN_RING(Nv3D, NV10TCL_FOG_EQUATION_CONSTANT, 3);
1027 OUT_RING (0x3fc00000); /* -1.50 */
1028 OUT_RING (0xbdb8aa0a); /* -0.09 */
1029 OUT_RING (0); /* 0.00 */
1031 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
1034 BEGIN_RING(Nv3D, NV10TCL_FOG_MODE, 2);
1037 /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
1038 * using texturing, except when using the texture matrix
1040 BEGIN_RING(Nv3D, NV10TCL_VIEW_MATRIX_ENABLE, 1);
1042 BEGIN_RING(Nv3D, NV10TCL_COLOR_MASK, 1);
1043 OUT_RING (0x01010101);
1045 /* Set vertex component */
1046 BEGIN_RING(Nv3D, NV10TCL_VERTEX_COL_4F_R, 4);
1051 BEGIN_RING(Nv3D, NV10TCL_VERTEX_COL2_3F_R, 3);
1055 BEGIN_RING(Nv3D, NV10TCL_VERTEX_NOR_3F_X, 3);
1059 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX0_4F_S, 4);
1064 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX1_4F_S, 4);
1069 BEGIN_RING(Nv3D, NV10TCL_VERTEX_FOG_1F, 1);
1071 BEGIN_RING(Nv3D, NV10TCL_EDGEFLAG_ENABLE, 1);