2 * Copyright 2007 Stephane Marchesin
3 * Copyright 2007 Arthur Huillet
4 * Copyright 2007 Peter Winters
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
21 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29 #include "nv_include.h"
/*
 * Composite state remembered between the EXA hooks in this file.
 * is_a8_plus_a8 is set by NV10PrepareComposite and read by NV10SetBuffer and
 * NV10Composite; the per-unit transform is stored by NV10SetTexture and read
 * back by NV10Composite.
 */
31 typedef struct nv10_exa_state {
33 Bool is_a8_plus_a8; /* also known as is_extremely_dirty :) */
35 PictTransformPtr transform;
40 static nv10_exa_state_t state;
/*
 * Translate a Render picture format into the hardware texture format value.
 * The lookup is a linear scan of tex_format[]; on a match the entry's hw
 * field is returned. NV10CheckTexture tests the result for zero.
 */
42 static int NV10TexFormat(int ExaFormat)
44 struct {int exa;int hw;} tex_format[] =
46 {PICT_a8r8g8b8, 0x900},
47 {PICT_x8r8g8b8, 0x900},
48 {PICT_r5g6b5, 0x880}, /*this one was only tested with rendercheck*/
49 /*{PICT_a1r5g5b5, NV10TCL_TX_FORMAT_FORMAT_R5G5B5A1},
50 {PICT_a4r4g4b4, NV10TCL_TX_FORMAT_FORMAT_R4G4B4A4},*/
51 {PICT_a8, 0x980}, /*this is a NV1x only format, corresponding NV2x is 0xD80, we hack it in below*/
55 for(i=0;i<sizeof(tex_format)/sizeof(tex_format[0]);i++)
57 if(tex_format[i].exa==ExaFormat)
58 return tex_format[i].hw;
/*
 * Translate a Render picture format into the hardware destination buffer
 * format value. Only a8r8g8b8 and x8r8g8b8 are listed, both mapping to 0x108.
 * NV10CheckBuffer tests the result for zero.
 */
64 static int NV10DstFormat(int ExaFormat)
66 struct {int exa;int hw;} dst_format[] =
68 {PICT_a8r8g8b8, 0x108},
69 {PICT_x8r8g8b8, 0x108},
74 for(i=0;i<sizeof(dst_format)/sizeof(dst_format[0]);i++)
76 if(dst_format[i].exa==ExaFormat)
77 return dst_format[i].hw;
/*
 * Acceptance checks for a picture that is to be used as a texture (source or
 * mask). Looks at, in order: drawable size against 2046 in each direction,
 * whether NV10TexFormat knows the format, whether the filter is nearest or
 * bilinear, component alpha, and repeat (only a 1x1 drawable is treated as
 * repeatable).
 */
83 static Bool NV10CheckTexture(PicturePtr Picture)
85 int w = Picture->pDrawable->width;
86 int h = Picture->pDrawable->height;
88 if ((w > 2046) || (h>2046))
90 if (!NV10TexFormat(Picture->format))
92 if (Picture->filter != PictFilterNearest && Picture->filter != PictFilterBilinear)
94 if (Picture->componentAlpha)
96 /* we cannot repeat on NV10 because NPOT textures do not support this. unfortunately. */
97 if (Picture->repeat != RepeatNone)
98 /* we can repeat 1x1 textures */
99 if (!(w == 1 && h == 1))
/*
 * Acceptance checks for the destination picture. Looks at, in order:
 * drawable size against 4096 in each direction, component alpha, and whether
 * NV10DstFormat knows the format.
 */
104 static Bool NV10CheckBuffer(PicturePtr Picture)
106 int w = Picture->pDrawable->width;
107 int h = Picture->pDrawable->height;
109 if ((w > 4096) || (h>4096))
111 if (Picture->componentAlpha)
113 if (!NV10DstFormat(Picture->format))
/*
 * Filter on the Render operator: PictOpAtopReverse is singled out, and so is
 * every operator numbered PictOpSaturate or higher.
 */
118 static Bool NV10CheckPictOp(int op)
120 if ( op == PictOpAtopReverse ) /*this op doesn't work*/
124 if ( op >= PictOpSaturate )
125 { /*we do not do saturate, disjoint, conjoint, though we could do e.g. DisjointClear which really is Clear*/
131 /* Check if the current operation is a doable A8 + A8 */
132 /* A8 destination is a special case, because we do it by having the card think
133 it's ARGB. For now we support PictOpAdd which is the only important op for this dst format,
134 and without transformation or funny things.*/
/*
 * The condition tested below is: no mask, source and destination both
 * PICT_a8, no source transform, op == PictOpAdd and a non-repeating source.
 * A separate preprocessor branch exists for big-endian builds
 * (X_BYTE_ORDER == X_BIG_ENDIAN).
 */
135 static Bool NV10Check_A8plusA8_Feasability(PicturePtr src, PicturePtr msk, PicturePtr dst, int op)
137 #if X_BYTE_ORDER == X_BIG_ENDIAN
140 if ((!msk) && (src->format == PICT_a8) && (dst->format == PICT_a8) && (!src->transform) &&
141 (op == PictOpAdd) && (src->repeat == RepeatNone))
/*
 * Two alternative definitions of the logging hook used by NV10CheckComposite:
 * the first forwards to NV10EXAFallbackInfo_real, the second expands to an
 * empty statement so the calls cost nothing.
 */
149 #define NV10EXAFallbackInfo(X,Y,Z,S,T) NV10EXAFallbackInfo_real(X,Y,Z,S,T)
151 #define NV10EXAFallbackInfo(X,Y,Z,S,T) do { ; } while (0)
/*
 * Debug helper: builds a one-line description of a composite request and
 * logs it with xf86DrvMsg at X_INFO level.
 *
 * reason       - short tag written first (callers pass e.g. "pictop", "dst").
 * op           - Render operator; known ones are printed by name, others as
 *                "PictOp<number>".
 * pSrcPicture  - source picture; format and drawable size are printed.
 * pMaskPicture - mask picture; "& NONE" is printed for the no-mask case.
 * pDstPicture  - destination picture; format and drawable size are printed.
 *
 * NOTE(review): the assembled text is handed to xf86DrvMsg in the format
 * position, so a '%' in the text (e.g. coming from reason) would be
 * interpreted as a conversion; passing it through a "%s" format looks safer.
 * NOTE(review): every piece is appended with unbounded sprintf; confirm the
 * buffer behind out/out2 is large enough for the longest message.
 */
154 static void NV10EXAFallbackInfo_real(char * reason, int op, PicturePtr pSrcPicture,
155 PicturePtr pMaskPicture,
156 PicturePtr pDstPicture)
160 sprintf(out, "%s ", reason);
161 out = out + strlen(out);
165 sprintf(out, "PictOpClear ");
168 sprintf(out, "PictOpSrc ");
171 sprintf(out, "PictOpDst ");
174 sprintf(out, "PictOpOver ");
176 case PictOpOutReverse:
177 sprintf(out, "PictOpOutReverse ");
180 sprintf(out, "PictOpAdd ");
183 sprintf(out, "PictOp%d ", op);
185 out = out + strlen(out);
186 switch ( pSrcPicture->format )
189 sprintf(out, "A8R8G8B8 ");
192 sprintf(out, "X8R8G8B8 ");
195 sprintf(out, "X8B8G8R8 ");
198 sprintf(out, "R5G6B5 ");
207 sprintf(out, "%x ", pSrcPicture->format);
210 sprintf(out, "(%dx%d) ", pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
211 if ( pSrcPicture->repeat != RepeatNone )
216 switch ( pDstPicture->format )
219 sprintf(out, "A8R8G8B8 ");
222 sprintf(out, "X8R8G8B8 ");
225 sprintf(out, "X8B8G8R8 ");
228 sprintf(out, "R5G6B5 ");
237 sprintf(out, "%x ", pDstPicture->format);
240 sprintf(out, "(%dx%d) ", pDstPicture->pDrawable->width, pDstPicture->pDrawable->height);
241 if ( pDstPicture->repeat != RepeatNone )
245 sprintf(out, "& NONE");
247 switch ( pMaskPicture->format )
250 sprintf(out, "& A8R8G8B8 ");
253 sprintf(out, "& X8R8G8B8 ");
256 sprintf(out, "& X8B8G8R8 ");
259 sprintf(out, "& A8 ");
262 sprintf(out, "& A1 ");
265 sprintf(out, "& %x ", pMaskPicture->format);
268 sprintf(out, "(%dx%d) ", pMaskPicture->pDrawable->width, pMaskPicture->pDrawable->height);
269 if ( pMaskPicture->repeat != RepeatNone )
274 xf86DrvMsg(0, X_INFO, out2);
/*
 * Decide whether a composite request is handled by this file.
 *
 * The A8 + A8 special case is tested first (logged as "Hackelerating").
 * Otherwise the operator, destination, source and (when present) mask are
 * checked in that order with NV10CheckPictOp, NV10CheckBuffer and
 * NV10CheckTexture; each failing check logs its own tag ("pictop", "dst",
 * "src", "mask"). A request that passes every check is logged as
 * "Accelerating".
 */
278 Bool NV10CheckComposite(int op,
279 PicturePtr pSrcPicture,
280 PicturePtr pMaskPicture,
281 PicturePtr pDstPicture)
284 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
286 NV10EXAFallbackInfo("Hackelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
290 if (!NV10CheckPictOp(op))
292 NV10EXAFallbackInfo("pictop", op, pSrcPicture, pMaskPicture, pDstPicture);
295 if (!NV10CheckBuffer(pDstPicture))
297 NV10EXAFallbackInfo("dst", op, pSrcPicture, pMaskPicture, pDstPicture);
301 if (!NV10CheckTexture(pSrcPicture))
303 NV10EXAFallbackInfo("src", op, pSrcPicture, pMaskPicture, pDstPicture);
307 if ((pMaskPicture) &&(!NV10CheckTexture(pMaskPicture)))
309 NV10EXAFallbackInfo("mask", op, pSrcPicture, pMaskPicture, pDstPicture);
313 NV10EXAFallbackInfo("Accelerating", op, pSrcPicture, pMaskPicture, pDstPicture);
/*
 * Program texture unit `unit` of the 3D object to sample from `pixmap`.
 *
 * Emits, in order: the texture offset (pixmap in VRAM, read access), the
 * NPOT pitch and size, the texture format word, the enable bit and the
 * filter (nearest when the picture asks for PictFilterNearest, linear
 * otherwise). The format word uses clamp-to-edge wrapping; a repeating
 * picture gets one of the alternative format codes selected below, and
 * PICT_a8 on NV_ARCH_20 is special-cased.
 *
 * Finally the pixmap's drawable size and the picture's transform are saved
 * in state.unit[unit] for later use by NV10Composite.
 */
317 static void NV10SetTexture(NVPtr pNv,int unit,PicturePtr Pict,PixmapPtr pixmap)
319 BEGIN_RING(Nv3D, NV10TCL_TX_OFFSET(unit), 1 );
320 OUT_PIXMAPl(pixmap, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
321 int log2w = log2i(Pict->pDrawable->width);
322 int log2h = log2i(Pict->pDrawable->height);
325 (NV10TCL_TX_FORMAT_WRAP_T_CLAMP_TO_EDGE) |
326 (NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE) |
329 (1<<12) | /* lod == 1 */
332 /* if repeat is set we're always handling a 1x1 texture with ARGB/XRGB destination,
333 in that case we change the format to use the POT (swizzled) matching format */
334 if (Pict->repeat != RepeatNone)
336 if (Pict->format == PICT_a8)
337 txfmt |= 0x80; /* A8 */
338 else if (Pict->format == PICT_r5g6b5 )
339 txfmt |= 0x280; /* R5G6B5 */
341 txfmt |= 0x300; /* ARGB format */
345 if (pNv->Architecture == NV_ARCH_20 && Pict->format == PICT_a8 )
347 else txfmt |= NV10TexFormat(Pict->format);
348 w = Pict->pDrawable->width;
349 /* NPOT_SIZE expects an even number for width, we can round up uneven
350 * numbers here because EXA always gives 64 byte aligned pixmaps
351 * and for all formats we support 64 bytes represents an even number
356 BEGIN_RING(Nv3D, NV10TCL_TX_NPOT_PITCH(unit), 1);
357 OUT_RING (exaGetPixmapPitch(pixmap) << 16);
359 BEGIN_RING(Nv3D, NV10TCL_TX_NPOT_SIZE(unit), 1);
360 OUT_RING ((w<<16) | Pict->pDrawable->height);
363 BEGIN_RING(Nv3D, NV10TCL_TX_FORMAT(unit), 1 );
366 BEGIN_RING(Nv3D, NV10TCL_TX_ENABLE(unit), 1 );
367 OUT_RING (NV10TCL_TX_ENABLE_ENABLE);
369 BEGIN_RING(Nv3D, NV10TCL_TX_FILTER(unit), 1);
370 if (Pict->filter == PictFilterNearest)
371 OUT_RING ((NV10TCL_TX_FILTER_MAGNIFY_NEAREST) |
372 (NV10TCL_TX_FILTER_MINIFY_NEAREST));
374 OUT_RING ((NV10TCL_TX_FILTER_MAGNIFY_LINEAR) |
375 (NV10TCL_TX_FILTER_MINIFY_LINEAR));
377 state.unit[unit].width = (float)pixmap->drawable.width;
378 state.unit[unit].height = (float)pixmap->drawable.height;
379 state.unit[unit].transform = Pict->transform;
/*
 * Point the 3D object's render target at `pixmap` and set up the viewport.
 *
 * The buffer format is the hardware code for PICT_a8r8g8b8 when
 * state.is_a8_plus_a8 is set, otherwise the code for the picture's own
 * format. The pixmap pitch is written into both halves of the pitch word and
 * the pixmap offset is emitted for VRAM write access. Viewport origin/size,
 * clip rectangle, projection matrix, depth range and viewport scale follow.
 */
382 static void NV10SetBuffer(NVPtr pNv,PicturePtr Pict,PixmapPtr pixmap)
390 BEGIN_RING(Nv3D, NV10TCL_BUFFER_FORMAT, 4);
391 if ( state.is_a8_plus_a8 )
393 OUT_RING (NV10DstFormat(PICT_a8r8g8b8));
396 OUT_RING (NV10DstFormat(Pict->format));
399 OUT_RING (((uint32_t)exaGetPixmapPitch(pixmap) << 16) |(uint32_t)exaGetPixmapPitch(pixmap));
400 OUT_PIXMAPl(pixmap, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
403 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_HORIZ, 2);
404 OUT_RING ((w<<16)|x);
405 OUT_RING ((h<<16)|y);
406 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_MODE, 1); /* clip_mode */
408 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
409 OUT_RING (((w-1+x)<<16)|x|0x08000800);
410 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
411 OUT_RING (((h-1+y)<<16)|y|0x08000800);
413 BEGIN_RING(Nv3D, NV10TCL_PROJECTION_MATRIX(0), 16);
420 BEGIN_RING(Nv3D, NV10TCL_DEPTH_RANGE_NEAR, 2);
423 OUT_RINGf (16777216.0);
427 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_SCALE_X, 4);
/*
 * Program the register combiners for a normal (non A8 + A8) composite.
 *
 * Only combiner stage 0 is configured; the stage 1 inputs and both constant
 * colors are emitted as zero. For alpha, input A is forced to one when the
 * source is x8r8g8b8 and input B is forced to one when the mask is x8r8g8b8
 * (otherwise B is the mask alpha). For RGB, input A is forced to zero when
 * the source is a8 (otherwise A is the source RGB) and B follows the same
 * mask rule. The *_ZERO / *_ONE helper macros defined here are also used by
 * NV10SetRegCombs_A8plusA8.
 */
434 static void NV10SetRegCombs(NVPtr pNv, PicturePtr src, PicturePtr mask)
436 /*This can be a bit difficult to understand at first glance.
437 Reg combiners are described here:
438 http://icps.u-strasbg.fr/~marchesin/perso/extensions/NV/register_combiners.html
440 Single texturing setup, without honoring vertex colors (non default setup) is:
441 Alpha RC 0 : a_0 * 1 + 0 * 0
442 RGB RC 0 : rgb_0 * 1 + 0 * 0
444 Final combiner uses default setup
446 Default setup uses vertex rgb/alpha in place of 1s above, but we don't need that in 2D.
448 Multi texturing setup, where we do TEX0 in TEX1 (masking) is:
449 Alpha RC 0 : a_0 * a_1 + 0 * 0
450 RGB RC0 : rgb_0 * a_1 + 0 * 0
452 Final combiner uses default setup
456 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
457 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
458 unsigned int color0 = 0, color1 = 0;
459 #define A_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_A_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_A_COMPONENT_USAGE_ALPHA)
460 #define B_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_B_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_B_COMPONENT_USAGE_ALPHA)
461 #define C_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_C_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_C_COMPONENT_USAGE_ALPHA)
462 #define D_ALPHA_ZERO (NV10TCL_RC_IN_ALPHA_D_INPUT_ZERO | NV10TCL_RC_IN_ALPHA_D_COMPONENT_USAGE_ALPHA)
464 #define A_ALPHA_ONE (A_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_A_MAPPING_UNSIGNED_INVERT_NV))
465 #define B_ALPHA_ONE (B_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_B_MAPPING_UNSIGNED_INVERT_NV))
466 #define C_ALPHA_ONE (C_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_C_MAPPING_UNSIGNED_INVERT_NV))
467 #define D_ALPHA_ONE (D_ALPHA_ZERO | (NV10TCL_RC_IN_ALPHA_D_MAPPING_UNSIGNED_INVERT_NV))
469 #define A_RGB_ZERO (NV10TCL_RC_IN_RGB_A_INPUT_ZERO | NV10TCL_RC_IN_RGB_A_COMPONENT_USAGE_RGB)
470 #define B_RGB_ZERO (NV10TCL_RC_IN_RGB_B_INPUT_ZERO | NV10TCL_RC_IN_RGB_B_COMPONENT_USAGE_RGB)
471 #define C_RGB_ZERO (NV10TCL_RC_IN_RGB_C_INPUT_ZERO | NV10TCL_RC_IN_RGB_C_COMPONENT_USAGE_RGB)
472 #define D_RGB_ZERO (NV10TCL_RC_IN_RGB_D_INPUT_ZERO | NV10TCL_RC_IN_RGB_D_COMPONENT_USAGE_RGB)
474 #define A_RGB_ONE (A_RGB_ZERO | NV10TCL_RC_IN_RGB_A_MAPPING_UNSIGNED_INVERT_NV)
475 #define B_RGB_ONE (B_RGB_ZERO | NV10TCL_RC_IN_RGB_B_MAPPING_UNSIGNED_INVERT_NV)
476 #define C_RGB_ONE (C_RGB_ZERO | NV10TCL_RC_IN_RGB_C_MAPPING_UNSIGNED_INVERT_NV)
477 #define D_RGB_ONE (D_RGB_ZERO | NV10TCL_RC_IN_RGB_D_MAPPING_UNSIGNED_INVERT_NV)
479 rc0_in_alpha |= C_ALPHA_ZERO | D_ALPHA_ZERO;
480 if (src->format == PICT_x8r8g8b8)
481 rc0_in_alpha |= A_ALPHA_ONE;
483 rc0_in_alpha |= 0x18000000;
486 rc0_in_alpha |= B_ALPHA_ONE;
488 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
489 rc0_in_alpha |= B_ALPHA_ONE;
491 rc0_in_alpha |= 0x00190000; /*B = a_1*/
493 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
494 if (src->format == PICT_a8 )
495 rc0_in_rgb |= A_RGB_ZERO;
497 rc0_in_rgb |= 0x08000000; /*A = rgb_0*/
500 rc0_in_rgb |= B_RGB_ONE;
502 if ( mask->format == PICT_x8r8g8b8 ) /*no alpha? ignore it*/
503 rc0_in_rgb |= B_RGB_ONE;
505 rc0_in_rgb |= 0x00190000; /*B = a_1*/
507 BEGIN_RING(Nv3D, NV10TCL_RC_IN_ALPHA(0), 12);
508 OUT_RING(rc0_in_alpha);
509 OUT_RING (rc1_in_alpha);
510 OUT_RING (rc0_in_rgb);
511 OUT_RING (rc1_in_rgb);
512 OUT_RING (color0); /*COLOR 0*/
513 OUT_RING (color1); /*COLOR 1*/
514 OUT_RING (0x00000c00);
516 OUT_RING (0x000010cd);
517 OUT_RING (0x18000000);
518 OUT_RING (0x300e0300);
519 OUT_RING (0x0c091c80);
/*
 * Register combiner setup for the A8 + A8 path, called by NV10Composite once
 * with pass 0 and once with pass 1 for every destination column.
 *
 * mask_out_bytes is a 4-bit set; a set bit replaces the matching combiner
 * inputs with zero so that byte is not contributed. Bits 1 and 2 are tested
 * in the first group of statements below, bits 8 and 4 in the second, which
 * also routes a_0 and a_1 through constant colors 0 and 1.
 * NOTE(review): which group belongs to which pass is decided by code that is
 * not visible here; confirm before relying on it.
 */
522 static void NV10SetRegCombs_A8plusA8(NVPtr pNv, int pass, int mask_out_bytes)
524 unsigned int rc0_in_alpha = 0, rc0_in_rgb = 0;
525 unsigned int rc1_in_alpha = 0, rc1_in_rgb = 0;
526 unsigned int color0 = 0, color1 = 0;
530 if ( mask_out_bytes & 1 )
531 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
532 else rc0_in_alpha = 0x19000000 | B_ALPHA_ONE | C_ALPHA_ZERO | D_ALPHA_ZERO;
534 rc0_in_rgb = C_RGB_ZERO | D_RGB_ZERO;
536 if ( mask_out_bytes & 2 )
537 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
538 else rc0_in_rgb |= 0x18000000 | 0x00010000;
540 color0 = 0x00ff0000; /*R = 1 G = 0 B = 0*/
543 rc0_in_alpha = A_ALPHA_ZERO | B_ALPHA_ZERO | C_ALPHA_ZERO | D_ALPHA_ZERO;
549 if ( mask_out_bytes & 8 )
550 rc0_in_rgb |= A_RGB_ZERO | B_RGB_ZERO;
551 else rc0_in_rgb |= 0x18000000 | 0x00010000; /*A = a_0, B= cst color 0*/
555 if ( mask_out_bytes & 4)
556 rc0_in_rgb |= C_RGB_ZERO | D_RGB_ZERO;
557 else rc0_in_rgb |= 0x1900 | 0x02; /*C = a_1, D = cst color 1*/
559 color1 = 0x0000ff00; /*R = 0, G = 1, B = 0*/
562 BEGIN_RING(Nv3D, NV10TCL_RC_IN_ALPHA(0), 12);
563 OUT_RING(rc0_in_alpha);
564 OUT_RING (rc1_in_alpha);
565 OUT_RING (rc0_in_rgb);
566 OUT_RING (rc1_in_rgb);
567 OUT_RING (color0); /*COLOR 0*/
568 OUT_RING (color1); /*COLOR 1*/
569 OUT_RING (0x00000c00);
571 OUT_RING (0x00000c00);
572 OUT_RING (0x18000000);
573 OUT_RING (0x300c0000);
574 OUT_RING (0x00001c80);
/*
 * Program the blend function for Render operator `op`.
 *
 * pictops[] holds one {src, dst} blend-factor pair per operator, from
 * PictOpClear through PictOpAdd, and is indexed directly by op; no range
 * check is done here. NV10CheckPictOp tests for operators numbered
 * PictOpSaturate or higher before a request reaches this point.
 */
577 static void NV10SetPictOp(NVPtr pNv,int op)
579 struct {int src;int dst;} pictops[] =
581 {0x0000,0x0000}, /* PictOpClear */
582 {0x0001,0x0000}, /* PictOpSrc */
583 {0x0000,0x0001}, /* PictOpDst */
584 {0x0001,0x0303}, /* PictOpOver */
585 {0x0305,0x0001}, /* PictOpOverReverse */
586 {0x0304,0x0000}, /* PictOpIn */
587 {0x0000,0x0302}, /* PictOpInReverse */
588 {0x0305,0x0000}, /* PictOpOut */
589 {0x0000,0x0303}, /* PictOpOutReverse */
590 {0x0304,0x0303}, /* PictOpAtop */
591 {0x0305,0x0302}, /* PictOpAtopReverse - DOES NOT WORK*/
592 {0x0305,0x0303}, /* PictOpXor */
593 {0x0001,0x0001}, /* PictOpAdd */
596 BEGIN_RING(Nv3D, NV10TCL_BLEND_FUNC_SRC, 2);
597 OUT_RING (pictops[op].src);
598 OUT_RING (pictops[op].dst);
599 BEGIN_RING(Nv3D, NV10TCL_BLEND_FUNC_ENABLE, 1);
/*
 * Set up hardware state for a run of NV10Composite calls.
 *
 * A8 + A8 case: records have_mask = FALSE and is_a8_plus_a8 = TRUE, sets the
 * destination buffer and blend op, and binds the source pixmap to both
 * texture units 0 and 1.
 *
 * General case: clears is_a8_plus_a8, sets the destination buffer, binds the
 * source to unit 0 and the mask to unit 1, programs the register combiners
 * and the blend op, opens a QUADS primitive and records whether a mask
 * picture was supplied.
 */
603 Bool NV10PrepareComposite(int op,
604 PicturePtr pSrcPicture,
605 PicturePtr pMaskPicture,
606 PicturePtr pDstPicture,
611 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
612 NVPtr pNv = NVPTR(pScrn);
614 if (NV10Check_A8plusA8_Feasability(pSrcPicture,pMaskPicture,pDstPicture,op))
616 state.have_mask = FALSE;
617 state.is_a8_plus_a8 = TRUE;
618 NV10SetBuffer(pNv,pDstPicture,pDst);
619 NV10SetPictOp(pNv, op);
620 NV10SetTexture(pNv, 0, pSrcPicture, pSrc);
621 NV10SetTexture(pNv, 1, pSrcPicture, pSrc);
625 state.is_a8_plus_a8 = FALSE;
628 NV10SetBuffer(pNv,pDstPicture,pDst);
631 NV10SetTexture(pNv,0,pSrcPicture,pSrc);
633 /* Set mask format */
635 NV10SetTexture(pNv,1,pMaskPicture,pMask);
637 NV10SetRegCombs(pNv, pSrcPicture, pMaskPicture);
640 NV10SetPictOp(pNv, op);
642 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
643 OUT_RING (NV10TCL_VERTEX_BEGIN_END_QUADS);
645 state.have_mask=(pMaskPicture!=NULL);
/*
 * Emit one vertex for single-texture drawing: a two-component texture
 * coordinate for unit 0 followed by a three-component position.
 */
649 static inline void NV10Vertex(NVPtr pNv,float vx,float vy,float tx,float ty)
651 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX0_2F_S, 2);
654 BEGIN_RING(Nv3D, NV10TCL_VERTEX_POS_3F_X, 3);
/*
 * Emit one vertex for two-texture drawing: two-component texture coordinates
 * for unit 0 and unit 1 followed by a three-component position.
 */
660 static inline void NV10MVertex(NVPtr pNv,float vx,float vy,float t0x,float t0y,float t1x,float t1y)
662 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX0_2F_S, 2);
665 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX1_2F_S, 2);
668 BEGIN_RING(Nv3D, NV10TCL_VERTEX_POS_3F_X, 3);
/*
 * Convert an xFixed value to float: the integer part plus the fractional
 * part divided by 65536. The argument is evaluated twice.
 */
674 #define xFixedToFloat(v) \
675 ((float)xFixedToInt((v)) + ((float)xFixedFrac(v) / 65536.0))
/*
 * Map the integer point (x, y) through picture transform t and return the
 * result as floats in *x_ret / *y_ret. The point is put in a fixed-point
 * vector with a third component of xFixed1, passed to PictureTransformPoint,
 * and the first two components are converted back with xFixedToFloat.
 * NOTE(review): how sx/sy are used and what happens for a NULL transform is
 * not visible in this listing.
 */
678 NV10EXATransformCoord(PictTransformPtr t, int x, int y, float sx, float sy,
679 float *x_ret, float *y_ret)
684 v.vector[0] = IntToxFixed(x);
685 v.vector[1] = IntToxFixed(y);
686 v.vector[2] = xFixed1;
687 PictureTransformPoint(t, &v);
688 *x_ret = xFixedToFloat(v.vector[0]);
689 *y_ret = xFixedToFloat(v.vector[1]);
/*
 * Draw one composite rectangle using the state set by NV10PrepareComposite.
 *
 * The four corners of the source rectangle are first mapped through texture
 * unit 0's transform (sX0..sX3 / sY0..sY3).
 *
 * A8 + A8 mode: the destination is walked in columns of four A8 pixels
 * (part_pos_dX is dstX rounded down to a multiple of 4, divided by 4). For
 * each column mask_out_bytes collects the bytes that lie left or right of
 * the requested rectangle, then two quads are drawn: one after
 * NV10SetRegCombs_A8plusA8 pass 0 sampling source offsets +0 and +1, one
 * after pass 1 sampling offsets +2 and +3.
 *
 * Normal mode: with a mask, the mask rectangle corners are mapped through
 * unit 1's transform and four NV10MVertex calls are issued; without a mask,
 * four NV10Vertex calls are issued.
 */
697 void NV10Composite(PixmapPtr pDst,
707 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
708 NVPtr pNv = NVPTR(pScrn);
709 float sX0, sX1, sX2, sY0, sY1, sY2, sX3, sY3;
710 float mX0, mX1, mX2, mY0, mY1, mY2, mX3, mY3;
712 NV10EXATransformCoord(state.unit[0].transform, srcX, srcY,
714 state.unit[0].height, &sX0, &sY0);
715 NV10EXATransformCoord(state.unit[0].transform,
718 state.unit[0].height, &sX1, &sY1);
719 NV10EXATransformCoord(state.unit[0].transform,
720 srcX + width, srcY + height,
722 state.unit[0].height, &sX2, &sY2);
723 NV10EXATransformCoord(state.unit[0].transform,
726 state.unit[0].height, &sX3, &sY3);
728 if ( state.is_a8_plus_a8 )
730 /*We do A8 + A8 in 2-pass : setup the source texture as A8 twice,
731 with different tex coords, do B and G on first pass
732 Then setup again and do R and A on second pass
736 int mask_out_bytes = 0;
738 part_pos_dX = (dstX &~ 3) >> 2; /*we start at the 4byte boundary to the left of the image*/
739 part_pos_sX = sX0 + (dstX &~ 3) - dstX;
741 /*xf86DrvMsg(0, X_INFO, "drawing - srcX %f dstX %d w %d\n", sX0, dstX, width);*/
742 for ( ; part_pos_dX <= (((dstX + width) &~ 3) >> 2); part_pos_sX += 4, part_pos_dX ++ )
745 if ( part_pos_dX == (dstX &~ 3) >> 2 ) /*then we're slightly on the left of the image, bytes to mask out*/
747 /*xf86DrvMsg(0, X_INFO, "on left border...\n");*/
748 switch ( dstX - (dstX &~ 3) ) /*mask out the extra pixels on the left*/
751 mask_out_bytes |= 1 << 0;
753 mask_out_bytes |= 1 << 1;
755 mask_out_bytes |= 1 << 2;
757 mask_out_bytes |= 1 << 3;
762 /*mask out extra pixels on the right, in case the picture never touches an alignment marker*/
763 switch ( width + (dstX & 3) )
766 mask_out_bytes |= 1 << 3;
768 mask_out_bytes |= 1 << 2;
770 mask_out_bytes |= 1 << 1;
772 mask_out_bytes |= 1 << 0;
776 else if ( part_pos_dX == (((dstX + width) &~ 3) >> 2) )
778 /*xf86DrvMsg(0, X_INFO, "on right border...\n");*/
779 switch (4 - ((dstX + width) & 3))
782 mask_out_bytes |= 1 << 3;
784 mask_out_bytes |= 1 << 2;
786 mask_out_bytes |= 1 << 1;
788 mask_out_bytes |= 1 << 0;
796 NV10SetRegCombs_A8plusA8(pNv, 0, mask_out_bytes);
797 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
798 OUT_RING (NV10TCL_VERTEX_BEGIN_END_QUADS);
800 NV10MVertex(pNv , part_pos_dX , dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
801 NV10MVertex(pNv , part_pos_dX + 1, dstY , part_pos_sX, sY0, part_pos_sX + 1, sY0);
802 NV10MVertex(pNv , part_pos_dX + 1, dstY + height, part_pos_sX, sY2, part_pos_sX + 1, sY2);
803 NV10MVertex(pNv , part_pos_dX , dstY + height, part_pos_sX, sY2, part_pos_sX + 1, sY2);
805 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
806 OUT_RING (NV10TCL_VERTEX_BEGIN_END_STOP);
810 NV10SetRegCombs_A8plusA8(pNv, 1, mask_out_bytes);
811 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
812 OUT_RING (NV10TCL_VERTEX_BEGIN_END_QUADS);
814 NV10MVertex(pNv , part_pos_dX, dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
815 NV10MVertex(pNv , part_pos_dX + 1 , dstY , part_pos_sX + 2, sY0, part_pos_sX + 3, sY0);
816 NV10MVertex(pNv , part_pos_dX + 1 , dstY + height, part_pos_sX + 2, sY2, part_pos_sX + 3, sY2);
817 NV10MVertex(pNv , part_pos_dX, dstY + height, part_pos_sX + 2, sY2, part_pos_sX + 3, sY2);
819 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
820 OUT_RING (NV10TCL_VERTEX_BEGIN_END_STOP);
825 if (state.have_mask) {
826 NV10EXATransformCoord(state.unit[1].transform, maskX, maskY,
828 state.unit[1].height, &mX0, &mY0);
829 NV10EXATransformCoord(state.unit[1].transform,
830 maskX + width, maskY,
832 state.unit[1].height, &mX1, &mY1);
833 NV10EXATransformCoord(state.unit[1].transform,
834 maskX + width, maskY + height,
836 state.unit[1].height, &mX2, &mY2);
837 NV10EXATransformCoord(state.unit[1].transform,
838 maskX, maskY + height,
840 state.unit[1].height, &mX3, &mY3);
841 NV10MVertex(pNv , dstX , dstY,sX0 , sY0 , mX0 , mY0);
842 NV10MVertex(pNv , dstX + width , dstY,sX1 , sY1 , mX1 , mY1);
843 NV10MVertex(pNv , dstX + width , dstY + height,sX2 , sY2 , mX2 , mY2);
844 NV10MVertex(pNv , dstX , dstY + height,sX3 , sY3 , mX3 , mY3);
846 NV10Vertex(pNv , dstX , dstY , sX0 , sY0);
847 NV10Vertex(pNv , dstX + width , dstY , sX1 , sY1);
848 NV10Vertex(pNv , dstX + width , dstY + height , sX2 , sY2);
849 NV10Vertex(pNv , dstX , dstY + height , sX3 , sY3);
/*
 * Finish a composite run: emit VERTEX_BEGIN_END with the STOP value and call
 * exaMarkSync on the destination pixmap's screen.
 */
855 void NV10DoneComposite (PixmapPtr pDst)
857 ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
858 NVPtr pNv = NVPTR(pScrn);
860 BEGIN_RING(Nv3D, NV10TCL_VERTEX_BEGIN_END, 1);
861 OUT_RING (NV10TCL_VERTEX_BEGIN_END_STOP);
863 exaMarkSync(pDst->drawable.pScreen);
868 NVAccelInitNV10TCL(ScrnInfoPtr pScrn)
870 NVPtr pNv = NVPTR(pScrn);
871 uint32_t class = 0, chipset;
874 chipset = (nvReadMC(pNv, 0) >> 20) & 0xff;
875 if ( ((chipset & 0xf0) != NV_ARCH_10) &&
876 ((chipset & 0xf0) != NV_ARCH_20) )
881 else if (chipset>=0x17)
883 else if (chipset>=0x11)
889 if (nouveau_grobj_alloc(pNv->chan, Nv3D, class, &pNv->Nv3D))
893 BEGIN_RING(Nv3D, NV10TCL_DMA_NOTIFY, 1);
894 OUT_RING (pNv->NvNull->handle);
896 BEGIN_RING(Nv3D, NV10TCL_DMA_IN_MEMORY0, 2);
897 OUT_RING (pNv->chan->vram->handle);
898 OUT_RING (pNv->chan->gart->handle);
900 BEGIN_RING(Nv3D, NV10TCL_DMA_IN_MEMORY2, 2);
901 OUT_RING (pNv->chan->vram->handle);
902 OUT_RING (pNv->chan->vram->handle);
904 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
907 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_HORIZ, 2);
911 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1);
912 OUT_RING ((0x7ff<<16)|0x800);
913 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_VERT(0), 1);
914 OUT_RING ((0x7ff<<16)|0x800);
917 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1);
919 BEGIN_RING(Nv3D, NV10TCL_VIEWPORT_CLIP_VERT(i), 1);
923 BEGIN_RING(Nv3D, 0x290, 1);
924 OUT_RING ((0x10<<16)|1);
925 BEGIN_RING(Nv3D, 0x3f4, 1);
928 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
931 if (class != NV10TCL) {
933 BEGIN_RING(Nv3D, 0x120, 3);
938 BEGIN_RING(NvImageBlit, 0x120, 3);
943 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
947 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
951 BEGIN_RING(Nv3D, NV10TCL_FOG_ENABLE, 1);
953 BEGIN_RING(Nv3D, NV10TCL_ALPHA_FUNC_ENABLE, 1);
955 BEGIN_RING(Nv3D, NV10TCL_ALPHA_FUNC_FUNC, 2);
958 BEGIN_RING(Nv3D, NV10TCL_TX_ENABLE(0), 2);
961 BEGIN_RING(Nv3D, NV10TCL_RC_IN_ALPHA(0), 12);
962 OUT_RING (0x30141010);
964 OUT_RING (0x20040000);
968 OUT_RING (0x00000c00);
970 OUT_RING (0x00000c00);
971 OUT_RING (0x18000000);
972 OUT_RING (0x300e0300);
973 OUT_RING (0x0c091c80);
974 BEGIN_RING(Nv3D, NV10TCL_BLEND_FUNC_ENABLE, 1);
976 BEGIN_RING(Nv3D, NV10TCL_DITHER_ENABLE, 2);
979 BEGIN_RING(Nv3D, NV10TCL_LINE_SMOOTH_ENABLE, 1);
981 BEGIN_RING(Nv3D, NV10TCL_VERTEX_WEIGHT_ENABLE, 2);
984 BEGIN_RING(Nv3D, NV10TCL_BLEND_FUNC_SRC, 4);
989 BEGIN_RING(Nv3D, NV10TCL_STENCIL_MASK, 8);
998 BEGIN_RING(Nv3D, NV10TCL_NORMALIZE_ENABLE, 1);
1000 BEGIN_RING(Nv3D, NV10TCL_FOG_ENABLE, 2);
1003 BEGIN_RING(Nv3D, NV10TCL_LIGHT_MODEL, 1);
1005 BEGIN_RING(Nv3D, NV10TCL_COLOR_CONTROL, 1);
1007 BEGIN_RING(Nv3D, NV10TCL_ENABLED_LIGHTS, 1);
1009 BEGIN_RING(Nv3D, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3);
1013 BEGIN_RING(Nv3D, NV10TCL_DEPTH_FUNC, 1);
1015 BEGIN_RING(Nv3D, NV10TCL_DEPTH_WRITE_ENABLE, 1);
1017 BEGIN_RING(Nv3D, NV10TCL_DEPTH_TEST_ENABLE, 1);
1019 BEGIN_RING(Nv3D, NV10TCL_POLYGON_OFFSET_FACTOR, 2);
1022 BEGIN_RING(Nv3D, NV10TCL_POINT_SIZE, 1);
1024 BEGIN_RING(Nv3D, NV10TCL_POINT_PARAMETERS_ENABLE, 2);
1027 BEGIN_RING(Nv3D, NV10TCL_LINE_WIDTH, 1);
1029 BEGIN_RING(Nv3D, NV10TCL_LINE_SMOOTH_ENABLE, 1);
1031 BEGIN_RING(Nv3D, NV10TCL_POLYGON_MODE_FRONT, 2);
1034 BEGIN_RING(Nv3D, NV10TCL_CULL_FACE, 2);
1037 BEGIN_RING(Nv3D, NV10TCL_POLYGON_SMOOTH_ENABLE, 1);
1039 BEGIN_RING(Nv3D, NV10TCL_CULL_FACE_ENABLE, 1);
1041 BEGIN_RING(Nv3D, NV10TCL_CLIP_PLANE_ENABLE(0), 8);
1045 BEGIN_RING(Nv3D, NV10TCL_FOG_EQUATION_CONSTANT, 3);
1046 OUT_RING (0x3fc00000); /* -1.50 */
1047 OUT_RING (0xbdb8aa0a); /* -0.09 */
1048 OUT_RING (0); /* 0.00 */
1050 BEGIN_RING(Nv3D, NV10TCL_NOP, 1);
1053 BEGIN_RING(Nv3D, NV10TCL_FOG_MODE, 2);
1056 /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when
1057 * using texturing, except when using the texture matrix
1059 BEGIN_RING(Nv3D, NV10TCL_VIEW_MATRIX_ENABLE, 1);
1061 BEGIN_RING(Nv3D, NV10TCL_COLOR_MASK, 1);
1062 OUT_RING (0x01010101);
1064 /* Set vertex component */
1065 BEGIN_RING(Nv3D, NV10TCL_VERTEX_COL_4F_R, 4);
1070 BEGIN_RING(Nv3D, NV10TCL_VERTEX_COL2_3F_R, 3);
1074 BEGIN_RING(Nv3D, NV10TCL_VERTEX_NOR_3F_X, 3);
1078 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX0_4F_S, 4);
1083 BEGIN_RING(Nv3D, NV10TCL_VERTEX_TX1_4F_S, 4);
1088 BEGIN_RING(Nv3D, NV10TCL_VERTEX_FOG_1F, 1);
1090 BEGIN_RING(Nv3D, NV10TCL_EDGEFLAG_ENABLE, 1);