Nuke XAA.
[nouveau] / src / nv_exa.c
1  /***************************************************************************\
2 |*                                                                           *|
3 |*       Copyright 2003 NVIDIA, Corporation.  All rights reserved.           *|
4 |*                                                                           *|
5 |*     NOTICE TO USER:   The source code  is copyrighted under  U.S. and     *|
6 |*     international laws.  Users and possessors of this source code are     *|
7 |*     hereby granted a nonexclusive,  royalty-free copyright license to     *|
8 |*     use this code in individual and commercial software.                  *|
9 |*                                                                           *|
10 |*     Any use of this source code must include,  in the user documenta-     *|
11 |*     tion and  internal comments to the code,  notices to the end user     *|
12 |*     as follows:                                                           *|
13 |*                                                                           *|
14 |*       Copyright 2003 NVIDIA, Corporation.  All rights reserved.           *|
15 |*                                                                           *|
16 |*     NVIDIA, CORPORATION MAKES NO REPRESENTATION ABOUT THE SUITABILITY     *|
17 |*     OF  THIS SOURCE  CODE  FOR ANY PURPOSE.  IT IS  PROVIDED  "AS IS"     *|
18 |*     WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.  NVIDIA, CORPOR-     *|
19 |*     ATION DISCLAIMS ALL WARRANTIES  WITH REGARD  TO THIS SOURCE CODE,     *|
20 |*     INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGE-     *|
21 |*     MENT,  AND FITNESS  FOR A PARTICULAR PURPOSE.   IN NO EVENT SHALL     *|
22 |*     NVIDIA, CORPORATION  BE LIABLE FOR ANY SPECIAL,  INDIRECT,  INCI-     *|
23 |*     DENTAL, OR CONSEQUENTIAL DAMAGES,  OR ANY DAMAGES  WHATSOEVER RE-     *|
24 |*     SULTING FROM LOSS OF USE,  DATA OR PROFITS,  WHETHER IN AN ACTION     *|
25 |*     OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF     *|
26 |*     OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE.     *|
27 |*                                                                           *|
28 |*     U.S. Government  End  Users.   This source code  is a "commercial     *|
29 |*     item,"  as that  term is  defined at  48 C.F.R. 2.101 (OCT 1995),     *|
30 |*     consisting  of "commercial  computer  software"  and  "commercial     *|
31 |*     computer  software  documentation,"  as such  terms  are  used in     *|
32 |*     48 C.F.R. 12.212 (SEPT 1995)  and is provided to the U.S. Govern-     *|
33 |*     ment only as  a commercial end item.   Consistent with  48 C.F.R.     *|
34 |*     12.212 and  48 C.F.R. 227.7202-1 through  227.7202-4 (JUNE 1995),     *|
35 |*     all U.S. Government End Users  acquire the source code  with only     *|
36 |*     those rights set forth herein.                                        *|
37 |*                                                                           *|
38  \***************************************************************************/
39
40 /*
41   Exa Modifications (c) Lars Knoll (lars@trolltech.com)
42  */
43
44 #ifdef HAVE_CONFIG_H
45 #include "config.h"
46 #endif
47
48 #include "nv_include.h"
49 #include "exa.h"
50
51 #include "nv_dma.h"
52 #include "nv_local.h"
53
54 #include <sys/time.h>
55
/*
 * Hardware ROP byte for each of the 16 X11 raster operations, indexed by
 * the GC "alu" value (see NVSetROP() in this file, which reads
 * NVCopyROP[alu]).
 */
const int NVCopyROP[16] =
{
        [0x0] = 0x00,   /* GXclear */
        [0x1] = 0x88,   /* GXand */
        [0x2] = 0x44,   /* GXandReverse */
        [0x3] = 0xCC,   /* GXcopy */
        [0x4] = 0x22,   /* GXandInverted */
        [0x5] = 0xAA,   /* GXnoop */
        [0x6] = 0x66,   /* GXxor */
        [0x7] = 0xEE,   /* GXor */
        [0x8] = 0x11,   /* GXnor */
        [0x9] = 0x99,   /* GXequiv */
        [0xA] = 0x55,   /* GXinvert */
        [0xB] = 0xDD,   /* GXorReverse */
        [0xC] = 0x33,   /* GXcopyInverted */
        [0xD] = 0xBB,   /* GXorInverted */
        [0xE] = 0x77,   /* GXnand */
        [0xF] = 0xFF    /* GXset */
};
75
76 static void 
77 NVSetPattern(ScrnInfoPtr pScrn, CARD32 clr0, CARD32 clr1,
78                                 CARD32 pat0, CARD32 pat1)
79 {
80         NVPtr pNv = NVPTR(pScrn);
81
82         NVDmaStart(pNv, NvImagePattern, PATTERN_COLOR_0, 4);
83         NVDmaNext (pNv, clr0);
84         NVDmaNext (pNv, clr1);
85         NVDmaNext (pNv, pat0);
86         NVDmaNext (pNv, pat1);
87 }
88
89 static void 
90 NVSetROP(ScrnInfoPtr pScrn, CARD32 alu, CARD32 planemask)
91 {
92         NVPtr pNv = NVPTR(pScrn);
93         int rop = NVCopyROP[alu] & 0xf0;
94
95         if (planemask != ~0) {
96                 NVSetPattern(pScrn, 0, planemask, ~0, ~0);
97                 if (pNv->currentRop != (alu + 32)) {
98                         NVDmaStart(pNv, NvRop, ROP_SET, 1);
99                         NVDmaNext (pNv, rop | 0x0a);
100                         pNv->currentRop = alu + 32;
101                 }
102         } else
103         if (pNv->currentRop != alu) {
104                 if(pNv->currentRop >= 16)
105                         NVSetPattern(pScrn, ~0, ~0, ~0, ~0);
106                 NVDmaStart(pNv, NvRop, ROP_SET, 1);
107                 NVDmaNext (pNv, rop | (rop >> 4));
108                 pNv->currentRop = alu;
109         }
110 }
111
112 static void setM2MFDirection(ScrnInfoPtr pScrn, int dir)
113 {
114         NVPtr pNv = NVPTR(pScrn);
115
116         if (pNv->M2MFDirection != dir) {
117
118                 NVDmaStart(pNv, NvMemFormat, MEMFORMAT_DMA_OBJECT_IN, 2);
119                 NVDmaNext (pNv, dir ? NvDmaTT : NvDmaFB);
120                 NVDmaNext (pNv, dir ? NvDmaFB : NvDmaTT);
121                 pNv->M2MFDirection = dir;
122         }
123 }
124
125 static CARD32 rectFormat(DrawablePtr pDrawable)
126 {
127         switch(pDrawable->bitsPerPixel) {
128         case 32:
129         case 24:
130                 return RECT_FORMAT_DEPTH24;
131                 break;
132         case 16:
133                 return RECT_FORMAT_DEPTH16;
134                 break;
135         default:
136                 return RECT_FORMAT_DEPTH8;
137                 break;
138         }
139 }
140
141 /* EXA acceleration hooks */
142 static void NVExaWaitMarker(ScreenPtr pScreen, int marker)
143 {
144         NVSync(xf86Screens[pScreen->myNum]);
145 }
146
147 static Bool NVExaPrepareSolid(PixmapPtr pPixmap,
148                               int   alu,
149                               Pixel planemask,
150                               Pixel fg)
151 {
152         ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
153         NVPtr pNv = NVPTR(pScrn);
154         int fmt;
155
156         planemask |= ~0 << pPixmap->drawable.bitsPerPixel;
157         if (planemask != ~0 || alu != GXcopy) {
158                 if (pPixmap->drawable.bitsPerPixel == 32)
159                         return FALSE;
160                 NVDmaStart(pNv, NvRectangle, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
161                 NVDmaNext (pNv, 1 /* ROP_AND */);
162                 NVSetROP(pScrn, alu, planemask);
163         } else {
164                 NVDmaStart(pNv, NvRectangle, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
165                 NVDmaNext (pNv, 3 /* SRCCOPY */);
166         }
167
168         if (!NVAccelGetCtxSurf2DFormatFromPixmap(pPixmap, &fmt))
169                 return FALSE;
170
171         /* When SURFACE_FORMAT_A8R8G8B8 is used with GDI_RECTANGLE_TEXT, the 
172          * alpha channel gets forced to 0xFF for some reason.  We're using 
173          * SURFACE_FORMAT_Y32 as a workaround
174          */
175         if (fmt == SURFACE_FORMAT_A8R8G8B8)
176                 fmt = 0xb;
177
178         if (!NVAccelSetCtxSurf2D(pPixmap, pPixmap, fmt))
179                 return FALSE;
180
181         NVDmaStart(pNv, NvRectangle, RECT_FORMAT, 1);
182         NVDmaNext (pNv, rectFormat(&pPixmap->drawable));
183         NVDmaStart(pNv, NvRectangle, RECT_SOLID_COLOR, 1);
184         NVDmaNext (pNv, fg);
185
186         pNv->DMAKickoffCallback = NVDmaKickoffCallback;
187         return TRUE;
188 }
189
190 static void NVExaSolid (PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
191 {
192         ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
193         NVPtr pNv = NVPTR(pScrn);
194         int width = x2-x1;
195         int height = y2-y1;
196
197         NVDmaStart(pNv, NvRectangle, RECT_SOLID_RECTS(0), 2);
198         NVDmaNext (pNv, (x1 << 16) | y1);
199         NVDmaNext (pNv, (width << 16) | height);
200
201         if((width * height) >= 512)
202                 NVDmaKickoff(pNv);
203 }
204
205 static void NVExaDoneSolid (PixmapPtr pPixmap)
206 {
207 }
208
209 static Bool NVExaPrepareCopy(PixmapPtr pSrcPixmap,
210                              PixmapPtr pDstPixmap,
211                              int       dx,
212                              int       dy,
213                              int       alu,
214                              Pixel     planemask)
215 {
216         ScrnInfoPtr pScrn = xf86Screens[pSrcPixmap->drawable.pScreen->myNum];
217         NVPtr pNv = NVPTR(pScrn);
218         int fmt;
219
220         if (pSrcPixmap->drawable.bitsPerPixel !=
221                         pDstPixmap->drawable.bitsPerPixel)
222                 return FALSE;
223
224         planemask |= ~0 << pDstPixmap->drawable.bitsPerPixel;
225         if (planemask != ~0 || alu != GXcopy) {
226                 if (pDstPixmap->drawable.bitsPerPixel == 32)
227                         return FALSE;
228                 NVDmaStart(pNv, NvImageBlit, NV_IMAGE_BLIT_OPERATION, 1);
229                 NVDmaNext (pNv, 1 /* ROP_AND */);
230                 NVSetROP(pScrn, alu, planemask);
231         } else {
232                 NVDmaStart(pNv, NvImageBlit, NV_IMAGE_BLIT_OPERATION, 1);
233                 NVDmaNext (pNv, 3 /* SRCCOPY */);
234         }
235
236         if (!NVAccelGetCtxSurf2DFormatFromPixmap(pDstPixmap, &fmt))
237                 return FALSE;
238         if (!NVAccelSetCtxSurf2D(pSrcPixmap, pDstPixmap, fmt))
239                 return FALSE;
240
241         pNv->DMAKickoffCallback = NVDmaKickoffCallback;
242         return TRUE;
243 }
244
245 static void NVExaCopy(PixmapPtr pDstPixmap,
246                       int       srcX,
247                       int       srcY,
248                       int       dstX,
249                       int       dstY,
250                       int       width,
251                       int       height)
252 {
253         ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
254         NVPtr pNv = NVPTR(pScrn);
255
256         /* Now check whether we have the same values for srcY and dstY and
257            whether the used chipset is buggy. Currently we flag all of G70
258            cards as buggy, which is probably much to broad. KoalaBR 
259            16 is an abritrary threshold. It should define the maximum number
260            of lines between dstY and srcY  If the number of lines is below
261            we guess, that the bug won't trigger...
262          */
263         if ( ((abs(srcY - dstY)< 16)||(abs(srcX-dstX)<16)) &&
264                 ((((pNv->Chipset & 0xfff0) == CHIPSET_G70) ||
265                  ((pNv->Chipset & 0xfff0) == CHIPSET_G71) ||
266                  ((pNv->Chipset & 0xfff0) == CHIPSET_G72) ||
267                  ((pNv->Chipset & 0xfff0) == CHIPSET_G73) ||
268                  ((pNv->Chipset & 0xfff0) == CHIPSET_C512))) )
269         {
270                 int dx=abs(srcX - dstX),dy=abs(srcY - dstY);
271                 // Ok, let's do it manually unless someone comes up with a better idea
272                 // 1. If dstY and srcY are really the same, do a copy rowwise
273                 if (dy<dx) {
274                         int i,xpos,inc;
275                         NVDEBUG("ExaCopy: Lines identical:\n");
276                         if (srcX>=dstX) {
277                                 xpos=0;
278                                 inc=1;
279                         } else {
280                                 xpos=width-1;
281                                 inc=-1;
282                         }
283                         for (i = 0; i < width; i++) {
284                                 NVDmaStart(pNv, NvImageBlit, BLIT_POINT_SRC, 3);
285                                 NVDmaNext (pNv, (srcY << 16) | (srcX+xpos));
286                                 NVDmaNext (pNv, (dstY << 16) | (dstX+xpos));
287                                 NVDmaNext (pNv, (height  << 16) | 1);
288                                 xpos+=inc;
289                         }
290                 } else {
291                         // 2. Otherwise we will try a line by line copy in the hope to avoid
292                         //    the card's bug.
293                         int i,ypos,inc;
294                         NVDEBUG("ExaCopy: Lines nearly the same srcY=%d, dstY=%d:\n", srcY, dstY);
295                         if (srcY>=dstY) {
296                                 ypos=0;
297                                 inc=1;
298                         } else {
299                                 ypos=height-1;
300                                 inc=-1;
301                         }
302                         for (i = 0; i < height; i++) {
303                                 NVDmaStart(pNv, NvImageBlit, BLIT_POINT_SRC, 3);
304                                 NVDmaNext (pNv, ((srcY+ypos) << 16) | srcX);
305                                 NVDmaNext (pNv, ((dstY+ypos) << 16) | dstX);
306                                 NVDmaNext (pNv, (1  << 16) | width);
307                                 ypos+=inc;
308                         }
309                 } 
310         } else {
311                 NVDEBUG("ExaCopy: Using default path\n");
312                 NVDmaStart(pNv, NvImageBlit, BLIT_POINT_SRC, 3);
313                 NVDmaNext (pNv, (srcY << 16) | srcX);
314                 NVDmaNext (pNv, (dstY << 16) | dstX);
315                 NVDmaNext (pNv, (height  << 16) | width);
316         }
317
318         if((width * height) >= 512)
319                 NVDmaKickoff(pNv); 
320 }
321
322 static void NVExaDoneCopy (PixmapPtr pDstPixmap) {}
323
324 Bool NVAccelMemcpyRect(char *dst, const char *src, int height,
325                        int dst_pitch, int src_pitch, int line_len)
326 {
327         if ((src_pitch == line_len) && (src_pitch == dst_pitch)) {
328                 memcpy(dst, src, line_len*height);
329         } else {
330                 while (height--) {
331                         memcpy(dst, src, line_len);
332                         src += src_pitch;
333                         dst += dst_pitch;
334                 }
335         }
336
337         return TRUE;
338 }
339
340 Bool
341 NVAccelDownloadM2MF(ScrnInfoPtr pScrn, char *dst, uint64_t src_offset,
342                                      int dst_pitch, int src_pitch,
343                                      int line_len, int line_count)
344 {
345         NVPtr pNv = NVPTR(pScrn);
346
347         setM2MFDirection(pScrn, 0);
348
349         while (line_count) {
350                 char *src = pNv->GARTScratch->map;
351                 int lc, i;
352
353                 if (line_count * line_len <= pNv->GARTScratch->size) {
354                         lc = line_count;
355                 } else {
356                         lc = pNv->GARTScratch->size / line_len;
357                         if (lc > line_count)
358                                 lc = line_count;
359                 }
360
361                 /* HW limitations */
362                 if (lc > 2047)
363                         lc = 2047;
364
365                 if (pNv->Architecture >= NV_ARCH_50) {
366                         NVDmaStart(pNv, NvMemFormat, 0x200, 1);
367                         NVDmaNext (pNv, 1);
368                         NVDmaStart(pNv, NvMemFormat, 0x21c, 1);
369                         NVDmaNext (pNv, 1);
370                         /* probably high-order bits of address */
371                         NVDmaStart(pNv, NvMemFormat, 0x238, 2);
372                         NVDmaNext (pNv, 0);
373                         NVDmaNext (pNv, 0);
374                 }
375
376                 NVDmaStart(pNv, NvMemFormat,
377                                 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
378                 NVDmaNext (pNv, (uint32_t)src_offset);
379                 NVDmaNext (pNv, (uint32_t)pNv->GARTScratch->offset);
380                 NVDmaNext (pNv, src_pitch);
381                 NVDmaNext (pNv, line_len);
382                 NVDmaNext (pNv, line_len);
383                 NVDmaNext (pNv, lc);
384                 NVDmaNext (pNv, (1<<8)|1);
385                 NVDmaNext (pNv, 0);
386
387                 NVNotifierReset(pScrn, pNv->Notifier0);
388                 NVDmaStart(pNv, NvMemFormat,
389                                 NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
390                 NVDmaNext (pNv, 0);
391                 NVDmaStart(pNv, NvMemFormat, 0x100, 1);
392                 NVDmaNext (pNv, 0);
393                 NVDmaKickoff(pNv);
394                 if (!NVNotifierWaitStatus(pScrn, pNv->Notifier0, 0, 2000))
395                         return FALSE;
396
397                 if (dst_pitch == line_len) {
398                         memcpy(dst, src, dst_pitch * lc);
399                         dst += dst_pitch * lc;
400                 } else {
401                         for (i = 0; i < lc; i++) {
402                                 memcpy(dst, src, line_len);
403                                 src += line_len;
404                                 dst += dst_pitch;
405                         }
406                 }
407
408                 line_count -= lc;
409                 src_offset += lc * src_pitch;
410         }
411
412         return TRUE;
413 }
414
415 static Bool NVDownloadFromScreen(PixmapPtr pSrc,
416                                  int x,  int y,
417                                  int w,  int h,
418                                  char *dst,  int dst_pitch)
419 {
420         ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
421         NVPtr pNv = NVPTR(pScrn);
422         int src_pitch, cpp, offset;
423         const char *src;
424
425         src_pitch  = exaGetPixmapPitch(pSrc);
426         cpp = pSrc->drawable.bitsPerPixel >> 3;
427         offset = (y * src_pitch) + (x * cpp);
428
429         if (pNv->GARTScratch) {
430                 if (NVAccelDownloadM2MF(pScrn, dst,
431                                         NVAccelGetPixmapOffset(pSrc) + offset,
432                                         dst_pitch, src_pitch, w * cpp, h))
433                         return TRUE;
434         }
435
436         src = pSrc->devPrivate.ptr + offset;
437         exaWaitSync(pSrc->drawable.pScreen);
438         if (NVAccelMemcpyRect(dst, src, h, dst_pitch, src_pitch, w*cpp))
439                 return TRUE;
440
441         return FALSE;
442 }
443
444 Bool
445 NVAccelUploadIFC(ScrnInfoPtr pScrn, const char *src, int src_pitch,
446                                     int x, int y, int w, int h, int cpp)
447 {
448         NVPtr pNv = NVPTR(pScrn);
449         int line_len = w * cpp;
450         int iw, id, fmt;
451
452         if (pNv->Architecture >= NV_ARCH_50)
453                 return FALSE;
454
455         if (h > 1024)
456                 return FALSE;
457
458         switch (cpp) {
459         case 2: fmt = 1; break;
460         case 4: fmt = 4; break;
461         default:
462                 return FALSE;
463         }
464
465         /* Pad out input width to cover both COLORA() and COLORB() */
466         iw  = (line_len + 7) & ~7;
467         id  = iw / 4; /* line push size */
468         iw /= cpp;
469
470         /* Don't support lines longer than max push size yet.. */
471         if (id > 1792)
472                 return FALSE;
473
474         NVDmaStart(pNv, NvClipRectangle, CLIP_POINT, 2);
475         NVDmaNext (pNv, 0x0); 
476         NVDmaNext (pNv, 0x7FFF7FFF);
477
478         NVDmaStart(pNv, NvImageFromCpu, NV05_IMAGE_FROM_CPU_OPERATION, 2);
479         NVDmaNext (pNv, 0x3 /* SRCCOPY */);
480         NVDmaNext (pNv, fmt);
481         NVDmaStart(pNv, NvImageFromCpu, NV05_IMAGE_FROM_CPU_POINT, 3);
482         NVDmaNext (pNv, (y << 16) | x); /* dst point */
483         NVDmaNext (pNv, (h << 16) | w); /* width/height out */
484         NVDmaNext (pNv, (h << 16) | iw); /* width/height in */
485
486         while (h--) {
487                 char *dst;
488                 /* send a line */
489                 NVDmaStart(pNv, NvImageFromCpu,
490                                 NV10_IMAGE_FROM_CPU_HLINE, id);
491                 dst = (char *)pNv->dmaBase + (pNv->dmaCurrent << 2);
492                 memcpy(dst, src, line_len);
493                 pNv->dmaCurrent += id;
494
495                 src += src_pitch;
496         }
497
498         return TRUE;
499 }
500
501 Bool
502 NVAccelUploadM2MF(ScrnInfoPtr pScrn, uint64_t dst_offset, const char *src,
503                                      int dst_pitch, int src_pitch,
504                                      int line_len, int line_count)
505 {
506         NVPtr pNv = NVPTR(pScrn);
507
508         setM2MFDirection(pScrn, 1);
509
510         while (line_count) {
511                 char *dst = pNv->GARTScratch->map;
512                 int lc, i;
513
514                 /* Determine max amount of data we can DMA at once */
515                 if (line_count * line_len <= pNv->GARTScratch->size) {
516                         lc = line_count;
517                 } else {
518                         lc = pNv->GARTScratch->size / line_len;
519                         if (lc > line_count)
520                                 lc = line_count;
521                 }
522
523                 /* HW limitations */
524                 if (lc > 2047)
525                         lc = 2047;
526
527                 /* Upload to GART */
528                 if (src_pitch == line_len) {
529                         memcpy(dst, src, src_pitch * lc);
530                         src += src_pitch * lc;
531                 } else {
532                         for (i = 0; i < lc; i++) {
533                                 memcpy(dst, src, line_len);
534                                 src += src_pitch;
535                                 dst += line_len;
536                         }
537                 }
538
539                 if (pNv->Architecture >= NV_ARCH_50) {
540                         NVDmaStart(pNv, NvMemFormat, 0x200, 1);
541                         NVDmaNext (pNv, 1);
542                         NVDmaStart(pNv, NvMemFormat, 0x21c, 1);
543                         NVDmaNext (pNv, 1);
544                         /* probably high-order bits of address */
545                         NVDmaStart(pNv, NvMemFormat, 0x238, 2);
546                         NVDmaNext (pNv, 0);
547                         NVDmaNext (pNv, 0);
548                 }
549
550                 /* DMA to VRAM */
551                 NVDmaStart(pNv, NvMemFormat,
552                                 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
553                 NVDmaNext (pNv, (uint32_t)pNv->GARTScratch->offset);
554                 NVDmaNext (pNv, (uint32_t)dst_offset);
555                 NVDmaNext (pNv, line_len);
556                 NVDmaNext (pNv, dst_pitch);
557                 NVDmaNext (pNv, line_len);
558                 NVDmaNext (pNv, lc);
559                 NVDmaNext (pNv, (1<<8)|1);
560                 NVDmaNext (pNv, 0);
561
562                 NVNotifierReset(pScrn, pNv->Notifier0);
563                 NVDmaStart(pNv, NvMemFormat,
564                                 NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
565                 NVDmaNext (pNv, 0);
566                 NVDmaStart(pNv, NvMemFormat, 0x100, 1);
567                 NVDmaNext (pNv, 0);
568                 NVDmaKickoff(pNv);
569                 if (!NVNotifierWaitStatus(pScrn, pNv->Notifier0, 0, 2000))
570                         return FALSE;
571
572                 dst_offset += lc * dst_pitch;
573                 line_count -= lc;
574         }
575
576         return TRUE;
577 }
578
579 static Bool NVUploadToScreen(PixmapPtr pDst,
580                              int x, int y, int w, int h,
581                              char *src, int src_pitch)
582 {
583         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
584         NVPtr pNv = NVPTR(pScrn);
585         int dst_offset, dst_pitch, cpp;
586         char *dst;
587
588         dst_offset = NVAccelGetPixmapOffset(pDst);
589         dst_pitch  = exaGetPixmapPitch(pDst);
590         cpp = pDst->drawable.bitsPerPixel >> 3;
591
592         /* try hostdata transfer */
593         if (pNv->Architecture < NV_ARCH_50 && w*h*cpp<16*1024) /* heuristic */
594         {
595                 int fmt;
596
597                 if (NVAccelGetCtxSurf2DFormatFromPixmap(pDst, &fmt)) {
598                         NVAccelSetCtxSurf2D(pDst, pDst, fmt);
599                         if (NVAccelUploadIFC(pScrn, src, src_pitch,
600                                                     x, y, w, h, cpp)) {
601                                 exaMarkSync(pDst->drawable.pScreen);
602                                 return TRUE;
603                         }
604                 }
605         }
606
607         /* try gart-based transfer */
608         if (pNv->GARTScratch) {
609                 dst_offset += (y * dst_pitch) + (x * cpp);
610                 if (NVAccelUploadM2MF(pScrn, dst_offset, src, dst_pitch,
611                                         src_pitch, w * cpp, h))
612                         return TRUE;
613         }
614
615         /* fallback to memcpy-based transfer */
616         dst = pDst->devPrivate.ptr + (y * dst_pitch) + (x * cpp);
617         exaWaitSync(pDst->drawable.pScreen);
618         if (NVAccelMemcpyRect(dst, src, h, dst_pitch, src_pitch, w*cpp))
619                 return TRUE;
620
621         return FALSE;
622 }
623
624
625 static Bool NVCheckComposite(int        op,
626                              PicturePtr pSrcPicture,
627                              PicturePtr pMaskPicture,
628                              PicturePtr pDstPicture)
629 {
630         CARD32 ret = 0;
631
632         /* PictOpOver doesn't work correctly. The HW command assumes
633          * non premuliplied alpha
634          */
635         if (pMaskPicture)
636                 ret = 0x1;
637         else if (op != PictOpOver &&  op != PictOpSrc)
638                 ret = 0x2;
639         else if (!pSrcPicture->pDrawable)
640                 ret = 0x4;
641         else if (pSrcPicture->transform || pSrcPicture->repeat)
642                 ret = 0x8;
643         else if (pSrcPicture->alphaMap || pDstPicture->alphaMap)
644                 ret = 0x10;
645         else if (pSrcPicture->format != PICT_a8r8g8b8 &&
646                         pSrcPicture->format != PICT_x8r8g8b8 &&
647                         pSrcPicture->format != PICT_r5g6b5)
648                 ret = 0x20;
649         else if (pDstPicture->format != PICT_a8r8g8b8 &&
650                         pDstPicture->format != PICT_x8r8g8b8 &&
651                         pDstPicture->format != PICT_r5g6b5)
652                 ret = 0x40;
653
654         return ret == 0;
655 }
656
657 static CARD32 src_size, src_pitch, src_offset;
658
659 static Bool NVPrepareComposite(int        op,
660                                PicturePtr pSrcPicture,
661                                PicturePtr pMaskPicture,
662                                PicturePtr pDstPicture,
663                                PixmapPtr  pSrc,
664                                PixmapPtr  pMask,
665                                PixmapPtr  pDst)
666 {
667         ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
668         NVPtr pNv = NVPTR(pScrn);
669         int srcFormat, dstFormat;
670
671         if (pSrcPicture->format == PICT_a8r8g8b8)
672                 srcFormat = STRETCH_BLIT_FORMAT_A8R8G8B8;
673         else if (pSrcPicture->format == PICT_x8r8g8b8)
674                 srcFormat = STRETCH_BLIT_FORMAT_X8R8G8B8;
675         else if (pSrcPicture->format == PICT_r5g6b5)
676                 srcFormat = STRETCH_BLIT_FORMAT_DEPTH16;
677         else
678                 return FALSE;
679
680         if (!NVAccelGetCtxSurf2DFormatFromPicture(pDstPicture, &dstFormat))
681                 return FALSE;
682         if (!NVAccelSetCtxSurf2D(pDst, pDst, dstFormat))
683                 return FALSE;
684
685         NVDmaStart(pNv, NvScaledImage, STRETCH_BLIT_FORMAT, 2);
686         NVDmaNext (pNv, srcFormat);
687         NVDmaNext (pNv, (op == PictOpSrc) ? STRETCH_BLIT_OPERATION_COPY : STRETCH_BLIT_OPERATION_BLEND);
688
689         src_size = ((pSrcPicture->pDrawable->width+3)&~3) |
690                 (pSrcPicture->pDrawable->height << 16);
691         src_pitch  = exaGetPixmapPitch(pSrc)
692                 | (STRETCH_BLIT_SRC_FORMAT_ORIGIN_CORNER << 16)
693                 | (STRETCH_BLIT_SRC_FORMAT_FILTER_POINT_SAMPLE << 24);
694         src_offset = NVAccelGetPixmapOffset(pSrc);
695
696         return TRUE;
697 }
698
699 static void NVComposite(PixmapPtr pDst,
700                         int       srcX,
701                         int       srcY,
702                         int       maskX,
703                         int       maskY,
704                         int       dstX,
705                         int       dstY,
706                         int       width,
707                         int       height)
708 {
709         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
710         NVPtr pNv = NVPTR(pScrn);
711
712         NVDmaStart(pNv, NvScaledImage, STRETCH_BLIT_CLIP_POINT, 6);
713         NVDmaNext (pNv, dstX | (dstY << 16));
714         NVDmaNext (pNv, width | (height << 16));
715         NVDmaNext (pNv, dstX | (dstY << 16));
716         NVDmaNext (pNv, width | (height << 16));
717         NVDmaNext (pNv, 1<<20);
718         NVDmaNext (pNv, 1<<20);
719
720         NVDmaStart(pNv, NvScaledImage, STRETCH_BLIT_SRC_SIZE, 4);
721         NVDmaNext (pNv, src_size);
722         NVDmaNext (pNv, src_pitch);
723         NVDmaNext (pNv, src_offset);
724         NVDmaNext (pNv, srcX | (srcY<<16));
725
726         NVDmaKickoff(pNv);
727 }
728
729 static void NVDoneComposite (PixmapPtr pDst)
730 {
731         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
732         NVPtr pNv = NVPTR(pScrn);
733         CARD32 format;
734
735         if (pNv->CurrentLayout.depth == 8)
736                 format = SURFACE_FORMAT_Y8;
737         else if (pNv->CurrentLayout.depth == 16)
738                 format = SURFACE_FORMAT_R5G6B5;
739         else
740                 format = SURFACE_FORMAT_X8R8G8B8;
741
742         NVDmaStart(pNv, NvContextSurfaces, SURFACE_FORMAT, 1);
743         NVDmaNext (pNv, format);
744
745         exaMarkSync(pDst->drawable.pScreen);
746 }
747
748 Bool NVExaInit(ScreenPtr pScreen) 
749 {
750         ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
751         NVPtr pNv = NVPTR(pScrn);
752
753         if(!(pNv->EXADriverPtr = (ExaDriverPtr) xnfcalloc(sizeof(ExaDriverRec), 1))) {
754                 pNv->NoAccel = TRUE;
755                 return FALSE;
756         }
757
758         pNv->EXADriverPtr->exa_major = EXA_VERSION_MAJOR;
759         pNv->EXADriverPtr->exa_minor = EXA_VERSION_MINOR;
760
761         pNv->EXADriverPtr->memoryBase           = pNv->FB->map;
762         pNv->EXADriverPtr->offScreenBase        =
763                 pScrn->virtualX * pScrn->virtualY*(pScrn->bitsPerPixel/8); 
764         pNv->EXADriverPtr->memorySize           = pNv->FB->size; 
765         pNv->EXADriverPtr->pixmapOffsetAlign    = 256; 
766         pNv->EXADriverPtr->pixmapPitchAlign     = 64; 
767         pNv->EXADriverPtr->flags                = EXA_OFFSCREEN_PIXMAPS;
768         pNv->EXADriverPtr->maxX                 = 32768;
769         pNv->EXADriverPtr->maxY                 = 32768;
770
771         pNv->EXADriverPtr->WaitMarker = NVExaWaitMarker;
772
773         /* Install default hooks */
774         pNv->EXADriverPtr->DownloadFromScreen = NVDownloadFromScreen; 
775         pNv->EXADriverPtr->UploadToScreen = NVUploadToScreen; 
776
777         if (pNv->Architecture < NV_ARCH_50) {
778                 pNv->EXADriverPtr->PrepareCopy = NVExaPrepareCopy;
779                 pNv->EXADriverPtr->Copy = NVExaCopy;
780                 pNv->EXADriverPtr->DoneCopy = NVExaDoneCopy;
781
782                 pNv->EXADriverPtr->PrepareSolid = NVExaPrepareSolid;
783                 pNv->EXADriverPtr->Solid = NVExaSolid;
784                 pNv->EXADriverPtr->DoneSolid = NVExaDoneSolid;
785         } else {
786                 pNv->EXADriverPtr->PrepareCopy = NV50EXAPrepareCopy;
787                 pNv->EXADriverPtr->Copy = NV50EXACopy;
788                 pNv->EXADriverPtr->DoneCopy = NV50EXADoneCopy;
789
790                 pNv->EXADriverPtr->PrepareSolid = NV50EXAPrepareSolid;
791                 pNv->EXADriverPtr->Solid = NV50EXASolid;
792                 pNv->EXADriverPtr->DoneSolid = NV50EXADoneSolid;
793         }
794
795         switch (pNv->Architecture) {
796 #if (X_BYTE_ORDER == X_LITTLE_ENDIAN) && defined(ENABLE_NV30EXA)
797 //      not working yet
798 //      case NV_ARCH_30:
799         case NV_ARCH_40:
800                 pNv->EXADriverPtr->CheckComposite   = NV30EXACheckComposite;
801                 pNv->EXADriverPtr->PrepareComposite = NV30EXAPrepareComposite;
802                 pNv->EXADriverPtr->Composite        = NV30EXAComposite;
803                 pNv->EXADriverPtr->DoneComposite    = NV30EXADoneComposite;
804                 break;
805 #endif
806         case NV_ARCH_50:
807                 break;
808         default:
809                 if (!pNv->BlendingPossible)
810                         break;
811                 pNv->EXADriverPtr->CheckComposite   = NVCheckComposite;
812                 pNv->EXADriverPtr->PrepareComposite = NVPrepareComposite;
813                 pNv->EXADriverPtr->Composite        = NVComposite;
814                 pNv->EXADriverPtr->DoneComposite    = NVDoneComposite;
815                 break;
816         }
817
818         return exaDriverInit(pScreen, pNv->EXADriverPtr);
819 }
820