EXA: Starting from version 2.4 we can't use devPrivate.ptr anymore.
[nouveau] / src / nv_exa.c
1  /***************************************************************************\
2 |*                                                                           *|
3 |*       Copyright 2003 NVIDIA, Corporation.  All rights reserved.           *|
4 |*                                                                           *|
5 |*     NOTICE TO USER:   The source code  is copyrighted under  U.S. and     *|
6 |*     international laws.  Users and possessors of this source code are     *|
7 |*     hereby granted a nonexclusive,  royalty-free copyright license to     *|
8 |*     use this code in individual and commercial software.                  *|
9 |*                                                                           *|
10 |*     Any use of this source code must include,  in the user documenta-     *|
11 |*     tion and  internal comments to the code,  notices to the end user     *|
12 |*     as follows:                                                           *|
13 |*                                                                           *|
14 |*       Copyright 2003 NVIDIA, Corporation.  All rights reserved.           *|
15 |*                                                                           *|
16 |*     NVIDIA, CORPORATION MAKES NO REPRESENTATION ABOUT THE SUITABILITY     *|
17 |*     OF  THIS SOURCE  CODE  FOR ANY PURPOSE.  IT IS  PROVIDED  "AS IS"     *|
18 |*     WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.  NVIDIA, CORPOR-     *|
19 |*     ATION DISCLAIMS ALL WARRANTIES  WITH REGARD  TO THIS SOURCE CODE,     *|
20 |*     INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGE-     *|
21 |*     MENT,  AND FITNESS  FOR A PARTICULAR PURPOSE.   IN NO EVENT SHALL     *|
22 |*     NVIDIA, CORPORATION  BE LIABLE FOR ANY SPECIAL,  INDIRECT,  INCI-     *|
23 |*     DENTAL, OR CONSEQUENTIAL DAMAGES,  OR ANY DAMAGES  WHATSOEVER RE-     *|
24 |*     SULTING FROM LOSS OF USE,  DATA OR PROFITS,  WHETHER IN AN ACTION     *|
25 |*     OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF     *|
26 |*     OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE.     *|
27 |*                                                                           *|
28 |*     U.S. Government  End  Users.   This source code  is a "commercial     *|
29 |*     item,"  as that  term is  defined at  48 C.F.R. 2.101 (OCT 1995),     *|
30 |*     consisting  of "commercial  computer  software"  and  "commercial     *|
31 |*     computer  software  documentation,"  as such  terms  are  used in     *|
32 |*     48 C.F.R. 12.212 (SEPT 1995)  and is provided to the U.S. Govern-     *|
33 |*     ment only as  a commercial end item.   Consistent with  48 C.F.R.     *|
34 |*     12.212 and  48 C.F.R. 227.7202-1 through  227.7202-4 (JUNE 1995),     *|
35 |*     all U.S. Government End Users  acquire the source code  with only     *|
36 |*     those rights set forth herein.                                        *|
37 |*                                                                           *|
38  \***************************************************************************/
39
40 /*
41   Exa Modifications (c) Lars Knoll (lars@trolltech.com)
42  */
43
44 #ifdef HAVE_CONFIG_H
45 #include "config.h"
46 #endif
47
48 #include "nv_include.h"
49 #include "exa.h"
50
51 #include "nv_dma.h"
52 #include "nv_local.h"
53
54 #include <sys/time.h>
55
/*
 * Hardware raster-operation codes, indexed by the X11 GC function ("alu")
 * value, GXclear (0) through GXset (15).  Each entry repeats the same
 * 4-bit truth table in both nibbles.
 */
const int NVCopyROP[16] = {
	[0x0] = 0x00,	/* GXclear */
	[0x1] = 0x88,	/* GXand */
	[0x2] = 0x44,	/* GXandReverse */
	[0x3] = 0xCC,	/* GXcopy */
	[0x4] = 0x22,	/* GXandInverted */
	[0x5] = 0xAA,	/* GXnoop */
	[0x6] = 0x66,	/* GXxor */
	[0x7] = 0xEE,	/* GXor */
	[0x8] = 0x11,	/* GXnor */
	[0x9] = 0x99,	/* GXequiv */
	[0xa] = 0x55,	/* GXinvert */
	[0xb] = 0xDD,	/* GXorReverse */
	[0xc] = 0x33,	/* GXcopyInverted */
	[0xd] = 0xBB,	/* GXorInverted */
	[0xe] = 0x77,	/* GXnand */
	[0xf] = 0xFF,	/* GXset */
};
75
76 static void 
77 NVSetPattern(ScrnInfoPtr pScrn, CARD32 clr0, CARD32 clr1,
78                                 CARD32 pat0, CARD32 pat1)
79 {
80         NVPtr pNv = NVPTR(pScrn);
81
82         NVDmaStart(pNv, NvImagePattern, PATTERN_COLOR_0, 4);
83         NVDmaNext (pNv, clr0);
84         NVDmaNext (pNv, clr1);
85         NVDmaNext (pNv, pat0);
86         NVDmaNext (pNv, pat1);
87 }
88
89 static void 
90 NVSetROP(ScrnInfoPtr pScrn, CARD32 alu, CARD32 planemask)
91 {
92         NVPtr pNv = NVPTR(pScrn);
93         int rop = NVCopyROP[alu] & 0xf0;
94
95         if (planemask != ~0) {
96                 NVSetPattern(pScrn, 0, planemask, ~0, ~0);
97                 if (pNv->currentRop != (alu + 32)) {
98                         NVDmaStart(pNv, NvRop, ROP_SET, 1);
99                         NVDmaNext (pNv, rop | 0x0a);
100                         pNv->currentRop = alu + 32;
101                 }
102         } else
103         if (pNv->currentRop != alu) {
104                 if(pNv->currentRop >= 16)
105                         NVSetPattern(pScrn, ~0, ~0, ~0, ~0);
106                 NVDmaStart(pNv, NvRop, ROP_SET, 1);
107                 NVDmaNext (pNv, rop | (rop >> 4));
108                 pNv->currentRop = alu;
109         }
110 }
111
112 static void setM2MFDirection(ScrnInfoPtr pScrn, int dir)
113 {
114         NVPtr pNv = NVPTR(pScrn);
115
116         if (pNv->M2MFDirection != dir) {
117
118                 NVDmaStart(pNv, NvMemFormat, MEMFORMAT_DMA_OBJECT_IN, 2);
119                 NVDmaNext (pNv, dir ? NvDmaTT : NvDmaFB);
120                 NVDmaNext (pNv, dir ? NvDmaFB : NvDmaTT);
121                 pNv->M2MFDirection = dir;
122         }
123 }
124
125 static CARD32 rectFormat(DrawablePtr pDrawable)
126 {
127         switch(pDrawable->bitsPerPixel) {
128         case 32:
129         case 24:
130                 return RECT_FORMAT_DEPTH24;
131                 break;
132         case 16:
133                 return RECT_FORMAT_DEPTH16;
134                 break;
135         default:
136                 return RECT_FORMAT_DEPTH8;
137                 break;
138         }
139 }
140
141 /* EXA acceleration hooks */
142 static void NVExaWaitMarker(ScreenPtr pScreen, int marker)
143 {
144         NVSync(xf86Screens[pScreen->myNum]);
145 }
146
147 static Bool NVExaPrepareSolid(PixmapPtr pPixmap,
148                               int   alu,
149                               Pixel planemask,
150                               Pixel fg)
151 {
152         ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
153         NVPtr pNv = NVPTR(pScrn);
154         int fmt;
155
156         planemask |= ~0 << pPixmap->drawable.bitsPerPixel;
157         if (planemask != ~0 || alu != GXcopy) {
158                 if (pPixmap->drawable.bitsPerPixel == 32)
159                         return FALSE;
160                 NVDmaStart(pNv, NvRectangle, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
161                 NVDmaNext (pNv, 1 /* ROP_AND */);
162                 NVSetROP(pScrn, alu, planemask);
163         } else {
164                 NVDmaStart(pNv, NvRectangle, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
165                 NVDmaNext (pNv, 3 /* SRCCOPY */);
166         }
167
168         if (!NVAccelGetCtxSurf2DFormatFromPixmap(pPixmap, &fmt))
169                 return FALSE;
170
171         /* When SURFACE_FORMAT_A8R8G8B8 is used with GDI_RECTANGLE_TEXT, the 
172          * alpha channel gets forced to 0xFF for some reason.  We're using 
173          * SURFACE_FORMAT_Y32 as a workaround
174          */
175         if (fmt == SURFACE_FORMAT_A8R8G8B8)
176                 fmt = 0xb;
177
178         if (!NVAccelSetCtxSurf2D(pPixmap, pPixmap, fmt))
179                 return FALSE;
180
181         NVDmaStart(pNv, NvRectangle, RECT_FORMAT, 1);
182         NVDmaNext (pNv, rectFormat(&pPixmap->drawable));
183         NVDmaStart(pNv, NvRectangle, RECT_SOLID_COLOR, 1);
184         NVDmaNext (pNv, fg);
185
186         pNv->DMAKickoffCallback = NVDmaKickoffCallback;
187         return TRUE;
188 }
189
190 static void NVExaSolid (PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
191 {
192         ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
193         NVPtr pNv = NVPTR(pScrn);
194         int width = x2-x1;
195         int height = y2-y1;
196
197         NVDmaStart(pNv, NvRectangle, RECT_SOLID_RECTS(0), 2);
198         NVDmaNext (pNv, (x1 << 16) | y1);
199         NVDmaNext (pNv, (width << 16) | height);
200
201         if((width * height) >= 512)
202                 NVDmaKickoff(pNv);
203 }
204
205 static void NVExaDoneSolid (PixmapPtr pPixmap)
206 {
207 }
208
209 static Bool NVExaPrepareCopy(PixmapPtr pSrcPixmap,
210                              PixmapPtr pDstPixmap,
211                              int       dx,
212                              int       dy,
213                              int       alu,
214                              Pixel     planemask)
215 {
216         ScrnInfoPtr pScrn = xf86Screens[pSrcPixmap->drawable.pScreen->myNum];
217         NVPtr pNv = NVPTR(pScrn);
218         int fmt;
219
220         if (pSrcPixmap->drawable.bitsPerPixel !=
221                         pDstPixmap->drawable.bitsPerPixel)
222                 return FALSE;
223
224         planemask |= ~0 << pDstPixmap->drawable.bitsPerPixel;
225         if (planemask != ~0 || alu != GXcopy) {
226                 if (pDstPixmap->drawable.bitsPerPixel == 32)
227                         return FALSE;
228                 NVDmaStart(pNv, NvImageBlit, NV_IMAGE_BLIT_OPERATION, 1);
229                 NVDmaNext (pNv, 1 /* ROP_AND */);
230                 NVSetROP(pScrn, alu, planemask);
231         } else {
232                 NVDmaStart(pNv, NvImageBlit, NV_IMAGE_BLIT_OPERATION, 1);
233                 NVDmaNext (pNv, 3 /* SRCCOPY */);
234         }
235
236         if (!NVAccelGetCtxSurf2DFormatFromPixmap(pDstPixmap, &fmt))
237                 return FALSE;
238         if (!NVAccelSetCtxSurf2D(pSrcPixmap, pDstPixmap, fmt))
239                 return FALSE;
240
241         pNv->DMAKickoffCallback = NVDmaKickoffCallback;
242         return TRUE;
243 }
244
245 static void NVExaCopy(PixmapPtr pDstPixmap,
246                       int       srcX,
247                       int       srcY,
248                       int       dstX,
249                       int       dstY,
250                       int       width,
251                       int       height)
252 {
253         ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
254         NVPtr pNv = NVPTR(pScrn);
255
256         /* Now check whether we have the same values for srcY and dstY and
257            whether the used chipset is buggy. Currently we flag all of G70
258            cards as buggy, which is probably much to broad. KoalaBR 
259            16 is an abritrary threshold. It should define the maximum number
260            of lines between dstY and srcY  If the number of lines is below
261            we guess, that the bug won't trigger...
262          */
263         if ( ((abs(srcY - dstY)< 16)||(abs(srcX-dstX)<16)) &&
264                 ((((pNv->Chipset & 0xfff0) == CHIPSET_G70) ||
265                  ((pNv->Chipset & 0xfff0) == CHIPSET_G71) ||
266                  ((pNv->Chipset & 0xfff0) == CHIPSET_G72) ||
267                  ((pNv->Chipset & 0xfff0) == CHIPSET_G73) ||
268                  ((pNv->Chipset & 0xfff0) == CHIPSET_C512))) )
269         {
270                 int dx=abs(srcX - dstX),dy=abs(srcY - dstY);
271                 // Ok, let's do it manually unless someone comes up with a better idea
272                 // 1. If dstY and srcY are really the same, do a copy rowwise
273                 if (dy<dx) {
274                         int i,xpos,inc;
275                         NVDEBUG("ExaCopy: Lines identical:\n");
276                         if (srcX>=dstX) {
277                                 xpos=0;
278                                 inc=1;
279                         } else {
280                                 xpos=width-1;
281                                 inc=-1;
282                         }
283                         for (i = 0; i < width; i++) {
284                                 NVDmaStart(pNv, NvImageBlit, BLIT_POINT_SRC, 3);
285                                 NVDmaNext (pNv, (srcY << 16) | (srcX+xpos));
286                                 NVDmaNext (pNv, (dstY << 16) | (dstX+xpos));
287                                 NVDmaNext (pNv, (height  << 16) | 1);
288                                 xpos+=inc;
289                         }
290                 } else {
291                         // 2. Otherwise we will try a line by line copy in the hope to avoid
292                         //    the card's bug.
293                         int i,ypos,inc;
294                         NVDEBUG("ExaCopy: Lines nearly the same srcY=%d, dstY=%d:\n", srcY, dstY);
295                         if (srcY>=dstY) {
296                                 ypos=0;
297                                 inc=1;
298                         } else {
299                                 ypos=height-1;
300                                 inc=-1;
301                         }
302                         for (i = 0; i < height; i++) {
303                                 NVDmaStart(pNv, NvImageBlit, BLIT_POINT_SRC, 3);
304                                 NVDmaNext (pNv, ((srcY+ypos) << 16) | srcX);
305                                 NVDmaNext (pNv, ((dstY+ypos) << 16) | dstX);
306                                 NVDmaNext (pNv, (1  << 16) | width);
307                                 ypos+=inc;
308                         }
309                 } 
310         } else {
311                 NVDEBUG("ExaCopy: Using default path\n");
312                 NVDmaStart(pNv, NvImageBlit, BLIT_POINT_SRC, 3);
313                 NVDmaNext (pNv, (srcY << 16) | srcX);
314                 NVDmaNext (pNv, (dstY << 16) | dstX);
315                 NVDmaNext (pNv, (height  << 16) | width);
316         }
317
318         if((width * height) >= 512)
319                 NVDmaKickoff(pNv); 
320 }
321
322 static void NVExaDoneCopy (PixmapPtr pDstPixmap) {}
323
324 Bool NVAccelMemcpyRect(char *dst, const char *src, int height,
325                        int dst_pitch, int src_pitch, int line_len)
326 {
327         if ((src_pitch == line_len) && (src_pitch == dst_pitch)) {
328                 memcpy(dst, src, line_len*height);
329         } else {
330                 while (height--) {
331                         memcpy(dst, src, line_len);
332                         src += src_pitch;
333                         dst += dst_pitch;
334                 }
335         }
336
337         return TRUE;
338 }
339
340 Bool
341 NVAccelDownloadM2MF(ScrnInfoPtr pScrn, char *dst, uint64_t src_offset,
342                                      int dst_pitch, int src_pitch,
343                                      int line_len, int line_count)
344 {
345         NVPtr pNv = NVPTR(pScrn);
346
347         setM2MFDirection(pScrn, 0);
348
349         while (line_count) {
350                 char *src = pNv->GARTScratch->map;
351                 int lc, i;
352
353                 if (line_count * line_len <= pNv->GARTScratch->size) {
354                         lc = line_count;
355                 } else {
356                         lc = pNv->GARTScratch->size / line_len;
357                         if (lc > line_count)
358                                 lc = line_count;
359                 }
360
361                 /* HW limitations */
362                 if (lc > 2047)
363                         lc = 2047;
364
365                 if (pNv->Architecture >= NV_ARCH_50) {
366                         NVDmaStart(pNv, NvMemFormat, 0x200, 1);
367                         NVDmaNext (pNv, 1);
368                         NVDmaStart(pNv, NvMemFormat, 0x21c, 1);
369                         NVDmaNext (pNv, 1);
370                         /* probably high-order bits of address */
371                         NVDmaStart(pNv, NvMemFormat, 0x238, 2);
372                         NVDmaNext (pNv, 0);
373                         NVDmaNext (pNv, 0);
374                 }
375
376                 NVDmaStart(pNv, NvMemFormat,
377                                 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
378                 NVDmaNext (pNv, (uint32_t)src_offset);
379                 NVDmaNext (pNv, (uint32_t)pNv->GARTScratch->offset);
380                 NVDmaNext (pNv, src_pitch);
381                 NVDmaNext (pNv, line_len);
382                 NVDmaNext (pNv, line_len);
383                 NVDmaNext (pNv, lc);
384                 NVDmaNext (pNv, (1<<8)|1);
385                 NVDmaNext (pNv, 0);
386
387                 NVNotifierReset(pScrn, pNv->Notifier0);
388                 NVDmaStart(pNv, NvMemFormat,
389                                 NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
390                 NVDmaNext (pNv, 0);
391                 NVDmaStart(pNv, NvMemFormat, 0x100, 1);
392                 NVDmaNext (pNv, 0);
393                 NVDmaKickoff(pNv);
394                 if (!NVNotifierWaitStatus(pScrn, pNv->Notifier0, 0, 2000))
395                         return FALSE;
396
397                 if (dst_pitch == line_len) {
398                         memcpy(dst, src, dst_pitch * lc);
399                         dst += dst_pitch * lc;
400                 } else {
401                         for (i = 0; i < lc; i++) {
402                                 memcpy(dst, src, line_len);
403                                 src += line_len;
404                                 dst += dst_pitch;
405                         }
406                 }
407
408                 line_count -= lc;
409                 src_offset += lc * src_pitch;
410         }
411
412         return TRUE;
413 }
414
415 static Bool NVDownloadFromScreen(PixmapPtr pSrc,
416                                  int x,  int y,
417                                  int w,  int h,
418                                  char *dst,  int dst_pitch)
419 {
420         ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
421         NVPtr pNv = NVPTR(pScrn);
422         int src_offset, src_pitch, cpp, offset;
423         const char *src;
424
425         src_offset = NVAccelGetPixmapOffset(pSrc);
426         src_pitch  = exaGetPixmapPitch(pSrc);
427         cpp = pSrc->drawable.bitsPerPixel >> 3;
428         offset = (y * src_pitch) + (x * cpp);
429
430         if (pNv->GARTScratch) {
431                 if (NVAccelDownloadM2MF(pScrn, dst,
432                                         src_offset + offset,
433                                         dst_pitch, src_pitch, w * cpp, h))
434                         return TRUE;
435         }
436
437         src = (char *) src_offset + offset;
438         exaWaitSync(pSrc->drawable.pScreen);
439         if (NVAccelMemcpyRect(dst, src, h, dst_pitch, src_pitch, w*cpp))
440                 return TRUE;
441
442         return FALSE;
443 }
444
445 Bool
446 NVAccelUploadIFC(ScrnInfoPtr pScrn, const char *src, int src_pitch,
447                                     int x, int y, int w, int h, int cpp)
448 {
449         NVPtr pNv = NVPTR(pScrn);
450         int line_len = w * cpp;
451         int iw, id, fmt;
452
453         if (pNv->Architecture >= NV_ARCH_50)
454                 return FALSE;
455
456         if (h > 1024)
457                 return FALSE;
458
459         switch (cpp) {
460         case 2: fmt = 1; break;
461         case 4: fmt = 4; break;
462         default:
463                 return FALSE;
464         }
465
466         /* Pad out input width to cover both COLORA() and COLORB() */
467         iw  = (line_len + 7) & ~7;
468         id  = iw / 4; /* line push size */
469         iw /= cpp;
470
471         /* Don't support lines longer than max push size yet.. */
472         if (id > 1792)
473                 return FALSE;
474
475         NVDmaStart(pNv, NvClipRectangle, CLIP_POINT, 2);
476         NVDmaNext (pNv, 0x0); 
477         NVDmaNext (pNv, 0x7FFF7FFF);
478
479         NVDmaStart(pNv, NvImageFromCpu, NV05_IMAGE_FROM_CPU_OPERATION, 2);
480         NVDmaNext (pNv, 0x3 /* SRCCOPY */);
481         NVDmaNext (pNv, fmt);
482         NVDmaStart(pNv, NvImageFromCpu, NV05_IMAGE_FROM_CPU_POINT, 3);
483         NVDmaNext (pNv, (y << 16) | x); /* dst point */
484         NVDmaNext (pNv, (h << 16) | w); /* width/height out */
485         NVDmaNext (pNv, (h << 16) | iw); /* width/height in */
486
487         while (h--) {
488                 char *dst;
489                 /* send a line */
490                 NVDmaStart(pNv, NvImageFromCpu,
491                                 NV10_IMAGE_FROM_CPU_HLINE, id);
492                 dst = (char *)pNv->dmaBase + (pNv->dmaCurrent << 2);
493                 memcpy(dst, src, line_len);
494                 pNv->dmaCurrent += id;
495
496                 src += src_pitch;
497         }
498
499         return TRUE;
500 }
501
502 Bool
503 NVAccelUploadM2MF(ScrnInfoPtr pScrn, uint64_t dst_offset, const char *src,
504                                      int dst_pitch, int src_pitch,
505                                      int line_len, int line_count)
506 {
507         NVPtr pNv = NVPTR(pScrn);
508
509         setM2MFDirection(pScrn, 1);
510
511         while (line_count) {
512                 char *dst = pNv->GARTScratch->map;
513                 int lc, i;
514
515                 /* Determine max amount of data we can DMA at once */
516                 if (line_count * line_len <= pNv->GARTScratch->size) {
517                         lc = line_count;
518                 } else {
519                         lc = pNv->GARTScratch->size / line_len;
520                         if (lc > line_count)
521                                 lc = line_count;
522                 }
523
524                 /* HW limitations */
525                 if (lc > 2047)
526                         lc = 2047;
527
528                 /* Upload to GART */
529                 if (src_pitch == line_len) {
530                         memcpy(dst, src, src_pitch * lc);
531                         src += src_pitch * lc;
532                 } else {
533                         for (i = 0; i < lc; i++) {
534                                 memcpy(dst, src, line_len);
535                                 src += src_pitch;
536                                 dst += line_len;
537                         }
538                 }
539
540                 if (pNv->Architecture >= NV_ARCH_50) {
541                         NVDmaStart(pNv, NvMemFormat, 0x200, 1);
542                         NVDmaNext (pNv, 1);
543                         NVDmaStart(pNv, NvMemFormat, 0x21c, 1);
544                         NVDmaNext (pNv, 1);
545                         /* probably high-order bits of address */
546                         NVDmaStart(pNv, NvMemFormat, 0x238, 2);
547                         NVDmaNext (pNv, 0);
548                         NVDmaNext (pNv, 0);
549                 }
550
551                 /* DMA to VRAM */
552                 NVDmaStart(pNv, NvMemFormat,
553                                 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
554                 NVDmaNext (pNv, (uint32_t)pNv->GARTScratch->offset);
555                 NVDmaNext (pNv, (uint32_t)dst_offset);
556                 NVDmaNext (pNv, line_len);
557                 NVDmaNext (pNv, dst_pitch);
558                 NVDmaNext (pNv, line_len);
559                 NVDmaNext (pNv, lc);
560                 NVDmaNext (pNv, (1<<8)|1);
561                 NVDmaNext (pNv, 0);
562
563                 NVNotifierReset(pScrn, pNv->Notifier0);
564                 NVDmaStart(pNv, NvMemFormat,
565                                 NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
566                 NVDmaNext (pNv, 0);
567                 NVDmaStart(pNv, NvMemFormat, 0x100, 1);
568                 NVDmaNext (pNv, 0);
569                 NVDmaKickoff(pNv);
570                 if (!NVNotifierWaitStatus(pScrn, pNv->Notifier0, 0, 2000))
571                         return FALSE;
572
573                 dst_offset += lc * dst_pitch;
574                 line_count -= lc;
575         }
576
577         return TRUE;
578 }
579
580 static Bool NVUploadToScreen(PixmapPtr pDst,
581                              int x, int y, int w, int h,
582                              char *src, int src_pitch)
583 {
584         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
585         NVPtr pNv = NVPTR(pScrn);
586         int dst_offset, dst_pitch, cpp;
587         char *dst;
588
589         dst_offset = NVAccelGetPixmapOffset(pDst);
590         dst_pitch  = exaGetPixmapPitch(pDst);
591         cpp = pDst->drawable.bitsPerPixel >> 3;
592
593         /* try hostdata transfer */
594         if (pNv->Architecture < NV_ARCH_50 && w*h*cpp<16*1024) /* heuristic */
595         {
596                 int fmt;
597
598                 if (NVAccelGetCtxSurf2DFormatFromPixmap(pDst, &fmt)) {
599                         NVAccelSetCtxSurf2D(pDst, pDst, fmt);
600                         if (NVAccelUploadIFC(pScrn, src, src_pitch,
601                                                     x, y, w, h, cpp)) {
602                                 exaMarkSync(pDst->drawable.pScreen);
603                                 return TRUE;
604                         }
605                 }
606         }
607
608         /* try gart-based transfer */
609         if (pNv->GARTScratch) {
610                 dst_offset += (y * dst_pitch) + (x * cpp);
611                 if (NVAccelUploadM2MF(pScrn, dst_offset, src, dst_pitch,
612                                         src_pitch, w * cpp, h))
613                         return TRUE;
614         }
615
616         /* fallback to memcpy-based transfer */
617         dst = (char *) dst_offset + (y * dst_pitch) + (x * cpp);
618         exaWaitSync(pDst->drawable.pScreen);
619         if (NVAccelMemcpyRect(dst, src, h, dst_pitch, src_pitch, w*cpp))
620                 return TRUE;
621
622         return FALSE;
623 }
624
625
626 static Bool NVCheckComposite(int        op,
627                              PicturePtr pSrcPicture,
628                              PicturePtr pMaskPicture,
629                              PicturePtr pDstPicture)
630 {
631         CARD32 ret = 0;
632
633         /* PictOpOver doesn't work correctly. The HW command assumes
634          * non premuliplied alpha
635          */
636         if (pMaskPicture)
637                 ret = 0x1;
638         else if (op != PictOpOver &&  op != PictOpSrc)
639                 ret = 0x2;
640         else if (!pSrcPicture->pDrawable)
641                 ret = 0x4;
642         else if (pSrcPicture->transform || pSrcPicture->repeat)
643                 ret = 0x8;
644         else if (pSrcPicture->alphaMap || pDstPicture->alphaMap)
645                 ret = 0x10;
646         else if (pSrcPicture->format != PICT_a8r8g8b8 &&
647                         pSrcPicture->format != PICT_x8r8g8b8 &&
648                         pSrcPicture->format != PICT_r5g6b5)
649                 ret = 0x20;
650         else if (pDstPicture->format != PICT_a8r8g8b8 &&
651                         pDstPicture->format != PICT_x8r8g8b8 &&
652                         pDstPicture->format != PICT_r5g6b5)
653                 ret = 0x40;
654
655         return ret == 0;
656 }
657
658 static CARD32 src_size, src_pitch, src_offset;
659
660 static Bool NVPrepareComposite(int        op,
661                                PicturePtr pSrcPicture,
662                                PicturePtr pMaskPicture,
663                                PicturePtr pDstPicture,
664                                PixmapPtr  pSrc,
665                                PixmapPtr  pMask,
666                                PixmapPtr  pDst)
667 {
668         ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
669         NVPtr pNv = NVPTR(pScrn);
670         int srcFormat, dstFormat;
671
672         if (pSrcPicture->format == PICT_a8r8g8b8)
673                 srcFormat = STRETCH_BLIT_FORMAT_A8R8G8B8;
674         else if (pSrcPicture->format == PICT_x8r8g8b8)
675                 srcFormat = STRETCH_BLIT_FORMAT_X8R8G8B8;
676         else if (pSrcPicture->format == PICT_r5g6b5)
677                 srcFormat = STRETCH_BLIT_FORMAT_DEPTH16;
678         else
679                 return FALSE;
680
681         if (!NVAccelGetCtxSurf2DFormatFromPicture(pDstPicture, &dstFormat))
682                 return FALSE;
683         if (!NVAccelSetCtxSurf2D(pDst, pDst, dstFormat))
684                 return FALSE;
685
686         NVDmaStart(pNv, NvScaledImage, STRETCH_BLIT_FORMAT, 2);
687         NVDmaNext (pNv, srcFormat);
688         NVDmaNext (pNv, (op == PictOpSrc) ? STRETCH_BLIT_OPERATION_COPY : STRETCH_BLIT_OPERATION_BLEND);
689
690         src_size = ((pSrcPicture->pDrawable->width+3)&~3) |
691                 (pSrcPicture->pDrawable->height << 16);
692         src_pitch  = exaGetPixmapPitch(pSrc)
693                 | (STRETCH_BLIT_SRC_FORMAT_ORIGIN_CORNER << 16)
694                 | (STRETCH_BLIT_SRC_FORMAT_FILTER_POINT_SAMPLE << 24);
695         src_offset = NVAccelGetPixmapOffset(pSrc);
696
697         return TRUE;
698 }
699
700 static void NVComposite(PixmapPtr pDst,
701                         int       srcX,
702                         int       srcY,
703                         int       maskX,
704                         int       maskY,
705                         int       dstX,
706                         int       dstY,
707                         int       width,
708                         int       height)
709 {
710         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
711         NVPtr pNv = NVPTR(pScrn);
712
713         NVDmaStart(pNv, NvScaledImage, STRETCH_BLIT_CLIP_POINT, 6);
714         NVDmaNext (pNv, dstX | (dstY << 16));
715         NVDmaNext (pNv, width | (height << 16));
716         NVDmaNext (pNv, dstX | (dstY << 16));
717         NVDmaNext (pNv, width | (height << 16));
718         NVDmaNext (pNv, 1<<20);
719         NVDmaNext (pNv, 1<<20);
720
721         NVDmaStart(pNv, NvScaledImage, STRETCH_BLIT_SRC_SIZE, 4);
722         NVDmaNext (pNv, src_size);
723         NVDmaNext (pNv, src_pitch);
724         NVDmaNext (pNv, src_offset);
725         NVDmaNext (pNv, srcX | (srcY<<16));
726
727         NVDmaKickoff(pNv);
728 }
729
730 static void NVDoneComposite (PixmapPtr pDst)
731 {
732         ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
733         NVPtr pNv = NVPTR(pScrn);
734         CARD32 format;
735
736         if (pNv->CurrentLayout.depth == 8)
737                 format = SURFACE_FORMAT_Y8;
738         else if (pNv->CurrentLayout.depth == 16)
739                 format = SURFACE_FORMAT_R5G6B5;
740         else
741                 format = SURFACE_FORMAT_X8R8G8B8;
742
743         NVDmaStart(pNv, NvContextSurfaces, SURFACE_FORMAT, 1);
744         NVDmaNext (pNv, format);
745
746         exaMarkSync(pDst->drawable.pScreen);
747 }
748
749 Bool NVExaInit(ScreenPtr pScreen) 
750 {
751         ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
752         NVPtr pNv = NVPTR(pScrn);
753
754         if(!(pNv->EXADriverPtr = (ExaDriverPtr) xnfcalloc(sizeof(ExaDriverRec), 1))) {
755                 pNv->NoAccel = TRUE;
756                 return FALSE;
757         }
758
759         pNv->EXADriverPtr->exa_major = EXA_VERSION_MAJOR;
760         pNv->EXADriverPtr->exa_minor = EXA_VERSION_MINOR;
761
762         pNv->EXADriverPtr->memoryBase           = pNv->FB->map;
763         pNv->EXADriverPtr->offScreenBase        =
764                 pScrn->virtualX * pScrn->virtualY*(pScrn->bitsPerPixel/8); 
765         pNv->EXADriverPtr->memorySize           = pNv->FB->size; 
766         pNv->EXADriverPtr->pixmapOffsetAlign    = 256; 
767         pNv->EXADriverPtr->pixmapPitchAlign     = 64; 
768         pNv->EXADriverPtr->flags                = EXA_OFFSCREEN_PIXMAPS;
769         pNv->EXADriverPtr->maxX                 = 32768;
770         pNv->EXADriverPtr->maxY                 = 32768;
771
772         pNv->EXADriverPtr->WaitMarker = NVExaWaitMarker;
773
774         /* Install default hooks */
775         pNv->EXADriverPtr->DownloadFromScreen = NVDownloadFromScreen; 
776         pNv->EXADriverPtr->UploadToScreen = NVUploadToScreen; 
777
778         if (pNv->Architecture < NV_ARCH_50) {
779                 pNv->EXADriverPtr->PrepareCopy = NVExaPrepareCopy;
780                 pNv->EXADriverPtr->Copy = NVExaCopy;
781                 pNv->EXADriverPtr->DoneCopy = NVExaDoneCopy;
782
783                 pNv->EXADriverPtr->PrepareSolid = NVExaPrepareSolid;
784                 pNv->EXADriverPtr->Solid = NVExaSolid;
785                 pNv->EXADriverPtr->DoneSolid = NVExaDoneSolid;
786         } else {
787                 pNv->EXADriverPtr->PrepareCopy = NV50EXAPrepareCopy;
788                 pNv->EXADriverPtr->Copy = NV50EXACopy;
789                 pNv->EXADriverPtr->DoneCopy = NV50EXADoneCopy;
790
791                 pNv->EXADriverPtr->PrepareSolid = NV50EXAPrepareSolid;
792                 pNv->EXADriverPtr->Solid = NV50EXASolid;
793                 pNv->EXADriverPtr->DoneSolid = NV50EXADoneSolid;
794         }
795
796         switch (pNv->Architecture) {
797 #if defined(ENABLE_NV30EXA)
798 //      not working yet
799 /*
800         case NV_ARCH_30:
801                 pNv->EXADriverPtr->CheckComposite   = NV30EXACheckComposite;
802                 pNv->EXADriverPtr->PrepareComposite = NV30EXAPrepareComposite;
803                 pNv->EXADriverPtr->Composite        = NV30EXAComposite;
804                 pNv->EXADriverPtr->DoneComposite    = NV30EXADoneComposite;
805                 break;
806 */
807 #endif
808 #if (X_BYTE_ORDER == X_LITTLE_ENDIAN) && defined(ENABLE_NV30EXA)
809         case NV_ARCH_40:
810                 pNv->EXADriverPtr->CheckComposite   = NV40EXACheckComposite;
811                 pNv->EXADriverPtr->PrepareComposite = NV40EXAPrepareComposite;
812                 pNv->EXADriverPtr->Composite        = NV40EXAComposite;
813                 pNv->EXADriverPtr->DoneComposite    = NV40EXADoneComposite;
814                 break;
815 #endif
816         case NV_ARCH_50:
817                 break;
818         default:
819                 if (!pNv->BlendingPossible)
820                         break;
821                 pNv->EXADriverPtr->CheckComposite   = NVCheckComposite;
822                 pNv->EXADriverPtr->PrepareComposite = NVPrepareComposite;
823                 pNv->EXADriverPtr->Composite        = NVComposite;
824                 pNv->EXADriverPtr->DoneComposite    = NVDoneComposite;
825                 break;
826         }
827
828         return exaDriverInit(pScreen, pNv->EXADriverPtr);
829 }
830