Preliminary implementation of "host-side double buffering" to improve performance...
[nouveau] / src / nv_video.c
1
2 #ifdef HAVE_CONFIG_H
3 #include "config.h"
4 #endif
5
6 #include "xf86.h"
7 #include "xf86_OSproc.h"
8 #include "xf86Resources.h"
9 #include "compiler.h"
10 #include "xf86PciInfo.h"
11 #include "xf86Pci.h"
12 #include "xf86fbman.h"
13 #include "regionstr.h"
14
15 #include "xf86xv.h"
16 #include <X11/extensions/Xv.h>
17 #include "xaa.h"
18 #include "xaalocal.h"
19 #include "exa.h"
20 #include "damage.h"
21 #include "dixstruct.h"
22 #include "fourcc.h"
23
24 #include "nv_include.h"
25 #include "nv_dma.h"
26
/* Largest source image advertised to clients through DummyEncoding. */
#define IMAGE_MAX_W 2046
#define IMAGE_MAX_H 2046

/*
 * Delays added to the current time when arming NVPortPrivRec.videoTime;
 * NVVideoTimerCallback acts once that deadline has passed.
 */
#define OFF_DELAY       500  /* milliseconds */
#define FREE_DELAY      5000

/* Bit flags kept in NVPortPrivRec.videoStatus. */
#define OFF_TIMER       0x01
#define FREE_TIMER      0x02
#define CLIENT_VIDEO_ON 0x04

#define TIMER_MASK      (OFF_TIMER | FREE_TIMER)

#define NUM_BLIT_PORTS 32
40
/*
 * Per-port private state shared by the overlay and the blit Xv adaptors.
 * Not every field is meaningful for both kinds of port.
 */
typedef struct _NVPortPrivRec {
        short           brightness;        /* written to NV_PVIDEO_LUMINANCE by NVResetVideo */
        short           contrast;          /* written to NV_PVIDEO_LUMINANCE by NVResetVideo */
        short           saturation;        /* combined with hue into NV_PVIDEO_CHROMINANCE */
        short           hue;               /* degrees; the setter normalises it to 0..359 */
        RegionRec       clip;              /* region last painted with the color key */
        CARD32          colorKey;
        Bool            autopaintColorKey; /* driver paints the color key itself when set */
        Bool            doubleBuffer;
        CARD32          videoStatus;       /* OFF_TIMER / FREE_TIMER / CLIENT_VIDEO_ON bits */
        int             currentBuffer;     /* selects the PVIDEO buffer register set */
        Time            videoTime;         /* deadline checked by NVVideoTimerCallback */
        Bool            grabbedByV4L;
        Bool            iturbt_709;        /* selects the ITU-R BT.709 matrix in NV_PVIDEO_FORMAT */
        Bool            blitter;           /* presumably TRUE for blit ports - TODO confirm */
        Bool            SyncToVBlank;      /* blit port: wait for vsync before blitting */
        NVAllocRec *    video_mem;         /* released by NVFree{Overlay,Blit}Memory */
        int             pitch;
        int             offset;
        NVAllocRec *    TT_mem_chunk[2];   /* presumably the host-side double buffers - TODO confirm */
        int             currentHostBuffer; /* presumably index into TT_mem_chunk - TODO confirm */
} NVPortPrivRec, *NVPortPrivPtr;
63
/*
 * Private record of the first port of the overlay adaptor.
 * The caller must make sure pNv->overlayAdaptor is non-NULL.
 */
#define GET_OVERLAY_PRIVATE(pNv) \
        (NVPortPrivPtr)((pNv)->overlayAdaptor->pPortPrivates[0].ptr)

/*
 * Private record of the first port of the blit adaptor.
 * The caller must make sure pNv->blitAdaptor is non-NULL.
 */
#define GET_BLIT_PRIVATE(pNv) \
        (NVPortPrivPtr)((pNv)->blitAdaptor->pPortPrivates[0].ptr)

/* Intern a string literal as an atom; sizeof(a) - 1 drops the trailing NUL. */
#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE)

/* Atoms the attribute getters/setters compare against. */
static Atom xvBrightness, xvContrast, xvColorKey, xvSaturation, 
            xvHue, xvAutopaintColorKey, xvSetDefaults, xvDoubleBuffer,
            xvITURBT709, xvSyncToVBlank;
75
76 /* client libraries expect an encoding */
77 static XF86VideoEncodingRec DummyEncoding =
78
79         0,
80         "XV_IMAGE",
81         IMAGE_MAX_W, IMAGE_MAX_H,
82         {1, 1}
83 };
84
#define NUM_FORMATS_ALL 6

/* Depth / visual-class pairs listed for the adaptors. */
XF86VideoFormatRec NVFormats[NUM_FORMATS_ALL] = 
{
        {15, TrueColor}, {16, TrueColor}, {24, TrueColor},
        {15, DirectColor}, {16, DirectColor}, {24, DirectColor}
};
92
#define NUM_OVERLAY_ATTRIBUTES 9
/*
 * Attributes of the overlay port: { flags, minimum, maximum, name }.
 * NVSetOverlayPortAttribute / NVGetOverlayPortAttribute implement them.
 */
XF86AttributeRec NVOverlayAttributes[NUM_OVERLAY_ATTRIBUTES] =
{
        {XvSettable | XvGettable, 0, 1, "XV_DOUBLE_BUFFER"},
        {XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"},
        {XvSettable | XvGettable, 0, 1, "XV_AUTOPAINT_COLORKEY"},
        {XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
        {XvSettable | XvGettable, -512, 511, "XV_BRIGHTNESS"},
        {XvSettable | XvGettable, 0, 8191, "XV_CONTRAST"},
        {XvSettable | XvGettable, 0, 8191, "XV_SATURATION"},
        {XvSettable | XvGettable, 0, 360, "XV_HUE"},
        {XvSettable | XvGettable, 0, 1, "XV_ITURBT_709"}
};
106
#define NUM_BLIT_ATTRIBUTES 2
/*
 * Attributes of the blit port: { flags, minimum, maximum, name }.
 * NVSetBlitPortAttribute / NVGetBlitPortAttribute implement them.
 */
XF86AttributeRec NVBlitAttributes[NUM_BLIT_ATTRIBUTES] =
{
        {XvSettable             , 0, 0, "XV_SET_DEFAULTS"},
        {XvSettable | XvGettable, 0, 1, "XV_SYNC_TO_VBLANK"}
};
113
114
/* NVImages holds the four YUV formats first, followed by the RGB format. */
#define NUM_IMAGES_YUV 4
#define NUM_IMAGES_ALL 5

/* Driver-private image id for packed 32 bpp RGB (not a real FOURCC code). */
#define FOURCC_RGB 0x0000003
/*
 * XF86ImageRec initialiser for FOURCC_RGB: packed, single plane,
 * 32 bits per pixel, depth 24, masks 0x00ff0000 / 0x0000ff00 / 0x000000ff,
 * component order "BGRX", scanlines top to bottom.
 */
#define XVIMAGE_RGB \
   { \
        FOURCC_RGB, \
        XvRGB, \
        LSBFirst, \
        { 0x03, 0x00, 0x00, 0x00, \
          0x00,0x00,0x00,0x10,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71}, \
        32, \
        XvPacked, \
        1, \
        24, 0x00ff0000, 0x0000ff00, 0x000000ff, \
        0, 0, 0, \
        0, 0, 0, \
        0, 0, 0, \
        {'B','G','R','X',\
          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, \
        XvTopToBottom \
   }
137
/*
 * Image formats table: the first NUM_IMAGES_YUV entries are the YUV
 * formats, the last entry is the RGB format defined by XVIMAGE_RGB.
 */
static XF86ImageRec NVImages[NUM_IMAGES_ALL] =
{
        XVIMAGE_YUY2,
        XVIMAGE_YV12,
        XVIMAGE_UYVY,
        XVIMAGE_I420,
        XVIMAGE_RGB
};
146
147 /**
148  * NVSetPortDefaults
149  * set attributes of port "pPriv" to compiled-in (except for colorKey) defaults
150  * 
151  * @param pScrn screen to get the default colorKey from
152  * @param pPriv port to reset to defaults
153  */
154 static void 
155 NVSetPortDefaults (ScrnInfoPtr pScrn, NVPortPrivPtr pPriv)
156 {
157         NVPtr pNv = NVPTR(pScrn);
158
159         pPriv->brightness               = 0;
160         pPriv->contrast                 = 4096;
161         pPriv->saturation               = 4096;
162         pPriv->hue                      = 0;
163         pPriv->colorKey                 = pNv->videoKey;
164         pPriv->autopaintColorKey        = TRUE;
165         pPriv->doubleBuffer             = TRUE;
166         pPriv->iturbt_709               = FALSE;
167 }
168
169 /**
170  * NVResetVideo
171  * writes the current attributes from the overlay port to the hardware
172  */
173 void 
174 NVResetVideo (ScrnInfoPtr pScrn)
175 {
176         NVPtr          pNv     = NVPTR(pScrn);
177         NVPortPrivPtr  pPriv   = GET_OVERLAY_PRIVATE(pNv);
178         int            satSine, satCosine;
179         double         angle;
180
181         angle = (double)pPriv->hue * 3.1415927 / 180.0;
182
183         satSine = pPriv->saturation * sin(angle);
184         if (satSine < -1024)
185                 satSine = -1024;
186         satCosine = pPriv->saturation * cos(angle);
187         if (satCosine < -1024)
188                 satCosine = -1024;
189
190         nvWriteVIDEO(pNv, NV_PVIDEO_LUMINANCE(0), (pPriv->brightness << 16) |
191                                                    pPriv->contrast);
192         nvWriteVIDEO(pNv, NV_PVIDEO_LUMINANCE(1), (pPriv->brightness << 16) |
193                                                    pPriv->contrast);
194         nvWriteVIDEO(pNv, NV_PVIDEO_CHROMINANCE(0), (satSine << 16) |
195                                                     (satCosine & 0xffff));
196         nvWriteVIDEO(pNv, NV_PVIDEO_CHROMINANCE(1), (satSine << 16) |
197                                                     (satCosine & 0xffff));
198         nvWriteVIDEO(pNv, NV_PVIDEO_COLOR_KEY, pPriv->colorKey);
199 }
200
201 /**
202  * NVStopOverlay
203  * Tell the hardware to stop the overlay
204  */
205 static void 
206 NVStopOverlay (ScrnInfoPtr pScrn)
207 {
208         NVPtr pNv = NVPTR(pScrn);
209
210         nvWriteVIDEO(pNv, NV_PVIDEO_STOP, 1);
211 }
212
213 /**
214  * NVAllocateVideoMemory
215  * allocates video memory for a given port
216  * 
217  * @param pScrn screen which requests the memory
218  * @param mem pointer to previously allocated memory for reallocation
219  * @param size size of requested memory segment
220  * @return pointer to the allocated memory
221  */
222 static NVAllocRec *
223 NVAllocateVideoMemory(ScrnInfoPtr pScrn, NVAllocRec *mem, int size)
224 {
225         NVPtr pNv = NVPTR(pScrn);
226
227         /* 
228         We allocate in bytes, so we need to adapt.
229          */
230         size *= (pScrn->bitsPerPixel >> 3);
231
232         if(mem) {
233                 if(mem->size >= size)
234                         return mem;
235                 NVFreeMemory(pNv, mem);
236         }
237
238         return NVAllocateMemory(pNv, NOUVEAU_MEM_FB, size); /* align 32? */
239 }
240
241 /**
242  * NVAllocateTTMemory
243  * allocates TT memory for a given port
244  * 
245  * @param pScrn screen which requests the memory
246  * @param mem pointer to previously allocated memory for reallocation
247  * @param size size of requested memory segment
248  * @return pointer to the allocated memory
249  */
250 static NVAllocRec *
251 NVAllocateTTMemory(ScrnInfoPtr pScrn, NVAllocRec *mem, int size)
252 {
253         NVPtr pNv = NVPTR(pScrn);
254
255         /* 
256         We allocate in bytes, so we need to adapt.
257          */
258         size *= (pScrn->bitsPerPixel >> 3);
259
260         if(mem) {
261                 if(mem->size >= size)
262                         return mem;
263                 NVFreeMemory(pNv, mem);
264         }
265         /*We take only AGP memory, because PCI DMA is too slow and I prefer a fallback on CPU copy.*/
266         return NVAllocateMemory(pNv, NOUVEAU_MEM_AGP, size); /* align 32? */
267 }
268
269 /**
270  * NVFreeOverlayMemory
271  * frees memory held by the overlay port
272  * this function (unlike NVAllocateOverlayMemory) is "Overlay"-specific
273  * 
274  * @param pScrn screen whose overlay port wants to free memory
275  */
276 static void
277 NVFreeOverlayMemory(ScrnInfoPtr pScrn)
278 {
279         NVPtr         pNv   = NVPTR(pScrn);
280         NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
281
282         if(pPriv->video_mem) {
283                 NVFreeMemory(pNv, pPriv->video_mem);
284                 pPriv->video_mem = NULL;
285         }
286         
287         if(pPriv->TT_mem_chunk[0]) {
288                 NVFreeMemory(pNv, pPriv->video_mem);
289                 pPriv->video_mem = NULL;
290         }
291         
292         if(pPriv->TT_mem_chunk[1]) {
293                 NVFreeMemory(pNv, pPriv->video_mem);
294                 pPriv->video_mem = NULL;
295         }
296 }
297
298 /**
299  * NVFreeBlitMemory
300  * frees memory held by the blit port
301  * 
302  * @param pScrn screen whose blit port wants to free memory
303  */
304 static void
305 NVFreeBlitMemory(ScrnInfoPtr pScrn)
306 {
307         NVPtr         pNv   = NVPTR(pScrn);
308         NVPortPrivPtr pPriv = GET_BLIT_PRIVATE(pNv);
309
310         if(pPriv->video_mem) {
311                 NVFreeMemory(pNv, pPriv->video_mem);
312                 pPriv->video_mem = NULL;
313         }
314         
315         if(pPriv->TT_mem_chunk[0]) {
316                 NVFreeMemory(pNv, pPriv->video_mem);
317                 pPriv->video_mem = NULL;
318         }
319         
320         if(pPriv->TT_mem_chunk[1]) {
321                 NVFreeMemory(pNv, pPriv->video_mem);
322                 pPriv->video_mem = NULL;
323         }
324 }
325
326 /**
327  * NVVideoTimerCallback
328  * callback function which perform cleanup tasks (stop overlay, free memory).
329  * within the driver it is only called once from NVBlockHandler in nv_driver.c
330  */
331 static void
332 NVVideoTimerCallback(ScrnInfoPtr pScrn, Time currentTime)
333 {
334         NVPtr         pNv = NVPTR(pScrn);
335         NVPortPrivPtr pOverPriv = NULL;
336         NVPortPrivPtr pBlitPriv = NULL;
337         Bool needCallback = FALSE;
338
339         if (!pScrn->vtSema)
340                 return; 
341
342         if (pNv->overlayAdaptor) {
343                 pOverPriv = GET_OVERLAY_PRIVATE(pNv);
344                 if (!pOverPriv->videoStatus)
345                         pOverPriv = NULL;
346         }
347
348         if (pNv->blitAdaptor) {
349                 pBlitPriv = GET_BLIT_PRIVATE(pNv);
350                 if (!pBlitPriv->videoStatus)
351                         pBlitPriv = NULL;
352         }
353
354         if (pOverPriv) {
355                 if (pOverPriv->videoTime < currentTime) {
356                         if (pOverPriv->videoStatus & OFF_TIMER) {
357                                 NVStopOverlay(pScrn);
358                                 pOverPriv->videoStatus = FREE_TIMER;
359                                 pOverPriv->videoTime = currentTime + FREE_DELAY;
360                                 needCallback = TRUE;
361                         } else
362                         if (pOverPriv->videoStatus & FREE_TIMER) {
363                                 NVFreeOverlayMemory(pScrn);
364                                 pOverPriv->videoStatus = 0;
365                         }
366                 } else {
367                         needCallback = TRUE;
368                 }
369         }
370
371         if (pBlitPriv) {
372                 if (pBlitPriv->videoTime < currentTime) {
373                         NVFreeBlitMemory(pScrn);
374                         pBlitPriv->videoStatus = 0;              
375                 } else {
376                         needCallback = TRUE;
377                 }
378         }
379
380         pNv->VideoTimerCallback = needCallback ? NVVideoTimerCallback : NULL;
381 }
382
383 /**
384  * NVPutOverlayImage
385  * program hardware to overlay image into front buffer
386  * 
387  * @param pScrn screen
388  * @param src_offset
389  * @param id colorspace of image
390  * @param src_pitch
391  * @param dstBox
392  * @param x1
393  * @param y1
394  * @param x2
395  * @param y2
396  * @param width
397  * @param height
398  * @param src_w
399  * @param src_h
400  * @param drw_w
401  * @param drw_h
402  * @param clipBoxes
403  */
404 static void
405 NVPutOverlayImage(ScrnInfoPtr pScrn, int offset, int id,
406                   int dstPitch, BoxPtr dstBox,
407                   int x1, int y1, int x2, int y2,
408                   short width, short height,
409                   short src_w, short src_h,
410                   short drw_w, short drw_h,
411                   RegionPtr clipBoxes)
412 {
413         NVPtr         pNv    = NVPTR(pScrn);
414         NVPortPrivPtr pPriv  = GET_OVERLAY_PRIVATE(pNv);
415         int           buffer = pPriv->currentBuffer;
416
417         /* paint the color key */
418         if(pPriv->autopaintColorKey && (pPriv->grabbedByV4L ||
419                 !REGION_EQUAL(pScrn->pScreen, &pPriv->clip, clipBoxes))) {
420                 /* we always paint V4L's color key */
421                 if (!pPriv->grabbedByV4L)
422                         REGION_COPY(pScrn->pScreen, &pPriv->clip, clipBoxes);
423                 xf86XVFillKeyHelper(pScrn->pScreen, pPriv->colorKey, clipBoxes);
424         }
425
426         if(pNv->CurrentLayout.mode->Flags & V_DBLSCAN) {
427                 dstBox->y1 <<= 1;
428                 dstBox->y2 <<= 1;
429                 drw_h <<= 1;
430         }
431
432         nvWriteVIDEO(pNv, NV_PVIDEO_BASE(buffer)     , offset);
433         nvWriteVIDEO(pNv, NV_PVIDEO_SIZE_IN(buffer)  , (height << 16) | width);
434         nvWriteVIDEO(pNv, NV_PVIDEO_POINT_IN(buffer) ,
435                           ((y1 << 4) & 0xffff0000) | (x1 >> 12));
436         nvWriteVIDEO(pNv, NV_PVIDEO_DS_DX(buffer)    , (src_w << 20) / drw_w);
437         nvWriteVIDEO(pNv, NV_PVIDEO_DT_DY(buffer)    , (src_h << 20) / drw_h);
438         nvWriteVIDEO(pNv, NV_PVIDEO_POINT_OUT(buffer),
439                           (dstBox->y1 << 16) | dstBox->x1);
440         nvWriteVIDEO(pNv, NV_PVIDEO_SIZE_OUT(buffer) ,
441                           ((dstBox->y2 - dstBox->y1) << 16) |
442                            (dstBox->x2 - dstBox->x1));
443
444         dstPitch |= NV_PVIDEO_FORMAT_DISPLAY_COLOR_KEY;   /* use color key */
445         if(id != FOURCC_UYVY)
446                 dstPitch |= NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8;
447         if(pPriv->iturbt_709)
448                 dstPitch |= NV_PVIDEO_FORMAT_MATRIX_ITURBT709;
449
450         nvWriteVIDEO(pNv, NV_PVIDEO_FORMAT(buffer), dstPitch);
451         nvWriteVIDEO(pNv, NV_PVIDEO_STOP, 0);
452         nvWriteVIDEO(pNv, NV_PVIDEO_BUFFER, buffer ? 0x10 : 0x1);
453
454         pPriv->videoStatus = CLIENT_VIDEO_ON;
455 }
456
/*
 * Fallback prototypes for EXA entry points used by NVPutBlitImage.
 * Each one is only declared when no macro of the same name exists.
 */
#ifndef ExaOffscreenMarkUsed
extern void ExaOffscreenMarkUsed(PixmapPtr);
#endif
#ifndef exaGetDrawablePixmap
extern PixmapPtr exaGetDrawablePixmap(DrawablePtr);
#endif
#ifndef exaPixmapIsOffscreen
extern Bool exaPixmapIsOffscreen(PixmapPtr p);
#endif
/* To support EXA 2.0, 2.1 has this in the header */
#ifndef exaMoveInPixmap
extern void exaMoveInPixmap(PixmapPtr pPixmap);
#endif
470
/**
 * NVPutBlitImage
 * Draws an image with the scaled-image (stretch blit) engine: one blit is
 * queued per clip rectangle, then the DMA buffer is kicked off and the
 * blit port's FREE_TIMER is armed so its memory is released later by
 * NVVideoTimerCallback.
 * 
 * @param pScrn screen
 * @param src_offset offset of the source image, passed to the blitter as is
 * @param id colorspace of image
 * @param src_pitch pitch of the source image; origin/filter flags are
 *                  OR'ed into it before it is sent
 * @param dstBox destination box; translated in place when drawing to a
 *               redirected pixmap (COMPOSITE)
 * @param x1 left source coordinate (presumably 16.16 fixed point)
 * @param y1 top source coordinate (presumably 16.16 fixed point)
 * @param x2 unused here
 * @param y2 unused here
 * @param width source image width
 * @param height source image height
 * @param src_w width of the source area
 * @param src_h height of the source area
 * @param drw_w width of the drawn area (divisor of the x scale factor)
 * @param drw_h height of the drawn area (divisor of the y scale factor)
 * @param clipBoxes clip region; translated in place under COMPOSITE
 * @param pDraw destination drawable (only looked at on the EXA path)
 */
static void
NVPutBlitImage(ScrnInfoPtr pScrn, int src_offset, int id,
               int src_pitch, BoxPtr dstBox,
               int x1, int y1, int x2, int y2,
               short width, short height,
               short src_w, short src_h,
               short drw_w, short drw_h,
               RegionPtr clipBoxes,
               DrawablePtr pDraw)
{
        NVPtr          pNv   = NVPTR(pScrn);
        NVPortPrivPtr  pPriv = GET_BLIT_PRIVATE(pNv);
        BoxPtr         pbox;
        int            nbox;
        CARD32         dsdx, dtdy;
        CARD32         dst_size, dst_point;
        CARD32         src_point, src_format;

        if (pNv->useEXA) {
                ScreenPtr pScreen = pScrn->pScreen;
                PixmapPtr pPix    = exaGetDrawablePixmap(pDraw);
                int dst_format;

                /* Try to get the dest drawable into vram */
                if (!exaPixmapIsOffscreen(pPix)) {
                        exaMoveInPixmap(pPix);
                        ExaOffscreenMarkUsed(pPix);
                }

                /* If we failed, draw directly onto the screen pixmap.
                 * Not sure if this is the best approach, maybe failing
                 * with BadAlloc would be better?
                 */
                if (!exaPixmapIsOffscreen(pPix)) {
                        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
                                "XV: couldn't move dst surface into vram\n");
                        pPix = pScreen->GetScreenPixmap(pScreen);
                }

                /* make the chosen pixmap the 2D destination surface */
                NVAccelGetCtxSurf2DFormatFromPixmap(pPix, &dst_format);
                NVAccelSetCtxSurf2D(pPix, pPix, dst_format);

#ifdef COMPOSITE
                /* Adjust coordinates if drawing to an offscreen pixmap */
                if (pPix->screen_x || pPix->screen_y) {
                        REGION_TRANSLATE(pScrn->pScreen, clipBoxes,
                                                             -pPix->screen_x,
                                                             -pPix->screen_y);
                        dstBox->x1 -= pPix->screen_x;
                        dstBox->x2 -= pPix->screen_x;
                        dstBox->y1 -= pPix->screen_y;
                        dstBox->y2 -= pPix->screen_y;
                }

                DamageDamageRegion((DrawablePtr)pPix, clipBoxes);
#endif
        } else {
                /* non-EXA, depth 15: switch the surface format for the blit;
                 * it is set to R5G6B5 again after the loop below */
                if (pNv->CurrentLayout.depth == 15) {
                        NVDmaStart(pNv, NvSubContextSurfaces,
                                        SURFACE_FORMAT, 1);
                        NVDmaNext (pNv, SURFACE_FORMAT_X1R5G5B5);
                }
        }

        pbox = REGION_RECTS(clipBoxes);
        nbox = REGION_NUM_RECTS(clipBoxes);

        /* source/destination size ratios, scaled by 2^20 */
        dsdx = (src_w << 20) / drw_w;
        dtdy = (src_h << 20) / drw_h;

        /* height/y in the upper 16 bits, width/x in the lower 16 bits */
        dst_size  = ((dstBox->y2 - dstBox->y1) << 16) |
                     (dstBox->x2 - dstBox->x1);
        dst_point = (dstBox->y1 << 16) | dstBox->x1;

        src_pitch |= (STRETCH_BLIT_SRC_FORMAT_ORIGIN_CENTER << 16) |
                    (STRETCH_BLIT_SRC_FORMAT_FILTER_BILINEAR << 24);
        src_point = ((y1 << 4) & 0xffff0000) | (x1 >> 12);

        /* every id other than RGB and UYVY is sent as YUYV */
        switch(id) {
        case FOURCC_RGB:
                src_format = STRETCH_BLIT_FORMAT_X8R8G8B8;
                break;
        case FOURCC_UYVY:
                src_format = STRETCH_BLIT_FORMAT_UYVY;
                break;
        default:
                src_format = STRETCH_BLIT_FORMAT_YUYV;
                break;
        }

        /* submit what is queued so far, then wait for vsync before blitting */
        if(pPriv->SyncToVBlank) {
                NVDmaKickoff(pNv);
                NVWaitVSync(pScrn);
        }

        /* the operation word is only sent when blending is possible */
        if(pNv->BlendingPossible) {
                NVDmaStart(pNv, NvSubScaledImage, STRETCH_BLIT_FORMAT, 2);
                NVDmaNext (pNv, src_format);
                NVDmaNext (pNv, STRETCH_BLIT_OPERATION_COPY);
        } else {
                NVDmaStart(pNv, NvSubScaledImage, STRETCH_BLIT_FORMAT, 1);
                NVDmaNext (pNv, src_format);
        }
        
        NVDmaStart(pNv, NvSubScaledImage,
                        NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
        NVDmaNext (pNv, NvDmaTT); /* source object */

        /* one scaled blit per clip rectangle */
        while(nbox--) {
                NVDmaStart(pNv, NvSubRectangle, RECT_SOLID_COLOR, 1);
                NVDmaNext (pNv, 0);

                /* clip point, clip size, destination point/size, scale factors */
                NVDmaStart(pNv, NvSubScaledImage, STRETCH_BLIT_CLIP_POINT, 6);
                NVDmaNext (pNv, (pbox->y1 << 16) | pbox->x1); 
                NVDmaNext (pNv, ((pbox->y2 - pbox->y1) << 16) |
                                 (pbox->x2 - pbox->x1));
                NVDmaNext (pNv, dst_point);
                NVDmaNext (pNv, dst_size);
                NVDmaNext (pNv, dsdx);
                NVDmaNext (pNv, dtdy);

                /* source size, pitch (with flags), offset and start point */
                NVDmaStart(pNv, NvSubScaledImage, STRETCH_BLIT_SRC_SIZE, 4);
                NVDmaNext (pNv, (height << 16) | width);
                NVDmaNext (pNv, src_pitch);
                NVDmaNext (pNv, src_offset);
                NVDmaNext (pNv, src_point);
                pbox++;
        }

        if (!pNv->useEXA) {
                if(pNv->CurrentLayout.depth == 15) {
                        NVDmaStart(pNv, NvSubContextSurfaces,
                                        SURFACE_FORMAT, 1);
                        NVDmaNext (pNv, SURFACE_FORMAT_R5G6B5);
                }
        }

        NVDmaKickoff(pNv);

        /* tell the acceleration layer that the engine is busy */
        if (pNv->useEXA)
                exaMarkSync(pScrn->pScreen);
        else
                SET_SYNC_FLAG(pNv->AccelInfoRec);

        /* have NVVideoTimerCallback free the blit memory after FREE_DELAY */
        pPriv->videoStatus = FREE_TIMER;
        pPriv->videoTime = currentTime.milliseconds + FREE_DELAY;
        pNv->VideoTimerCallback = NVVideoTimerCallback;
}
640
641 /*
642  * StopVideo
643  */
644 static void
645 NVStopOverlayVideo(ScrnInfoPtr pScrn, pointer data, Bool Exit)
646 {
647         NVPtr         pNv   = NVPTR(pScrn);
648         NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
649
650         if(pPriv->grabbedByV4L) return;
651
652         REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
653
654         if(Exit) {
655                 if (pPriv->videoStatus & CLIENT_VIDEO_ON)
656                         NVStopOverlay(pScrn);
657                 NVFreeOverlayMemory(pScrn);
658                 pPriv->videoStatus = 0;
659         } else {
660                 if (pPriv->videoStatus & CLIENT_VIDEO_ON) {
661                         pPriv->videoStatus = OFF_TIMER | CLIENT_VIDEO_ON;
662                         pPriv->videoTime = currentTime.milliseconds + OFF_DELAY;
663                         pNv->VideoTimerCallback = NVVideoTimerCallback;
664                 }
665         }
666 }
667
/**
 * NVStopBlitVideo
 * Counterpart of NVStopOverlayVideo for the blit port (same signature);
 * currently does nothing.
 * TODO ?
 *
 * @param pScrn unused
 * @param data unused
 * @param Exit unused
 */
static void
NVStopBlitVideo(ScrnInfoPtr pScrn, pointer data, Bool Exit)
{
}
676
677 /**
678  * NVSetOverlayPortAttribute
679  * sets the attribute "attribute" of port "data" to value "value"
680  * calls NVResetVideo(pScrn) to apply changes to hardware
681  * 
682  * @param pScrenInfo
683  * @param attribute attribute to set
684  * @param value value to which attribute is to be set
685  * @param data port from which the attribute is to be set
686  * 
687  * @return Success, if setting is successful
688  * BadValue/BadMatch, if value/attribute are invalid
689  * @see NVResetVideo(ScrnInfoPtr pScrn)
690  */
691 static int
692 NVSetOverlayPortAttribute(ScrnInfoPtr pScrn, Atom attribute,
693                           INT32 value, pointer data)
694 {
695         NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
696
697         if (attribute == xvBrightness) {
698                 if ((value < -512) || (value > 512))
699                         return BadValue;
700                 pPriv->brightness = value;
701         } else
702         if (attribute == xvDoubleBuffer) {
703                 if ((value < 0) || (value > 1))
704                         return BadValue;
705                 pPriv->doubleBuffer = value;
706         } else
707         if (attribute == xvContrast) {
708                 if ((value < 0) || (value > 8191))
709                         return BadValue;
710                 pPriv->contrast = value;
711         } else
712         if (attribute == xvHue) {
713                 value %= 360;
714                 if (value < 0)
715                         value += 360;
716                 pPriv->hue = value;
717         } else
718         if (attribute == xvSaturation) {
719                 if ((value < 0) || (value > 8191))
720                         return BadValue;
721                 pPriv->saturation = value;
722         } else
723         if (attribute == xvColorKey) {
724                 pPriv->colorKey = value;
725                 REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
726         } else
727         if (attribute == xvAutopaintColorKey) {
728                 if ((value < 0) || (value > 1))
729                         return BadValue;
730                 pPriv->autopaintColorKey = value;
731         } else
732         if (attribute == xvITURBT709) {
733                 if ((value < 0) || (value > 1))
734                         return BadValue;
735                 pPriv->iturbt_709 = value;
736         } else
737         if (attribute == xvSetDefaults) {
738                 NVSetPortDefaults(pScrn, pPriv);
739         } else
740                 return BadMatch;
741
742         NVResetVideo(pScrn);
743         return Success;
744 }
745
746 /**
747  * NVGetOverlayPortAttribute
748  * 
749  * @param pScrn unused
750  * @param attribute attribute to be read
751  * @param value value of attribute will be stored in this pointer
752  * @param data port from which attribute will be read
753  * @return Success, if queried attribute exists
754  */
755 static int
756 NVGetOverlayPortAttribute(ScrnInfoPtr pScrn, Atom attribute,
757                           INT32 *value, pointer data)
758 {
759         NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
760
761         if (attribute == xvBrightness)
762                 *value = pPriv->brightness;
763         else if (attribute == xvDoubleBuffer)
764                 *value = (pPriv->doubleBuffer) ? 1 : 0;
765         else if (attribute == xvContrast)
766                 *value = pPriv->contrast;
767         else if (attribute == xvSaturation)
768                 *value = pPriv->saturation;
769         else if (attribute == xvHue)
770                 *value = pPriv->hue;
771         else if (attribute == xvColorKey)
772                 *value = pPriv->colorKey;
773         else if (attribute == xvAutopaintColorKey)
774                 *value = (pPriv->autopaintColorKey) ? 1 : 0;
775         else if (attribute == xvITURBT709)
776                 *value = (pPriv->iturbt_709) ? 1 : 0;
777         else
778                 return BadMatch;
779
780         return Success;
781 }
782
783 /**
784  * NVSetBlitPortAttribute
785  * sets the attribute "attribute" of port "data" to value "value"
786  * supported attributes:
787  * - xvSyncToVBlank (values: 0,1)
788  * - xvSetDefaults (values: NA; SyncToVBlank will be set, if hardware supports it)
789  * 
790  * @param pScrenInfo
791  * @param attribute attribute to set
792  * @param value value to which attribute is to be set
793  * @param data port from which the attribute is to be set
794  * 
795  * @return Success, if setting is successful
796  * BadValue/BadMatch, if value/attribute are invalid
797  */
798 static int
799 NVSetBlitPortAttribute(ScrnInfoPtr pScrn, Atom attribute,
800                        INT32 value, pointer data)
801 {
802         NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
803         NVPtr           pNv = NVPTR(pScrn);
804
805         if ((attribute == xvSyncToVBlank) && pNv->WaitVSyncPossible) {
806                 if ((value < 0) || (value > 1))
807                         return BadValue;
808                 pPriv->SyncToVBlank = value;
809         } else
810         if (attribute == xvSetDefaults) {
811                 pPriv->SyncToVBlank = pNv->WaitVSyncPossible;
812         } else
813                 return BadMatch;
814
815         return Success;
816 }
817
818 /**
819  * NVGetBlitPortAttribute
820  * reads the value of attribute "attribute" from port "data" into INT32 "*value"
821  * currently only one attribute supported: xvSyncToVBlank
822  * 
823  * @param pScrn unused
824  * @param attribute attribute to be read
825  * @param value value of attribute will be stored here
826  * @param data port from which attribute will be read
827  * @return Success, if queried attribute exists
828  */
829 static int
830 NVGetBlitPortAttribute(ScrnInfoPtr pScrn, Atom attribute,
831                        INT32 *value, pointer data)
832 {
833         NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
834
835         if(attribute == xvSyncToVBlank)
836                 *value = (pPriv->SyncToVBlank) ? 1 : 0;
837         else
838                 return BadMatch;
839
840         return Success;
841 }
842
843
844 /**
845  * QueryBestSize
846  * used by client applications to ask the driver:
847  * how would you actually scale a video of dimensions
848  * vid_w, vid_h, if i wanted you to scale it to dimensions
849  * drw_w, drw_h?
850  * function stores actual scaling size in pointers p_w, p_h.
851  * 
852  * - currently the image cannot be scaled to less than
853  * 1/8th of the original size in either dimension. why?
854  * - what happens if the client requests a scaling to a larger value than
855  * the hardware is capable of (IMAGE_MAX_W, IMAGE_MAX_H)?
856  * 
857  * @param pScrn unused
858  * @param motion unused
859  * @param vid_w width of source video
860  * @param vid_h height of source video
861  * @param drw_w desired scaled width as requested by client
862  * @param drw_h desired scaled height as requested by client
863  * @param p_w actual scaled width as the driver is capable of
864  * @param p_h actual scaled height as the driver is capable of
865  * @param data unused
866  */
867 static void
868 NVQueryBestSize(ScrnInfoPtr pScrn, Bool motion,
869                 short vid_w, short vid_h, 
870                 short drw_w, short drw_h, 
871                 unsigned int *p_w, unsigned int *p_h, 
872                 pointer data)
873 {
874         if(vid_w > (drw_w << 3))
875                 drw_w = vid_w >> 3;
876         if(vid_h > (drw_h << 3))
877                 drw_h = vid_h >> 3;
878
879         *p_w = drw_w;
880         *p_h = drw_h; 
881 }
882
/**
 * NVPack420
 * builds one packed 32-bit destination word from two luma samples and one
 * sample of each chroma plane.  All shifts are done on unsigned 32-bit
 * values: shifting a promoted (signed int) 8-bit sample >= 0x80 left by 24
 * would overflow.
 *
 * @param y0 first luma sample
 * @param y1 second luma sample
 * @param c2 sample taken from the src2 plane
 * @param c3 sample taken from the src3 plane
 * @return the packed word, laid out according to X_BYTE_ORDER
 */
static inline uint32_t NVPack420(uint8_t y0, uint8_t y1,
				 uint8_t c2, uint8_t c3)
{
#if X_BYTE_ORDER == X_BIG_ENDIAN
	return ((uint32_t)y0 << 24) | ((uint32_t)y1 << 8) |
	       ((uint32_t)c3 << 16) | (uint32_t)c2;
#else
	return (uint32_t)y0 | ((uint32_t)y1 << 16) |
	       ((uint32_t)c3 << 8) | ((uint32_t)c2 << 24);
#endif
}

/**
 * NVCopyData420
 * used by NVPutImage() function to copy planar 4:2:0 (image)data into a
 * packed destination buffer and change data order.
 *
 * Every destination word combines two luma samples with one sample from
 * each of src2 and src3.  The chroma pointers only advance after every odd
 * line, so one chroma line serves two consecutive luma lines.
 *
 * @param src1 source buffer of luma
 * @param src2 source buffer of chroma1
 * @param src3 source buffer of chroma2
 * @param dst1 destination buffer
 * @param srcPitch pitch of src1
 * @param srcPitch2 pitch of src2, src3
 * @param dstPitch pitch of dst1
 * @param h number of lines to copy
 * @param w length of lines to copy (luma samples; halved to get the
 *          number of 32-bit words written per line)
 */
static inline void NVCopyData420(unsigned char *src1, unsigned char *src2,
			  unsigned char *src3, unsigned char *dst1,
			  int srcPitch, int srcPitch2,
			  int dstPitch,
			  int h, int w)
{
	int i, j;

	w >>= 1;	/* two luma samples per destination word */

	for (j = 0; j < h; j++) {
		uint32_t *dst = (uint32_t *)dst1;
		const uint8_t *s1 = src1;
		const uint8_t *s2 = src2;
		const uint8_t *s3 = src3;

		i = w;

		/* four words per iteration; >= so that a run of exactly
		 * four words also takes this path */
		while (i >= 4) {
			dst[0] = NVPack420(s1[0], s1[1], s2[0], s3[0]);
			dst[1] = NVPack420(s1[2], s1[3], s2[1], s3[1]);
			dst[2] = NVPack420(s1[4], s1[5], s2[2], s3[2]);
			dst[3] = NVPack420(s1[6], s1[7], s2[3], s3[3]);
			dst += 4; s2 += 4; s3 += 4; s1 += 8;
			i -= 4;
		}

		/* remaining 0..3 words */
		while (i-- > 0) {
			dst[0] = NVPack420(s1[0], s1[1], s2[0], s3[0]);
			dst++; s2++; s3++;
			s1 += 2;
		}

		dst1 += dstPitch;
		src1 += srcPitch;
		if (j & 1) {
			src2 += srcPitch2;
			src3 += srcPitch2;
		}
	}
}
949
950
951 /**
952  * NVPutImage
953  * PutImage is "the" important function of the Xv extension.
954  * a client (e.g. video player) calls this function for every
955  * image (of the video) to be displayed. this function then
956  * scales and displays the image.
957  * 
958  * @param pScrn screen which hold the port where the image is put
959  * @param src_x
960  * @param src_y
961  * @param src_w
962  * @param src_h
963  * @param drw_x
964  * @param drw_y
965  * @param drw_w
966  * @param drw_h
967  * @param id colorspace of image
968  * @param buf pointer to buffer containing the source image
969  * @param width
970  * @param height
971  * @param Sync unused
972  * @param clipBoxes
973  * @param data pointer to port 
974  * @param pDraw
975  */
976 static int
977 NVPutImage(ScrnInfoPtr  pScrn, short src_x, short src_y,
978                                    short drw_x, short drw_y,
979                                    short src_w, short src_h, 
980                                    short drw_w, short drw_h,
981                                    int id,
982                                    unsigned char *buf, 
983                                    short width, short height, 
984                                    Bool         Sync,
985                                    RegionPtr    clipBoxes,
986                                    pointer      data,
987                                    DrawablePtr  pDraw
988 )
989 {
990         NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
991         NVPtr pNv = NVPTR(pScrn);
992         INT32 xa, xb, ya, yb;
993         int newSize, offset, s2offset, s3offset;
994         int srcPitch, srcPitch2, dstPitch;
995         int top, left, right, bottom, npixels, nlines, bpp;
996         Bool skip = FALSE;
997         BoxRec dstBox;
998         CARD32 tmp;
999         int line_len;
1000         
1001         /* s2offset, s3offset - byte offsets into U and V plane of the
1002          *                      source where copying starts.  YV12 is indeed one plane of Y and two subsampled planes of U and V
1003          * offset - byte offset to the first line of the destination.
1004          * dst_start - byte address to the first displayed pel.
1005          */
1006
1007         if (pPriv->grabbedByV4L)
1008                 return Success;
1009
1010         /* make the compiler happy */
1011         s2offset = s3offset = srcPitch2 = 0;
1012
1013         if (!pPriv->blitter) { /* overlay hardware scaler limitation */
1014                 if (src_w > (drw_w << 3))
1015                         drw_w = src_w >> 3;
1016                 if (src_h > (drw_h << 3))
1017                         drw_h = src_h >> 3;
1018         }
1019
1020         /* Clip */
1021         xa = src_x;
1022         xb = src_x + src_w;
1023         ya = src_y;
1024         yb = src_y + src_h;
1025
1026         dstBox.x1 = drw_x;
1027         dstBox.x2 = drw_x + drw_w;
1028         dstBox.y1 = drw_y;
1029         dstBox.y2 = drw_y + drw_h;
1030
1031         if (!xf86XVClipVideoHelper(&dstBox, &xa, &xb, &ya, &yb, clipBoxes,
1032                                    width, height))
1033                 return Success;
1034
1035         if (!pPriv->blitter) {
1036                 dstBox.x1 -= pScrn->frameX0;
1037                 dstBox.x2 -= pScrn->frameX0;
1038                 dstBox.y1 -= pScrn->frameY0;
1039                 dstBox.y2 -= pScrn->frameY0;
1040         }
1041
1042         
1043         /* determine required memory size */
1044         bpp = pScrn->bitsPerPixel >> 3; // bytes per pixel
1045
1046         switch(id) {
1047         case FOURCC_YV12:
1048         case FOURCC_I420:
1049                 srcPitch = (width + 3) & ~3;    /* of luma */
1050                 s2offset = srcPitch * height;
1051                 srcPitch2 = ((width >> 1) + 3) & ~3; /*of chroma*/
1052                 s3offset = (srcPitch2 * (height >> 1)) + s2offset;
1053                 dstPitch = ((width << 1) + 63) & ~63;
1054                 break;
1055         case FOURCC_UYVY:
1056         case FOURCC_YUY2:
1057                 srcPitch = width << 1;
1058                 dstPitch = ((width << 1) + 63) & ~63;
1059                 break;
1060         case FOURCC_RGB:
1061                 srcPitch = width << 2;
1062                 dstPitch = ((width << 2) + 63) & ~63;
1063                 break;
1064         default:
1065                 return BadImplementation;
1066         }
1067         /* dstPitch = number of bytes per row
1068          but the allocation is done is pixel, hence the division to get the real number of bytes */
1069         newSize = height * dstPitch / bpp;
1070         
1071         if (pPriv->doubleBuffer) // double buffering ...
1072                 newSize <<= 1; // ... means double the amount of VRAM needed
1073         
1074         pPriv->video_mem = NVAllocateVideoMemory(pScrn, pPriv->video_mem, 
1075                                                               newSize);
1076         if (!pPriv->video_mem)
1077                 return BadAlloc;
1078
1079         offset = pPriv->video_mem->offset;
1080         if (pPriv->doubleBuffer) {
1081                 int mask = 1 << (pPriv->currentBuffer << 2);
1082
1083                 /* overwrite the newest buffer if there's not one free */
1084                 if (nvReadVIDEO(pNv, NV_PVIDEO_BUFFER) & mask) {
1085                         if (!pPriv->currentBuffer)
1086                                 offset += (height + 1) * dstPitch;
1087                         skip = TRUE;
1088                 } else
1089
1090                 if (pPriv->currentBuffer)
1091                         offset += (height + 1) * dstPitch;
1092         }
1093
1094
1095         /* We need to enlarge the copied rectangle by a pixel so the HW
1096          * filtering doesn't pick up junk laying outside of the source */
1097         /* fixed point arithmetic */
1098         left = (xa - 0x00010000) >> 16;
1099         if (left < 0) left = 0;
1100         top = (ya - 0x00010000) >> 16;
1101         if (top < 0) top = 0;
1102         right = (xb + 0x0001ffff) >> 16;
1103         if (right > width) right = width;
1104         bottom = (yb + 0x0001ffff) >> 16;
1105         if (bottom > height) bottom = height;
1106
1107         if(pPriv->blitter) NVSync(pScrn);
1108
1109         
1110         switch(id) {
1111         case FOURCC_YV12:
1112         case FOURCC_I420:
1113                 left &= ~1;
1114                 npixels = ((right + 1) & ~1) - left;
1115                 top &= ~1;
1116                 nlines = ((bottom + 1) & ~1) - top;
1117
1118                 offset += (left << 1) + (top * dstPitch);
1119                 tmp = ((top >> 1) * srcPitch2) + (left >> 1);
1120                 s2offset += tmp;
1121                 s3offset += tmp;
1122                 if (id == FOURCC_I420) {
1123                         tmp = s2offset;
1124                         s2offset = s3offset;
1125                         s3offset = tmp;
1126                 }
1127                 line_len = dstPitch;
1128                 break;
1129         case FOURCC_UYVY:
1130         case FOURCC_YUY2:
1131                 left &= ~1;
1132                 npixels = ((right + 1) & ~1) - left;
1133                 nlines = bottom - top;
1134
1135                 left <<= 1;
1136                 buf += (top * srcPitch) + left;
1137                 offset += left + (top * dstPitch);
1138                 line_len = width << 1;
1139                 break;
1140         case FOURCC_RGB:
1141                 npixels = right - left;
1142                 nlines = bottom - top;
1143                 left <<= 2;
1144                 buf += (top * srcPitch) + left;
1145                 offset += left + (top * dstPitch);
1146                 line_len = width << 2;
1147                 break;
1148         default:
1149                 return BadImplementation;
1150         }
1151
1152         //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Got fence handle %lld\n", fence_id);
1153
1154         /*Now we take a decision regarding the way we send the data to the card.
1155         Either we use double buffering of "private" TT memory
1156         Either we rely on X's GARTScratch 
1157         Either we fallback on CPU copy
1158         */
1159         pPriv->TT_mem_chunk[0] = NVAllocateTTMemory(pScrn, pPriv->TT_mem_chunk[0], 
1160                                                               newSize);
1161         pPriv->TT_mem_chunk[1] = NVAllocateTTMemory(pScrn, pPriv->TT_mem_chunk[1], 
1162                                                               newSize);
1163         
1164         
1165         NVAllocRec * destination_buffer;
1166         
1167         if ( pPriv->TT_mem_chunk[pPriv->currentHostBuffer] )
1168                 {
1169                 destination_buffer = pPriv->TT_mem_chunk[pPriv->currentHostBuffer];
1170                 xf86DrvMsg(0, X_INFO, "Using private TT memory chunk #%d\n", pPriv->currentHostBuffer);
1171                 }
1172         else 
1173                 {
1174                 destination_buffer = pNv->GARTScratch;
1175                 xf86DrvMsg(0, X_INFO, "Using global GART memory chunk\n", pPriv->currentHostBuffer);
1176                 }
1177         
1178         /*Below is *almost* a copypaste from NvAccelUploadM2MF, cannot use it directly because of YV12 -> YUY2 conversion */    
1179         if ( nlines * line_len <= destination_buffer->size)
1180                 {
1181                 unsigned char *dst = destination_buffer->map;
1182                 
1183                 /* Upload to GART */
1184                 switch(id) {
1185                 case FOURCC_YV12:
1186                 case FOURCC_I420:
1187                 
1188                         NVCopyData420(buf + (top * srcPitch) + left,
1189                                 buf + s2offset, buf + s3offset,
1190                                 dst, srcPitch, srcPitch2,
1191                                 dstPitch, nlines, npixels);
1192                         
1193                         break;
1194                 case FOURCC_UYVY:
1195                 case FOURCC_YUY2:
1196                 case FOURCC_RGB:
1197                         memcpy(dst, buf, srcPitch * nlines);
1198                         break;
1199                 default:
1200                         return BadImplementation;
1201                 }
1202                 
1203                 if ( !pPriv -> blitter ) 
1204                         {
1205                         NVDmaStart(pNv, NvSubMemFormat, MEMFORMAT_DMA_OBJECT_IN, 2);
1206                         NVDmaNext (pNv, NvDmaTT);
1207                         NVDmaNext (pNv, NvDmaFB);
1208                         pNv->M2MFDirection = 1;
1209                 
1210                         /* DMA to VRAM */
1211                         
1212                         NVDmaStart(pNv, NvSubMemFormat,
1213                                 NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
1214                         NVDmaNext (pNv, (uint32_t)destination_buffer->offset);
1215                         NVDmaNext (pNv, (uint32_t)offset);
1216                         NVDmaNext (pNv, line_len);
1217                         NVDmaNext (pNv, dstPitch);
1218                         NVDmaNext (pNv, line_len);
1219                         NVDmaNext (pNv, nlines);
1220                         NVDmaNext (pNv, (1<<8)|1);
1221                         NVDmaNext (pNv, 0);
1222
1223                         NVNotifierReset(pScrn, pNv->Notifier0);
1224                         NVDmaStart(pNv, NvSubMemFormat,
1225                                 NV_MEMORY_TO_MEMORY_FORMAT_NOTIFY, 1);
1226                         NVDmaNext (pNv, 0);
1227                         NVDmaStart(pNv, NvSubMemFormat, 0x100, 1);
1228                         NVDmaNext (pNv, 0);
1229                         NVDmaKickoff(pNv);
1230
1231                         if (!NVNotifierWaitStatus(pScrn, pNv->Notifier0, 0, 0))
1232                                 return FALSE;
1233                         }
1234                 else 
1235                         {
1236                         NVDmaStart(pNv, NvSubScaledImage, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
1237                         NVDmaNext (pNv, NvDmaTT); /* source object */
1238                         
1239                         NVPutBlitImage(pScrn, destination_buffer->offset, id,
1240                                        dstPitch, &dstBox,
1241                                        xa, ya, xb, yb,
1242                                        width, height,
1243                                        src_w, src_h, drw_w, drw_h,
1244                                        clipBoxes, pDraw);
1245                         
1246                         NVNotifierReset(pScrn, pNv->Notifier0);
1247                         NVDmaStart(pNv, NvSubScaledImage,
1248                                 NV10_IMAGE_BLIT_NOTIFY, 1);
1249                         NVDmaNext (pNv, 0);
1250                         NVDmaStart(pNv, NvSubScaledImage, 0x100, 1);
1251                         NVDmaNext (pNv, 0);
1252                                 
1253                         NVDmaStart(pNv, NvSubScaledImage, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
1254                         NVDmaNext (pNv, NvDmaFB); /* source object */
1255                         NVDmaKickoff(pNv);
1256                         if (!NVNotifierWaitStatus(pScrn, pNv->Notifier0, 0, 0))
1257                                 return FALSE;
1258                         return Success;
1259                         }
1260                 }
1261         else //GART is too small, we fallback on CPU copy for simplicity
1262                 {
1263                 xf86DrvMsg(0, X_ERROR, "Fallback on CPU copy not implemented yet\n");
1264                 }
1265                 
1266         pPriv->currentHostBuffer ^= 1;
1267                 
1268         if (!skip) {
1269                 if (pPriv->blitter) {
1270                         NVPutBlitImage(pScrn, offset, id,
1271                                        dstPitch, &dstBox,
1272                                        xa, ya, xb, yb,
1273                                        width, height,
1274                                        src_w, src_h, drw_w, drw_h,
1275                                        clipBoxes, pDraw);
1276                 } else {
1277                         NVPutOverlayImage(pScrn, offset, id,
1278                                           dstPitch, &dstBox, 
1279                                           xa, ya, xb, yb,
1280                                           width, height,
1281                                           src_w, src_h, drw_w, drw_h,
1282                                           clipBoxes);
1283                         pPriv->currentBuffer ^= 1;
1284                 }
1285         }
1286         
1287
1288         return Success;
1289 }
1290
1291 /**
1292  * QueryImageAttributes
1293  * 
1294  * calculates
1295  * - size (memory required to store image),
1296  * - pitches,
1297  * - offsets
1298  * of image
1299  * depending on colorspace (id) and dimensions (w,h) of image
1300  * values of
1301  * - w,
1302  * - h
1303  * may be adjusted as needed
1304  * 
1305  * @param pScrn unused
1306  * @param id colorspace of image
1307  * @param w pointer to width of image
1308  * @param h pointer to height of image
1309  * @param pitches pitches[i] = length of a scanline in plane[i]
1310  * @param offsets offsets[i] = offset of plane i from the beginning of the image
1311  * @return size of the memory required for the XvImage queried
1312  */
1313 static int
1314 NVQueryImageAttributes(ScrnInfoPtr pScrn, int id, 
1315                        unsigned short *w, unsigned short *h, 
1316                        int *pitches, int *offsets)
1317 {
1318         int size, tmp;
1319
1320         if (*w > IMAGE_MAX_W)
1321                 *w = IMAGE_MAX_W;
1322         if (*h > IMAGE_MAX_H)
1323                 *h = IMAGE_MAX_H;
1324
1325         *w = (*w + 1) & ~1; // width rounded up to an even number
1326         if (offsets)
1327                 offsets[0] = 0;
1328
1329         switch (id) {
1330         case FOURCC_YV12:
1331         case FOURCC_I420:
1332                 *h = (*h + 1) & ~1; // height rounded up to an even number
1333                 size = (*w + 3) & ~3; // width rounded up to a multiple of 4
1334                 if (pitches)
1335                         pitches[0] = size; // width rounded up to a multiple of 4
1336                 size *= *h;
1337                 if (offsets)
1338                         offsets[1] = size; // number of pixels in "rounded up" image
1339                 tmp = ((*w >> 1) + 3) & ~3; // width/2 rounded up to a multiple of 4
1340                 if (pitches)
1341                         pitches[1] = pitches[2] = tmp; // width/2 rounded up to a multiple of 4
1342                 tmp *= (*h >> 1); // 1/4*number of pixels in "rounded up" image
1343                 size += tmp; // 5/4*number of pixels in "rounded up" image
1344                 if (offsets)
1345                         offsets[2] = size; // 5/4*number of pixels in "rounded up" image
1346                 size += tmp; // = 3/2*number of pixels in "rounded up" image
1347                 break;
1348         case FOURCC_UYVY:
1349         case FOURCC_YUY2:
1350                 size = *w << 1; // 2*width
1351                 if (pitches)
1352                         pitches[0] = size; // 2*width
1353                 size *= *h; // 2*width*height
1354                 break;
1355         case FOURCC_RGB:
1356                 size = *w << 2; // 4*width (32 bit per pixel)
1357                 if (pitches)
1358                         pitches[0] = size; // 4*width
1359                 size *= *h; // 4*width*height
1360                 break;
1361         default:
1362                 *w = *h = size = 0;
1363                 break;
1364         }
1365
1366         return size;
1367 }
1368
1369 /***** Exported offscreen surface stuff ****/
1370
1371
1372 static int
1373 NVAllocSurface(ScrnInfoPtr pScrn, int id,
1374                unsigned short w, unsigned short h,
1375                XF86SurfacePtr surface)
1376 {
1377         NVPtr pNv = NVPTR(pScrn);
1378         NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv); 
1379         int size, bpp;
1380
1381         bpp = pScrn->bitsPerPixel >> 3;
1382
1383         if (pPriv->grabbedByV4L)
1384                 return BadAlloc;
1385
1386         if ((w > IMAGE_MAX_W) || (h > IMAGE_MAX_H))
1387                 return BadValue;
1388
1389         w = (w + 1) & ~1;
1390         pPriv->pitch = ((w << 1) + 63) & ~63;
1391         size = h * pPriv->pitch / bpp;
1392
1393         pPriv->video_mem = NVAllocateVideoMemory(pScrn,
1394                                                    pPriv->video_mem,
1395                                                    size);
1396         if (!pPriv->video_mem)
1397                 return BadAlloc;
1398
1399         pPriv->offset = 0;
1400         
1401         surface->width = w;
1402         surface->height = h;
1403         surface->pScrn = pScrn;
1404         surface->pitches = &pPriv->pitch; 
1405         surface->offsets = &pPriv->offset;
1406         surface->devPrivate.ptr = (pointer)pPriv;
1407         surface->id = id;
1408
1409         /* grab the video */
1410         NVStopOverlay(pScrn);
1411         pPriv->videoStatus = 0;
1412         REGION_EMPTY(pScrn->pScreen, &pPriv->clip);
1413         pPriv->grabbedByV4L = TRUE;
1414
1415         return Success;
1416 }
1417
1418 static int
1419 NVStopSurface(XF86SurfacePtr surface)
1420 {
1421         NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1422
1423         if (pPriv->grabbedByV4L && pPriv->videoStatus) {
1424                 NVStopOverlay(surface->pScrn);
1425                 pPriv->videoStatus = 0;
1426         }
1427
1428         return Success;
1429 }
1430
1431 static int 
1432 NVFreeSurface(XF86SurfacePtr surface)
1433 {
1434         NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1435
1436         if (pPriv->grabbedByV4L) {
1437                 NVStopSurface(surface);
1438                 NVFreeOverlayMemory(surface->pScrn);
1439                 pPriv->grabbedByV4L = FALSE;
1440         }
1441
1442         return Success;
1443 }
1444
1445 static int
1446 NVGetSurfaceAttribute(ScrnInfoPtr pScrn, Atom attribute, INT32 *value)
1447 {
1448         NVPtr pNv = NVPTR(pScrn);
1449         NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1450
1451         return NVGetOverlayPortAttribute(pScrn, attribute,
1452                                          value, (pointer)pPriv);
1453 }
1454
1455 static int
1456 NVSetSurfaceAttribute(ScrnInfoPtr pScrn, Atom attribute, INT32 value)
1457 {
1458         NVPtr pNv = NVPTR(pScrn);
1459         NVPortPrivPtr pPriv = GET_OVERLAY_PRIVATE(pNv);
1460
1461         return NVSetOverlayPortAttribute(pScrn, attribute,
1462                                          value, (pointer)pPriv);
1463 }
1464
1465 static int
1466 NVDisplaySurface(XF86SurfacePtr surface,
1467                  short src_x, short src_y, 
1468                  short drw_x, short drw_y,
1469                  short src_w, short src_h, 
1470                  short drw_w, short drw_h,
1471                  RegionPtr clipBoxes)
1472 {
1473         ScrnInfoPtr pScrn = surface->pScrn;
1474         NVPortPrivPtr pPriv = (NVPortPrivPtr)(surface->devPrivate.ptr);
1475         INT32 xa, xb, ya, yb;
1476         BoxRec dstBox;
1477
1478         if (!pPriv->grabbedByV4L)
1479                 return Success;
1480
1481         if (src_w > (drw_w << 3))
1482                 drw_w = src_w >> 3;
1483         if (src_h > (drw_h << 3))
1484                 drw_h = src_h >> 3;
1485
1486         /* Clip */
1487         xa = src_x;
1488         xb = src_x + src_w;
1489         ya = src_y;
1490         yb = src_y + src_h;
1491
1492         dstBox.x1 = drw_x;
1493         dstBox.x2 = drw_x + drw_w;
1494         dstBox.y1 = drw_y;
1495         dstBox.y2 = drw_y + drw_h;
1496
1497         if(!xf86XVClipVideoHelper(&dstBox, &xa, &xb, &ya, &yb, clipBoxes, 
1498                                   surface->width, surface->height))
1499                 return Success;
1500
1501         dstBox.x1 -= pScrn->frameX0;
1502         dstBox.x2 -= pScrn->frameX0;
1503         dstBox.y1 -= pScrn->frameY0;
1504         dstBox.y2 -= pScrn->frameY0;
1505
1506         pPriv->currentBuffer = 0;
1507
1508         NVPutOverlayImage(pScrn, surface->offsets[0], surface->id,
1509                           surface->pitches[0], &dstBox, xa, ya, xb, yb,
1510                           surface->width, surface->height, src_w, src_h,
1511                           drw_w, drw_h, clipBoxes);
1512
1513         return Success;
1514 }
1515
1516 /**
1517  * NVSetupBlitVideo
1518  * this function does all the work setting up a blit port
1519  * 
1520  * @return blit port
1521  */
1522 static XF86VideoAdaptorPtr
1523 NVSetupBlitVideo (ScreenPtr pScreen)
1524 {
1525         ScrnInfoPtr         pScrn = xf86Screens[pScreen->myNum];
1526         NVPtr               pNv       = NVPTR(pScrn);
1527         XF86VideoAdaptorPtr adapt;
1528         NVPortPrivPtr       pPriv;
1529         int i;
1530
1531         if (!(adapt = xcalloc(1, sizeof(XF86VideoAdaptorRec) +
1532                                         sizeof(NVPortPrivRec) +
1533                                         (sizeof(DevUnion) * NUM_BLIT_PORTS)))) {
1534                 return NULL;
1535         }
1536
1537         adapt->type             = XvWindowMask | XvInputMask | XvImageMask;
1538         adapt->flags            = 0;
1539         adapt->name             = "NV Video Blitter";
1540         adapt->nEncodings       = 1;
1541         adapt->pEncodings       = &DummyEncoding;
1542         adapt->nFormats         = NUM_FORMATS_ALL;
1543         adapt->pFormats         = NVFormats;
1544         adapt->nPorts           = NUM_BLIT_PORTS;
1545         adapt->pPortPrivates    = (DevUnion*)(&adapt[1]);
1546
1547         pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[NUM_BLIT_PORTS]);
1548         for(i = 0; i < NUM_BLIT_PORTS; i++)
1549                 adapt->pPortPrivates[i].ptr = (pointer)(pPriv);
1550
1551         if(pNv->WaitVSyncPossible) {
1552                 adapt->pAttributes = NVBlitAttributes;
1553                 adapt->nAttributes = NUM_BLIT_ATTRIBUTES;
1554         } else {
1555                 adapt->pAttributes = NULL;
1556                 adapt->nAttributes = 0;
1557         }
1558
1559         adapt->pImages                  = NVImages;
1560         adapt->nImages                  = NUM_IMAGES_ALL;
1561         adapt->PutVideo                 = NULL;
1562         adapt->PutStill                 = NULL;
1563         adapt->GetVideo                 = NULL;
1564         adapt->GetStill                 = NULL;
1565         adapt->StopVideo                = NVStopBlitVideo;
1566         adapt->SetPortAttribute         = NVSetBlitPortAttribute;
1567         adapt->GetPortAttribute         = NVGetBlitPortAttribute;
1568         adapt->QueryBestSize            = NVQueryBestSize;
1569         adapt->PutImage                 = NVPutImage;
1570         adapt->QueryImageAttributes     = NVQueryImageAttributes;
1571
1572         pPriv->videoStatus              = 0;
1573         pPriv->grabbedByV4L             = FALSE;
1574         pPriv->blitter                  = TRUE;
1575         pPriv->doubleBuffer             = FALSE;
1576         pPriv->SyncToVBlank             = pNv->WaitVSyncPossible;
1577
1578         pNv->blitAdaptor                = adapt;
1579         xvSyncToVBlank                  = MAKE_ATOM("XV_SYNC_TO_VBLANK");
1580
1581         return adapt;
1582 }
1583
1584 /**
1585  * NV10SetupOverlayVideo
1586  * this function does all the work setting up an overlay port
1587  * 
1588  * @return overlay port
1589  * @see NVResetVideo(ScrnInfoPtr pScrn)
1590  */
1591 static XF86VideoAdaptorPtr 
1592 NV10SetupOverlayVideo(ScreenPtr pScreen)
1593 {
1594         ScrnInfoPtr         pScrn = xf86Screens[pScreen->myNum];
1595         NVPtr               pNv       = NVPTR(pScrn);
1596         XF86VideoAdaptorPtr adapt;
1597         NVPortPrivPtr       pPriv;
1598
1599         if (!(adapt = xcalloc(1, sizeof(XF86VideoAdaptorRec) + 
1600                                         sizeof(NVPortPrivRec) + 
1601                                         sizeof(DevUnion)))) {
1602                 return NULL;
1603         }
1604
1605         adapt->type             = XvWindowMask | XvInputMask | XvImageMask;
1606         adapt->flags            = VIDEO_OVERLAID_IMAGES|VIDEO_CLIP_TO_VIEWPORT;
1607         adapt->name             = "NV Video Overlay";
1608         adapt->nEncodings       = 1;
1609         adapt->pEncodings       = &DummyEncoding;
1610         adapt->nFormats         = NUM_FORMATS_ALL;
1611         adapt->pFormats         = NVFormats;
1612         adapt->nPorts           = 1;
1613         adapt->pPortPrivates    = (DevUnion*)(&adapt[1]);
1614
1615         pPriv = (NVPortPrivPtr)(&adapt->pPortPrivates[1]);
1616         adapt->pPortPrivates[0].ptr     = (pointer)(pPriv);
1617
1618         adapt->pAttributes              = NVOverlayAttributes;
1619         adapt->nAttributes              = NUM_OVERLAY_ATTRIBUTES;
1620         adapt->pImages                  = NVImages;
1621         adapt->nImages                  = NUM_IMAGES_YUV;
1622         adapt->PutVideo                 = NULL;
1623         adapt->PutStill                 = NULL;
1624         adapt->GetVideo                 = NULL;
1625         adapt->GetStill                 = NULL;
1626         adapt->StopVideo                = NVStopOverlayVideo;
1627         adapt->SetPortAttribute         = NVSetOverlayPortAttribute;
1628         adapt->GetPortAttribute         = NVGetOverlayPortAttribute;
1629         adapt->QueryBestSize            = NVQueryBestSize;
1630         adapt->PutImage                 = NVPutImage;
1631         adapt->QueryImageAttributes     = NVQueryImageAttributes;
1632
1633         pPriv->videoStatus              = 0;
1634         pPriv->currentBuffer            = 0;
1635         pPriv->grabbedByV4L             = FALSE;
1636         pPriv->blitter                  = FALSE;
1637
1638         NVSetPortDefaults (pScrn, pPriv);
1639
1640         /* gotta uninit this someplace */
1641         REGION_NULL(pScreen, &pPriv->clip);
1642
1643         pNv->overlayAdaptor     = adapt;
1644
1645         xvBrightness            = MAKE_ATOM("XV_BRIGHTNESS");
1646         xvDoubleBuffer          = MAKE_ATOM("XV_DOUBLE_BUFFER");
1647         xvContrast              = MAKE_ATOM("XV_CONTRAST");
1648         xvColorKey              = MAKE_ATOM("XV_COLORKEY");
1649         xvSaturation            = MAKE_ATOM("XV_SATURATION");
1650         xvHue                   = MAKE_ATOM("XV_HUE");
1651         xvAutopaintColorKey     = MAKE_ATOM("XV_AUTOPAINT_COLORKEY");
1652         xvSetDefaults           = MAKE_ATOM("XV_SET_DEFAULTS");
1653         xvITURBT709             = MAKE_ATOM("XV_ITURBT_709");
1654
1655         NVResetVideo(pScrn);
1656
1657         return adapt;
1658 }
1659
1660 XF86OffscreenImageRec NVOffscreenImages[2] = {
1661         {
1662                 &NVImages[0],
1663                 VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT,
1664                 NVAllocSurface,
1665                 NVFreeSurface,
1666                 NVDisplaySurface,
1667                 NVStopSurface,
1668                 NVGetSurfaceAttribute,
1669                 NVSetSurfaceAttribute,
1670                 IMAGE_MAX_W, IMAGE_MAX_H,
1671                 NUM_OVERLAY_ATTRIBUTES - 1,
1672                 &NVOverlayAttributes[1]
1673         },
1674         {
1675                 &NVImages[2],
1676                 VIDEO_OVERLAID_IMAGES | VIDEO_CLIP_TO_VIEWPORT,
1677                 NVAllocSurface,
1678                 NVFreeSurface,
1679                 NVDisplaySurface,
1680                 NVStopSurface,
1681                 NVGetSurfaceAttribute,
1682                 NVSetSurfaceAttribute,
1683                 IMAGE_MAX_W, IMAGE_MAX_H,
1684                 NUM_OVERLAY_ATTRIBUTES - 1,
1685                 &NVOverlayAttributes[1]
1686         }
1687 };
1688
1689 static void
1690 NVInitOffscreenImages (ScreenPtr pScreen)
1691 {
1692         xf86XVRegisterOffscreenImages(pScreen, NVOffscreenImages, 2);
1693 }
1694
1695 /**
1696  * NVChipsetHasOverlay
1697  * 
1698  * newer chips don't support overlay anymore.
1699  * overlay feature is emulated via textures.
1700  * 
1701  * @param pNv 
1702  * @return true, if chipset supports overlay
1703  */
1704 static Bool
1705 NVChipsetHasOverlay(NVPtr pNv)
1706 {
1707         switch (pNv->Architecture) {
1708         case NV_ARCH_10:
1709         case NV_ARCH_20:
1710         case NV_ARCH_30:
1711                 return TRUE;
1712         case NV_ARCH_40:
1713                 if ((pNv->Chipset & 0xfff0) == CHIPSET_NV40)
1714                         return TRUE;
1715                 break;
1716         default:
1717                 break;
1718         }
1719
1720         return FALSE;
1721 }
1722
1723 /**
1724  * NVSetupOverlayVideo
1725  * check if chipset supports Overlay and CompositeExtension is disabled.
1726  * if so, setup overlay port
1727  * 
1728  * @return overlay port
1729  * @see NVChipsetHasOverlay(NVPtr pNv)
1730  * @see NV10SetupOverlayVideo(ScreenPtr pScreen)
1731  * @see NVInitOffscreenImages(ScreenPtr pScreen)
1732  */
1733 static XF86VideoAdaptorPtr
1734 NVSetupOverlayVideo(ScreenPtr pScreen)
1735 {
1736         ScrnInfoPtr          pScrn = xf86Screens[pScreen->myNum];
1737         XF86VideoAdaptorPtr  overlayAdaptor = NULL;
1738         NVPtr                pNv   = NVPTR(pScrn);
1739
1740         if (!NVChipsetHasOverlay(pNv))
1741                 return NULL;
1742
1743         /*XXX: Do we still want to provide the overlay anyway, but make the
1744          *     blit adaptor the default if composite is enabled?
1745          */
1746 #ifdef COMPOSITE
1747 /*      if (!noCompositeExtension) {
1748                 xf86DrvMsg(pScrn->scrnIndex, X_INFO,
1749                         "XV: Video overlay not available, composite enabled\n");
1750                 return NULL;
1751         }*/
1752 #endif
1753         overlayAdaptor = NV10SetupOverlayVideo(pScreen);
1754         if (overlayAdaptor)
1755                 NVInitOffscreenImages(pScreen);
1756
1757         return overlayAdaptor;
1758 }
1759
1760 /**
1761  * NVInitVideo
1762  * tries to initialize one new overlay port and one new blit port
1763  * and add them to the list of ports on screen "pScreen".
1764  * 
1765  * @param pScreen
1766  * @see NVSetupOverlayVideo(ScreenPtr pScreen)
1767  * @see NVSetupBlitVideo(ScreenPtr pScreen)
1768  */
1769 void NVInitVideo (ScreenPtr pScreen)
1770 {
1771         ScrnInfoPtr          pScrn = xf86Screens[pScreen->myNum];
1772         XF86VideoAdaptorPtr *adaptors, *newAdaptors = NULL;
1773         XF86VideoAdaptorPtr  overlayAdaptor = NULL;
1774         XF86VideoAdaptorPtr  blitAdaptor = NULL;
1775         int                  num_adaptors;
1776
1777         if (pScrn->bitsPerPixel == 8)
1778                 return;
1779
1780         overlayAdaptor = NVSetupOverlayVideo(pScreen);
1781         blitAdaptor    = NVSetupBlitVideo(pScreen);
1782
1783         num_adaptors = xf86XVListGenericAdaptors(pScrn, &adaptors);
1784         if(blitAdaptor || overlayAdaptor) {
1785                 int size = num_adaptors;
1786
1787                 if(overlayAdaptor) size++;
1788                 if(blitAdaptor)    size++;
1789
1790                 newAdaptors = xalloc(size * sizeof(XF86VideoAdaptorPtr *));
1791                 if(newAdaptors) {
1792                         if(num_adaptors) {
1793                                 memcpy(newAdaptors, adaptors, num_adaptors *
1794                                                 sizeof(XF86VideoAdaptorPtr));
1795                         }
1796
1797                         if(overlayAdaptor) {
1798                                 newAdaptors[num_adaptors] = overlayAdaptor;
1799                                 num_adaptors++;
1800                         }
1801
1802                         if(blitAdaptor) {
1803                                 newAdaptors[num_adaptors] = blitAdaptor;
1804                                 num_adaptors++;
1805                         }
1806
1807                         adaptors = newAdaptors;
1808                 }
1809         }
1810
1811         if (num_adaptors)
1812                 xf86XVScreenInit(pScreen, adaptors, num_adaptors);
1813         if (newAdaptors)
1814                 xfree(newAdaptors);
1815 }
1816