2 * Copyright 2007-2008 Maarten Maathuis
3 * Copyright 2008 Stephane Marchesin
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
30 #include <X11/extensions/Xv.h>
33 #include "dixstruct.h"
36 #include "nv_include.h"
39 #include "nv30_shaders.h"
41 extern Atom xvSyncToVBlank, xvSetDefaults;
44 * The filtering function used for video scaling. We use a cubic filter as defined in
45 * "Reconstruction Filters in Computer Graphics"
46 * Mitchell & Netravali in SIGGRAPH '88
/*
 * Evaluate the cubic reconstruction filter at x.
 *
 * Only part of this function is visible in this excerpt: the definition of
 * B, the helpers x1/x2/x3 and the condition selecting between the two
 * return statements are not shown.
 * NOTE(review): the two polynomials have the shape of the Mitchell-Netravali
 * pieces for |x| < 1 and 1 <= |x| < 2, with x1/x2/x3 presumably |x|, |x|^2
 * and |x|^3 -- confirm against the missing lines.
 */
48 static float filter_func(float x)
/* C is tied to B so that B + 2C = 1 holds for whatever B is chosen. */
51 const double C=(1.0-B)/2.0;
/* First cubic piece; it has no linear (x1) term. */
57 return ( (12.0-9.0*B-6.0*C)*x3+(-18.0+12.0*B+6.0*C)*x2+(6.0-2.0*B) )/6.0;
/* Second cubic piece, using all of x3, x2 and x1. */
59 return ( (-B-6.0*C)*x3+(6.0*B+30.0*C)*x2+(-12.0*B-48.0*C)*x1+(8.0*B+24.0*C) )/6.0;
/*
 * Convert a float, nominally in [-1.0, 1.0], to a signed 8-bit value by
 * scaling it with 127 and truncating toward zero.
 *
 * Converting a floating-point value that does not fit the destination
 * integer type is undefined behaviour in C, so results outside the int8_t
 * range are saturated and NaN maps to 0. Every input that was well defined
 * before yields exactly the same result as the plain cast did.
 *
 * @param v value to convert
 * @return v * 127 truncated toward zero, saturated to [-128, 127]
 */
static int8_t f32tosb8(float v)
{
	const double scaled = v * 127.0;

	/* NaN is the only value that compares unequal to itself. */
	if (scaled != scaled)
		return 0;
	if (scaled >= 127.0)
		return 127;
	if (scaled <= -128.0)
		return -128;

	return (int8_t)scaled;
}
68 * 512 means 2048 bytes of VRAM
/* Number of entries in the filter table; compute_filter_table() writes four int8_t values per entry. */
70 #define TABLE_SIZE 512
/*
 * Fill t with TABLE_SIZE entries of four signed bytes each, derived from
 * the filter weights at four neighbouring taps.
 *
 * Per entry i:
 *   t[4*i+0] = w0 + w1
 *   t[4*i+1] = 1 - x + w3 / (w2 + w3)
 *   t[4*i+2] = 1 + x - w1 / (w0 + w1)
 *   t[4*i+3] = 0
 * each converted with f32tosb8().
 *
 * @param t destination buffer; must hold at least 4 * TABLE_SIZE int8_t values
 *
 * NOTE(review): the declarations of i and x and the way x is derived from i
 * are not visible in this excerpt.
 */
71 static void compute_filter_table(int8_t *t) {
74 for(i=0;i<TABLE_SIZE;i++) {
/* Filter weights at distances x+1, x, x-1 and x-2. */
77 float w0=filter_func(x+1.0);
78 float w1=filter_func(x);
79 float w2=filter_func(x-1.0);
80 float w3=filter_func(x-2.0);
/* The components are stored out of index order: 2, 1, 0, then 3. */
82 t[4*i+2]=f32tosb8(1.0+x-w1/(w0+w1));
83 t[4*i+1]=f32tosb8(1.0-x+w3/(w2+w3));
84 t[4*i+0]=f32tosb8(w0+w1);
/* Fourth component is always zero. */
85 t[4*i+3]=f32tosb8(0.0);
/*
 * Create the buffer object holding the filter table, fill it through
 * compute_filter_table() and return its offset.
 *
 * @param pScrn screen whose device (pNv->dev) the buffer object is created on
 * @return table_mem->offset
 *
 * NOTE(review): the statements that follow the two error messages, and any
 * guard that skips re-allocation when table_mem is already set, are not
 * visible in this excerpt.
 */
89 static uint64_t NV30_LoadFilterTable(ScrnInfoPtr pScrn)
91 NVPtr pNv = NVPTR(pScrn);
/* Function-static, so the buffer object pointer persists across calls. */
92 static struct nouveau_bo *table_mem = NULL;
/*
 * NOTE(review): the size is computed with sizeof(float), but the table is
 * written as int8_t, four per entry (compute_filter_table), which is
 * TABLE_SIZE * 4 bytes. This over-allocates by a factor of sizeof(float);
 * consider sizeof(int8_t).
 */
95 if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
96 0, TABLE_SIZE*sizeof(float)*4, &table_mem)) {
97 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
98 "Couldn't alloc filter table!\n");
/* Map the buffer for read/write so the table can be written into it. */
102 if (nouveau_bo_map(table_mem, NOUVEAU_BO_RDWR)) {
103 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
104 "Couldn't map filter table!\n");
/* The mapping is treated as an array of signed bytes. */
107 int8_t *t=table_mem->map;
108 compute_filter_table(t);
110 return table_mem->offset;
/*
 * Build a texture swizzle word by OR-ing together one
 * NV34TCL_TX_SWIZZLE_S0_* and one NV34TCL_TX_SWIZZLE_S1_* constant per
 * component (X, Y, Z, W). Each argument is token-pasted onto the constant
 * name, so it must be a bare suffix such as S1, X, Y, Z or W, not an
 * expression.
 */
113 #define SWIZZLE(ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
115 NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \
116 NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \
117 NV34TCL_TX_SWIZZLE_S1_X_##ts1x | NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \
118 NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w \
122 * Texture 0 : filter table
124 * Texture 2 : UV data
/*
 * Program one texture unit: eight words starting at NV34TCL_TX_OFFSET(unit)
 * (offset, format, wrap, enable, swizzle/pitch, filter, NPOT size, border).
 *
 * @param pScrn     screen being drawn to
 * @param offset    absolute offset of the texture data; it is rebased
 *                  against pNv->FB->offset before being emitted
 * @param width     texture width; used as log2 in the format word and
 *                  directly in the NPOT size word
 * @param height    texture height, used the same way as width
 * @param src_pitch pitch placed in the swizzle/pitch word
 * @param unit      texture unit index to program
 *
 * NOTE(review): the conditions that select between the three format/swizzle
 * setups, between the 1D and 2D format words and between the signed and
 * unsigned filter words are not visible in this excerpt; presumably they
 * depend on unit -- confirm.
 */
127 NV30VideoTexture(ScrnInfoPtr pScrn, int offset, uint16_t width, uint16_t height, uint16_t src_pitch, int unit)
129 NVPtr pNv = NVPTR(pScrn);
131 uint32_t card_fmt = 0;
132 uint32_t card_swz = 0;
/* Variant 1: A8R8G8B8, all four components passed through unchanged. */
136 card_fmt = NV34TCL_TX_FORMAT_FORMAT_A8R8G8B8;
137 card_swz = SWIZZLE(S1, S1, S1, S1, X, Y, Z, W);
/* Variant 2: single-channel A8 rectangle, X replicated to every component. */
140 card_fmt = NV34TCL_TX_FORMAT_FORMAT_A8_RECT2;
141 card_swz = SWIZZLE(S1, S1, S1, S1, X, X, X, X);
/* Variant 3: two-channel L8A8 rectangle; component order depends on host endianness. */
144 card_fmt = NV34TCL_TX_FORMAT_FORMAT_L8A8_RECT;
145 #if X_BYTE_ORDER == X_BIG_ENDIAN
146 card_swz = SWIZZLE(S1, S1, S1, S1, Z, W, X, Y); /* x = V, y = U */
148 card_swz = SWIZZLE(S1, S1, S1, S1, W, Z, Y, X); /* x = V, y = U */
/* All eight texture words for this unit go out in one packet. */
153 BEGIN_RING(Nv3D, NV34TCL_TX_OFFSET(unit), 8);
154 /* We get an absolute offset, which needs to be corrected. */
155 OUT_RELOCl(pNv->FB, (uint32_t)(offset - pNv->FB->offset), NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
/* 1D format word, paired with a repeating S wrap. */
157 OUT_RELOCd(pNv->FB, NV34TCL_TX_FORMAT_DIMS_1D |
159 (1 << NV34TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT) |
160 (log2i(width) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT) |
161 (log2i(height) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT) |
163 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD,
164 NV34TCL_TX_FORMAT_DMA0, 0);
165 OUT_RING(NV34TCL_TX_WRAP_S_REPEAT |
166 NV34TCL_TX_WRAP_T_CLAMP_TO_EDGE |
167 NV34TCL_TX_WRAP_R_CLAMP_TO_EDGE);
/* 2D format word, clamped to edge on every axis. */
169 OUT_RELOCd(pNv->FB, NV34TCL_TX_FORMAT_DIMS_2D |
171 (1 << NV34TCL_TX_FORMAT_MIPMAP_LEVELS_SHIFT) |
172 (log2i(width) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT) |
173 (log2i(height) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT) |
175 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD,
176 NV34TCL_TX_FORMAT_DMA0, 0);
177 OUT_RING(NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE |
178 NV34TCL_TX_WRAP_T_CLAMP_TO_EDGE |
179 NV34TCL_TX_WRAP_R_CLAMP_TO_EDGE);
182 OUT_RING(NV34TCL_TX_ENABLE_ENABLE);
/* The pitch shares a word with another value whose line is not visible here. */
183 OUT_RING( ( src_pitch << NV34TCL_TX_SWIZZLE_RECT_PITCH_SHIFT ) |
/* Filter word with all four channels flagged as signed. */
186 OUT_RING(NV34TCL_TX_FILTER_SIGNED_ALPHA |
187 NV34TCL_TX_FILTER_SIGNED_RED |
188 NV34TCL_TX_FILTER_SIGNED_GREEN |
189 NV34TCL_TX_FILTER_SIGNED_BLUE |
190 NV34TCL_TX_FILTER_MINIFY_LINEAR |
191 NV34TCL_TX_FILTER_MAGNIFY_LINEAR |
/* Filter word without the signed flags. */
194 OUT_RING(NV34TCL_TX_FILTER_MINIFY_LINEAR |
195 NV34TCL_TX_FILTER_MAGNIFY_LINEAR |
/* Non-power-of-two size: width in the shifted field, height in the low bits. */
197 OUT_RING((width << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | height);
198 OUT_RING(0); /* border ARGB */
/*
 * Choose the render-target colour format for a pixmap from its
 * bitsPerPixel and store it in *fmt_ret.
 *
 * @param pPix    pixmap whose drawable.bitsPerPixel is examined
 * @param fmt_ret receives one of the NV34TCL_RT_FORMAT_COLOR_* values
 *
 * NOTE(review): the case labels and return statements are not visible in
 * this excerpt. The visible caller treats a zero result as "no usable
 * format".
 */
204 NV30GetSurfaceFormat(PixmapPtr pPix, int *fmt_ret)
206 switch (pPix->drawable.bitsPerPixel) {
208 *fmt_ret = NV34TCL_RT_FORMAT_COLOR_A8R8G8B8;
211 *fmt_ret = NV34TCL_RT_FORMAT_COLOR_X8R8G8B8;
214 *fmt_ret = NV34TCL_RT_FORMAT_COLOR_R5G6B5;
217 *fmt_ret = NV34TCL_RT_FORMAT_COLOR_B8;
/*
 * NOTE(review): only the signature is visible in this excerpt. Judging by
 * the name and parameters this is the stop-video hook of the textured
 * video port -- confirm against the full file.
 */
227 NV30StopTexturedVideo(ScrnInfoPtr pScrn, pointer data, Bool Exit)
231 /* To support EXA 2.0, 2.1 has this in the header */
/*
 * NOTE(review): #ifndef only detects a macro named exaMoveInPixmap. If the
 * header declares it as a plain function, this extern declaration is still
 * emitted and must match that prototype.
 */
232 #ifndef exaMoveInPixmap
233 extern void exaMoveInPixmap(PixmapPtr pPixmap);
/*
 * Emit one vertex: four floats to vertex attribute 8 -- (sx, sy) followed
 * by (sx/2, sy/2) -- then the destination position packed as (dy << 16) | dx
 * to vertex attribute 0.
 *
 * sx and sy are each expanded twice, so arguments must be free of side
 * effects. dx and dy are shifted and OR-ed, so they must be integers.
 * NOTE(review): the halved pair presumably addresses the half-resolution
 * chroma texture -- confirm against the fragment program.
 */
236 #define VERTEX_OUT(sx,sy,dx,dy) do { \
237 BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2F_X(8), 4); \
238 OUT_RINGf ((sx)); OUT_RINGf ((sy)); \
239 OUT_RINGf ((sx)/2.0); OUT_RINGf ((sy)/2.0); \
240 BEGIN_RING(Nv3D, NV34TCL_VERTEX_ATTR_2I(0), 1); \
241 OUT_RING (((dy)<<16)|(dx)); \
/*
 * Draw a planar video image onto a drawable's pixmap through the 3D engine.
 *
 * Visible steps: reject oversized draws, pick the render-target format,
 * migrate and mark the pixmap, translate coordinates by the pixmap's
 * screen_x/screen_y, report damage, set up the render target (plus viewport
 * state when NVArch is 0x30), bind the filter table, luma and chroma
 * textures to units 0, 1 and 2, load a YV12 fragment program, optionally
 * wait for vblank, then emit one scissored triangle per clip box.
 *
 * @param pScrn       screen being drawn to
 * @param src_offset  offset of the texture bound to unit 1 (src_w x src_h)
 * @param src_offset2 offset of the texture bound to unit 2 (src_w/2 x src_h/2)
 * @param src_pitch   pitch passed for both source textures
 * @param dstBox      destination box; modified in place when the pixmap has
 *                    a non-zero screen_x/screen_y
 * @param x1,y1,x2,y2 source rectangle in 16.16 fixed point
 * @param src_w,src_h source texture size
 * @param drw_w,drw_h destination size; values above 4096 are rejected
 *
 * NOTE(review): the rest of the parameter list (the body uses clipBoxes,
 * pDraw and pPriv), several local declarations and all return statements
 * are not visible in this excerpt. id, width and height are not referenced
 * in the visible lines.
 */
244 int NV30PutTextureImage(ScrnInfoPtr pScrn, int src_offset,
245 int src_offset2, int id,
246 int src_pitch, BoxPtr dstBox,
247 int x1, int y1, int x2, int y2,
248 uint16_t width, uint16_t height,
249 uint16_t src_w, uint16_t src_h,
250 uint16_t drw_w, uint16_t drw_h,
255 NVPtr pNv = NVPTR(pScrn);
256 Bool redirected = FALSE;
/* Refuse destinations larger than 4096 in either dimension. */
258 if (drw_w > 4096 || drw_h > 4096) {
259 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
260 "XV: Draw size too large.\n");
264 float X1, X2, Y1, Y2;
265 PixmapPtr pPix = NVGetDrawablePixmap(pDraw);
269 uint64_t filter_table_offset=0;
271 if (!NV30GetSurfaceFormat(pPix, &dst_format)) {
272 ErrorF("No surface format, bad.\n");
275 /* This has to be called always, since it does more than just migration. */
276 exaMoveInPixmap(pPix);
277 ExaOffscreenMarkUsed(pPix);
280 /* Adjust coordinates if drawing to an offscreen pixmap */
281 if (pPix->screen_x || pPix->screen_y) {
282 REGION_TRANSLATE(pScrn->pScreen, clipBoxes,
285 dstBox->x1 -= pPix->screen_x;
286 dstBox->x2 -= pPix->screen_x;
287 dstBox->y1 -= pPix->screen_y;
288 dstBox->y2 -= pPix->screen_y;
291 /* I suspect that pDraw itself is not offscreen, hence not suited for damage tracking. */
292 DamageDamageRegion(&pPix->drawable, clipBoxes);
294 /* This test is unneeded for !COMPOSITE. */
295 if (!NVExaPixmapIsOnscreen(pPix))
299 pbox = REGION_RECTS(clipBoxes);
300 nbox = REGION_NUM_RECTS(clipBoxes);
302 /* Disable blending */
303 BEGIN_RING(Nv3D, NV34TCL_BLEND_FUNC_ENABLE, 1);
/* Render target: format word, pitch word (same pitch in both halves), then the pixmap offset. */
307 BEGIN_RING(Nv3D, NV34TCL_RT_FORMAT, 3);
308 OUT_RING (NV34TCL_RT_FORMAT_TYPE_LINEAR |
309 NV34TCL_RT_FORMAT_ZETA_Z24S8 |
311 OUT_RING ((exaGetPixmapPitch(pPix) << 16) | exaGetPixmapPitch(pPix));
312 OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* Extra viewport programming, only when NVArch is exactly 0x30. */
314 if (pNv->NVArch == 0x30) {
317 int w = pDraw->x+pDraw->width;
318 int h = pDraw->y+pDraw->height;
320 BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_HORIZ, 2);
321 OUT_RING ((w<<16)|x);
322 OUT_RING ((h<<16)|y);
323 BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2);
324 OUT_RING ((w-1+x)<<16);
325 OUT_RING ((h-1+y)<<16);
326 BEGIN_RING(Nv3D, NV34TCL_VIEWPORT_TX_ORIGIN, 1);
330 filter_table_offset=NV30_LoadFilterTable(pScrn);
332 BEGIN_RING(Nv3D, NV34TCL_TX_UNITS_ENABLE, 1);
333 OUT_RING (NV34TCL_TX_UNITS_ENABLE_TX0 |
334 NV34TCL_TX_UNITS_ENABLE_TX1);
/*
 * Unit 0: filter table, TABLE_SIZE x 1. Unit 1: full-size source texture.
 * NOTE(review): filter_table_offset is uint64_t but NV30VideoTexture()
 * takes an int offset, so the value is narrowed on this call.
 */
336 NV30VideoTexture(pScrn, filter_table_offset, TABLE_SIZE, 1, 0 , 0);
337 NV30VideoTexture(pScrn, src_offset, src_w, src_h, src_pitch, 1);
338 /* We've got NV12 format, which means half width and half height texture of chroma channels. */
339 NV30VideoTexture(pScrn, src_offset2, src_w/2, src_h/2, src_pitch, 2);
341 BEGIN_RING(Nv3D, NV34TCL_TX_ENABLE(3), 1);
/* Bicubic or bilinear YV12 fragment program; the selecting condition is not visible here. */
345 NV30_LoadFragProg(pScrn, &nv30_fp_yv12_bicubic);
347 NV30_LoadFragProg(pScrn, &nv30_fp_yv12_bilinear);
349 /* Just before rendering we wait for vblank in the non-composited case. */
350 if (pPriv->SyncToVBlank && !redirected) {
351 uint8_t crtcs = nv_window_belongs_to_crtc(pScrn, dstBox->x1, dstBox->y1,
352 dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1);
/* crtcs is a bitmask; bit 1 (0x2) selects head 1. */
356 NVWaitVSync(pScrn, 0);
357 else if (crtcs & 0x2)
358 NVWaitVSync(pScrn, 1);
361 /* These are fixed point values in the 16.16 format. */
362 X1 = (float)(x1>>16)+(float)(x1&0xFFFF)/(float)0x10000;
363 Y1 = (float)(y1>>16)+(float)(y1&0xFFFF)/(float)0x10000;
364 X2 = (float)(x2>>16)+(float)(x2&0xFFFF)/(float)0x10000;
365 Y2 = (float)(y2>>16)+(float)(y2&0xFFFF)/(float)0x10000;
367 BEGIN_RING(Nv3D, NV34TCL_VERTEX_BEGIN_END, 1);
368 OUT_RING (NV34TCL_VERTEX_BEGIN_END_TRIANGLES);
/*
 * Map the clip box corners back into source texture coordinates.
 * NOTE(review): tx1/ty1 scale by the source rectangle extent (X2-X1, Y2-Y1)
 * while tx2/ty2 scale by src_w/src_h. The two agree only when the source
 * rectangle spans the whole image -- confirm whether tx2/ty2 should also
 * use (X2-X1) and (Y2-Y1).
 */
371 float tx1=X1+(float)(pbox->x1 - dstBox->x1)*(X2-X1)/(float)(drw_w);
372 float tx2=X1+(float)(pbox->x2 - dstBox->x1)*(src_w)/(float)(drw_w);
373 float ty1=Y1+(float)(pbox->y1 - dstBox->y1)*(Y2-Y1)/(float)(drw_h);
374 float ty2=Y1+(float)(pbox->y2 - dstBox->y1)*(src_h)/(float)(drw_h);
/* Scissor words carry sx2/sy2 in the high half and 0 in the low half. */
380 BEGIN_RING(Nv3D, NV34TCL_SCISSOR_HORIZ, 2);
381 OUT_RING ((sx2 << 16) | 0);
382 OUT_RING ((sy2 << 16) | 0);
/*
 * One triangle with its corner at (sx1, sy1) and legs twice the box width
 * and height; presumably the scissor trims it back to the box -- confirm.
 */
384 VERTEX_OUT(tx1, ty1, sx1, sy1);
385 VERTEX_OUT(tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1);
386 VERTEX_OUT(tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1));
391 BEGIN_RING(Nv3D, NV34TCL_VERTEX_BEGIN_END, 1);
392 OUT_RING (NV34TCL_VERTEX_BEGIN_END_STOP);
400 * NV30SetTexturePortAttribute
401 * sets the attribute "attribute" of port "data" to value "value"
402 * supported attributes:
406 * @param attribute attribute to set
407 * @param value value to which attribute is to be set
408 * @param data port on which the attribute is to be set
410 * @return Success, if setting is successful
411 * BadValue/BadMatch, if value/attribute are invalid
/*
 * Set an attribute on a textured-video port.
 *
 * Two attributes are handled in the visible code:
 *  - xvSyncToVBlank: accepted only when pNv->WaitVSyncPossible is set;
 *    the value must be 0 or 1 and is stored in pPriv->SyncToVBlank.
 *  - xvSetDefaults: resets pPriv->SyncToVBlank to pNv->WaitVSyncPossible.
 *
 * NOTE(review): the return statements and the handling of unrecognised
 * attributes are not visible in this excerpt.
 */
414 NV30SetTexturePortAttribute(ScrnInfoPtr pScrn, Atom attribute,
415 INT32 value, pointer data)
417 NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
418 NVPtr pNv = NVPTR(pScrn);
/* With WaitVSyncPossible clear, a SyncToVBlank request does not enter this branch. */
420 if ((attribute == xvSyncToVBlank) && pNv->WaitVSyncPossible) {
/* Only 0 and 1 are valid. */
421 if ((value < 0) || (value > 1))
423 pPriv->SyncToVBlank = value;
425 if (attribute == xvSetDefaults) {
/* The default follows the hardware capability. */
426 pPriv->SyncToVBlank = pNv->WaitVSyncPossible;
434 * NV30GetTexturePortAttribute
435 * reads the value of attribute "attribute" from port "data" into INT32 "*value"
438 * @param pScrn unused
439 * @param attribute attribute to be read
440 * @param value value of attribute will be stored here
441 * @param data port from which attribute will be read
442 * @return Success, if queried attribute exists
445 NV30GetTexturePortAttribute(ScrnInfoPtr pScrn, Atom attribute,
446 INT32 *value, pointer data)
448 NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
450 if(attribute == xvSyncToVBlank)
451 *value = (pPriv->SyncToVBlank) ? 1 : 0;