2 * Copyright 2007-2008 Maarten Maathuis
3 * Copyright 2008 Stephane Marchesin
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
30 #include <X11/extensions/Xv.h>
33 #include "dixstruct.h"
36 #include "nv_include.h"
39 #include "nv30_shaders.h"
41 extern Atom xvSyncToVBlank, xvSetDefaults;
44 * The filtering function used for video scaling. We use a cubic filter as defined in
45 * "Reconstruction Filters in Computer Graphics"
46 * Mitchell & Netravali in SIGGRAPH '88
/*
 * filter_func - evaluate the cubic reconstruction filter at x.
 *
 * Both return expressions are cubic polynomials in x1/x2/x3 whose
 * coefficients are built from B and C and whose sum is divided by 6.0.
 * The first has no linear (x1) term; the second uses all four terms.
 * The arithmetic is done in double and narrowed to float on return.
 *
 * NOTE(review): this excerpt does not show the definitions of B, x1, x2
 * and x3, nor the condition that selects between the two returns.
 * Presumably x1..x3 are powers of |x| and the split is at |x| < 1, as in
 * the Mitchell-Netravali filter - confirm against the complete file.
 */
48 static float filter_func(float x)
/* C is derived from B, so B is the only free parameter of the filter. */
51 const double C=(1.0-B)/2.0;
57 return ( (12.0-9.0*B-6.0*C)*x3+(-18.0+12.0*B+6.0*C)*x2+(6.0-2.0*B) )/6.0;
59 return ( (-B-6.0*C)*x3+(6.0*B+30.0*C)*x2+(-12.0*B-48.0*C)*x1+(8.0*B+24.0*C) )/6.0;
/*
 * f32tosb8 - convert a float to a signed 8-bit fixed-point value.
 *
 * The input is scaled by 127 (in double precision) and truncated toward
 * zero, so 1.0 maps to 127, -1.0 maps to -127 and 0.5 maps to 63.
 *
 * The caller must keep v*127 within the range of int8_t: converting an
 * out-of-range floating-point value to an integer type is undefined
 * behaviour in C.
 */
static int8_t f32tosb8(float v)
{
	return (int8_t)(v*127.0);
}
68 * Implements the filtering as described in
69 * "Fast Third-Order Texture Filtering"
70 * Sigg & Hadwiger in GPU Gems 2
72 #define TABLE_SIZE 512
/*
 * compute_filter_table - fill t with the filter lookup table.
 *
 * Writes four signed bytes per entry for TABLE_SIZE entries, so t must
 * have room for at least 4*TABLE_SIZE bytes. For each entry, w0..w3 are
 * the filter_func() weights at x+1, x, x-1 and x-2, and the bytes are:
 *   [0] w0+w1
 *   [1] 1.0 - x + w3/(w2+w3)
 *   [2] 1.0 + x - w1/(w0+w1)
 *   [3] 0
 * each converted with f32tosb8().
 *
 * NOTE(review): the declarations of i and x and the statement that
 * derives x from i are not visible in this excerpt - confirm against the
 * complete file.
 */
73 static void compute_filter_table(int8_t *t) {
76 for(i=0;i<TABLE_SIZE;i++) {
/* Filter weights of the four neighbouring taps around position x. */
79 float w0=filter_func(x+1.0);
80 float w1=filter_func(x);
81 float w2=filter_func(x-1.0);
82 float w3=filter_func(x-2.0);
/* Note the store order: component 2 first, then 1, 0 and the unused 3. */
84 t[4*i+2]=f32tosb8(1.0+x-w1/(w0+w1));
85 t[4*i+1]=f32tosb8(1.0-x+w3/(w2+w3));
86 t[4*i+0]=f32tosb8(w0+w1);
87 t[4*i+3]=f32tosb8(0.0);
/*
 * NV40_LoadFilterTable - allocate, map and fill the Xv filter table.
 *
 * When pNv->xv_filtertable_mem is not yet set, a buffer object is created
 * with nouveau_bo_new(); the object is mapped read/write and its mapping
 * is handed to compute_filter_table(). Allocation and mapping failures
 * are logged at X_ERROR level.
 *
 * NOTE(review): the return type, the statements following the two error
 * messages, and the extent of the "not yet allocated" branch are not
 * visible in this excerpt. No unmap call is visible either - confirm
 * against the complete file.
 */
92 NV40_LoadFilterTable(ScrnInfoPtr pScrn)
94 NVPtr pNv = NVPTR(pScrn);
96 if (!pNv->xv_filtertable_mem) {
/*
 * NOTE(review): the size is TABLE_SIZE*sizeof(float)*4 bytes, while
 * compute_filter_table() writes 4*TABLE_SIZE int8_t values. The buffer
 * looks larger than needed; harmless, but confirm the intent.
 */
97 if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
98 0, TABLE_SIZE*sizeof(float)*4, &pNv->xv_filtertable_mem)) {
99 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
100 "Couldn't alloc filter table!\n");
104 if (nouveau_bo_map(pNv->xv_filtertable_mem, NOUVEAU_BO_RDWR)) {
105 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
106 "Couldn't map filter table!\n");
/* Fill the mapped buffer through a signed-byte view of it. */
110 int8_t *t=pNv->xv_filtertable_mem->map;
111 compute_filter_table(t);
/*
 * SWIZZLE - build a texture swizzle register value.
 *
 * Each argument is a token that is pasted onto the matching
 * NV40TCL_TEX_SWIZZLE_S0_{X,Y,Z,W}_ / NV40TCL_TEX_SWIZZLE_S1_{X,Y,Z,W}_
 * prefix; the eight resulting constants are OR-ed together. The expansion
 * is parenthesised so it can be used safely inside larger expressions.
 */
#define SWIZZLE(ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)			\
	(									\
	NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y |	\
	NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w |	\
	NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y |	\
	NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w	\
	)
124 * Texture 0 : filter table
126 * Texture 2 : UV data
/*
 * NV40VideoTexture - emit the state for one texture unit.
 *
 * Programs texture unit `unit` to sample from `src` at `offset`. Eight
 * words are written starting at NV40TCL_TEX_OFFSET(unit): offset, format,
 * wrap, enable, swizzle, filter, size ((width << 16) | height) and border
 * colour. A final write to NV40TCL_TEX_SIZE1(unit) carries a depth of 1
 * and src_pitch.
 *
 * Three format/swizzle pairs (A8R8G8B8, L8, A8L8), two format/wrap
 * variants (1D with S repeat vs. linear 2D rect with clamp-to-edge) and
 * two filter variants (signed vs. unsigned components) appear below.
 *
 * NOTE(review): the return type and the conditions choosing among these
 * variants are not visible in this excerpt; presumably they depend on
 * `unit` - confirm against the complete file.
 */
129 NV40VideoTexture(ScrnInfoPtr pScrn, struct nouveau_bo *src, int offset,
130 uint16_t width, uint16_t height, uint16_t src_pitch, int unit)
132 NVPtr pNv = NVPTR(pScrn);
133 struct nouveau_channel *chan = pNv->chan;
134 struct nouveau_grobj *curie = pNv->Nv3D;
136 uint32_t card_fmt = 0;
137 uint32_t card_swz = 0;
/* Variant 1: four-component texels, components passed through as X,Y,Z,W. */
141 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_A8R8G8B8;
142 card_swz = SWIZZLE(S1, S1, S1, S1, X, Y, Z, W);
/* Variant 2: single-component texels, X replicated to all four outputs. */
145 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_L8;
146 card_swz = SWIZZLE(S1, S1, S1, S1, X, X, X, X);
/* Variant 3: two-component texels; the swizzle depends on host byte order. */
149 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_A8L8;
150 #if X_BYTE_ORDER == X_BIG_ENDIAN
151 card_swz = SWIZZLE(S1, S1, S1, S1, Z, W, X, Y); /* x = V, y = U */
153 card_swz = SWIZZLE(S1, S1, S1, S1, W, Z, Y, X); /* x = V, y = U */
/* Texture offset, relocated against the source buffer object. */
158 BEGIN_RING(chan, curie, NV40TCL_TEX_OFFSET(unit), 8);
159 OUT_RELOCl(chan, src, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
/*
 * Format word, 1D variant, with S wrapping set to repeat.
 * NOTE(review): the meaning of the raw 0x8000 bit is not documented here.
 */
161 OUT_RELOCd(chan, pNv->FB, card_fmt |
162 NV40TCL_TEX_FORMAT_DIMS_1D | 0x8000 |
163 NV40TCL_TEX_FORMAT_NO_BORDER |
164 (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT),
165 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD,
166 NV40TCL_TEX_FORMAT_DMA0, 0);
167 OUT_RING (chan, NV40TCL_TEX_WRAP_S_REPEAT |
168 NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE |
169 NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE);
/* Format word, linear 2D rectangle variant, clamped on every axis. */
171 OUT_RELOCd(chan, pNv->FB, card_fmt | NV40TCL_TEX_FORMAT_LINEAR |
172 NV40TCL_TEX_FORMAT_RECT | 0x8000 |
173 NV40TCL_TEX_FORMAT_DIMS_2D |
174 NV40TCL_TEX_FORMAT_NO_BORDER |
175 (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT),
176 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD,
177 NV40TCL_TEX_FORMAT_DMA0, 0);
178 OUT_RING (chan, NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE |
179 NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE |
180 NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE);
183 OUT_RING (chan, NV40TCL_TEX_ENABLE_ENABLE);
184 OUT_RING (chan, card_swz);
/*
 * Filter word, signed variant: all four components are treated as signed.
 * NOTE(review): the meaning of the raw 0x3fd6 bits is not documented here.
 */
186 OUT_RING (chan, NV40TCL_TEX_FILTER_SIGNED_ALPHA |
187 NV40TCL_TEX_FILTER_SIGNED_RED |
188 NV40TCL_TEX_FILTER_SIGNED_GREEN |
189 NV40TCL_TEX_FILTER_SIGNED_BLUE |
190 NV40TCL_TEX_FILTER_MIN_LINEAR |
191 NV40TCL_TEX_FILTER_MAG_LINEAR | 0x3fd6);
/* Filter word, unsigned variant: linear min/mag filtering only. */
193 OUT_RING (chan, NV40TCL_TEX_FILTER_MIN_LINEAR |
194 NV40TCL_TEX_FILTER_MAG_LINEAR | 0x3fd6);
195 OUT_RING (chan, (width << 16) | height);
196 OUT_RING (chan, 0); /* border ARGB */
/* Second size register: depth of 1 in the high field, pitch in the low. */
198 BEGIN_RING(chan, curie, NV40TCL_TEX_SIZE1(unit), 1);
199 OUT_RING (chan, (1 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) |
200 (uint16_t) src_pitch);
/*
 * NV40GetSurfaceFormat - pick a render-target colour format for a pixmap.
 *
 * Switches on ppix->drawable.bitsPerPixel and stores one of the
 * NV40TCL_RT_FORMAT_COLOR_* constants in *fmt_ret. The caller in this
 * file treats a zero return value as "no usable format".
 *
 * NOTE(review): the return type, the case labels and the return
 * statements are not visible in this excerpt; the depth that maps to
 * each format must be confirmed against the complete file.
 */
206 NV40GetSurfaceFormat(PixmapPtr ppix, int *fmt_ret)
208 switch (ppix->drawable.bitsPerPixel) {
210 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
213 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_X8R8G8B8;
216 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_R5G6B5;
219 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_B8;
/*
 * NV40StopTexturedVideo - stop hook for the textured video port.
 *
 * NOTE(review): only the parameter list is visible in this excerpt; the
 * return type and the body must be confirmed against the complete file.
 */
229 NV40StopTexturedVideo(ScrnInfoPtr pScrn, pointer data, Bool Exit)
/*
 * VERTEX_OUT - emit one vertex to the 3D object.
 *
 * Writes four floats to vertex attribute 8 (sx, sy, sx/2, sy/2) and one
 * packed word ((dy << 16) | dx) to vertex attribute 0. The halved
 * coordinates presumably address the half-size chroma texture - confirm
 * against the vertex program.
 *
 * Expects `chan` and `curie` to be in scope at the expansion site. Every
 * argument is evaluated more than once, so pass side-effect-free
 * expressions only. Wrapped in do { } while(0) so it behaves as a single
 * statement.
 */
#define VERTEX_OUT(sx,sy,dx,dy) do {                                          \
	BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(8), 4);                 \
	OUT_RINGf (chan, (sx)); OUT_RINGf (chan, (sy));                       \
	OUT_RINGf (chan, (sx)/2.0); OUT_RINGf (chan, (sy)/2.0);               \
	BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2I(0), 1);                   \
	OUT_RING  (chan, ((dy)<<16)|(dx));                                    \
} while(0)
/*
 * NV40PutTextureImage - draw a video frame through the 3D engine.
 *
 * Visible steps, in order:
 *  - log an error when drw_w or drw_h exceeds 4096;
 *  - look up the destination colour format with NV40GetSurfaceFormat();
 *  - check whether the destination pixmap is on screen;
 *  - disable blending and set the render target format, pitch and offset;
 *  - load the filter table and program three texture units;
 *  - load the vertex program and a bicubic or bilinear fragment program;
 *  - when pPriv->SyncToVBlank is set and the draw is not redirected, wait
 *    for vertical sync on a CRTC chosen from nv_window_belongs_to_crtc();
 *  - convert x1, y1, x2, y2 from 16.16 fixed point to float;
 *  - set a scissor and emit three vertices per clip box;
 *  - end the primitive.
 *
 * NOTE(review): the return type, the trailing parameters (pPriv is used
 * but not declared in the visible lines), several local declarations, the
 * loop over the clip boxes, the values written after some BEGIN_RING
 * calls and every return statement are not visible in this excerpt -
 * confirm against the complete file.
 */
242 NV40PutTextureImage(ScrnInfoPtr pScrn,
243 struct nouveau_bo *src, int src_offset, int src_offset2,
244 int id, int src_pitch, BoxPtr dstBox,
245 int x1, int y1, int x2, int y2,
246 uint16_t width, uint16_t height,
247 uint16_t src_w, uint16_t src_h,
248 uint16_t drw_w, uint16_t drw_h,
249 RegionPtr clipBoxes, PixmapPtr ppix,
252 NVPtr pNv = NVPTR(pScrn);
253 struct nouveau_channel *chan = pNv->chan;
254 struct nouveau_grobj *curie = pNv->Nv3D;
255 Bool redirected = FALSE;
256 float X1, X2, Y1, Y2;
/* Destination sizes above 4096 in either dimension are rejected. */
261 if (drw_w > 4096 || drw_h > 4096) {
262 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
263 "XV: Draw size too large.\n");
267 if (!NV40GetSurfaceFormat(ppix, &dst_format)) {
268 ErrorF("No surface format, bad.\n");
/*
 * NOTE(review): the statement guarded by this test is not visible;
 * presumably it sets `redirected` - confirm.
 */
272 if (!NVExaPixmapIsOnscreen(ppix))
276 pbox = REGION_RECTS(clipBoxes);
277 nbox = REGION_NUM_RECTS(clipBoxes);
279 /* Disable blending */
280 BEGIN_RING(chan, curie, NV40TCL_BLEND_ENABLE, 1);
/* Render target: linear layout, Z24S8 zeta, colour format from above. */
284 BEGIN_RING(chan, curie, NV40TCL_RT_FORMAT, 3);
285 OUT_RING (chan, NV40TCL_RT_FORMAT_TYPE_LINEAR |
286 NV40TCL_RT_FORMAT_ZETA_Z24S8 | dst_format);
287 OUT_RING (chan, exaGetPixmapPitch(ppix));
288 OUT_PIXMAPl(chan, ppix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
290 NV40_LoadFilterTable(pScrn);
/*
 * Unit 0: the filter table as a TABLE_SIZE x 1 texture.
 * Unit 1: the source at src_offset, src_w x src_h.
 * Unit 2: the source at src_offset2, at half width and half height.
 */
292 NV40VideoTexture(pScrn, pNv->xv_filtertable_mem, 0, TABLE_SIZE, 1, 0 , 0);
293 NV40VideoTexture(pScrn, src, src_offset, src_w, src_h, src_pitch, 1);
294 /* We've got NV12 format, which means half width and half height texture of chroma channels. */
295 NV40VideoTexture(pScrn, src, src_offset2, src_w/2, src_h/2, src_pitch, 2);
297 NV40_LoadVtxProg(pScrn, &nv40_vp_video);
/*
 * NOTE(review): the condition choosing between the bicubic and the
 * bilinear fragment program is not visible in this excerpt.
 */
299 NV40_LoadFragProg(pScrn, &nv40_fp_yv12_bicubic);
301 NV40_LoadFragProg(pScrn, &nv30_fp_yv12_bilinear);
303 /* Appears to be some kind of cache flush, needed here at least
304 * sometimes.. funky text rendering otherwise :)
306 BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
308 BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
311 /* Just before rendering we wait for vblank in the non-composited case. */
312 if (pPriv->SyncToVBlank && !redirected) {
/* crtcs is tested as a bit mask below: bit 1 (0x2) selects CRTC 1. */
313 uint8_t crtcs = nv_window_belongs_to_crtc(pScrn, dstBox->x1, dstBox->y1,
314 dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1);
318 NVWaitVSync(pScrn, 0);
319 else if (crtcs & 0x2)
320 NVWaitVSync(pScrn, 1);
323 /* These are fixed point values in the 16.16 format. */
324 X1 = (float)(x1>>16)+(float)(x1&0xFFFF)/(float)0x10000;
325 Y1 = (float)(y1>>16)+(float)(y1&0xFFFF)/(float)0x10000;
326 X2 = (float)(x2>>16)+(float)(x2&0xFFFF)/(float)0x10000;
327 Y2 = (float)(y2>>16)+(float)(y2&0xFFFF)/(float)0x10000;
329 BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
330 OUT_RING (chan, NV40TCL_BEGIN_END_TRIANGLES);
/*
 * Map the clip box corners from destination space into source texture
 * coordinates.
 * NOTE(review): tx1/ty1 scale the box offset by the source extent
 * (X2-X1, Y2-Y1), while tx2/ty2 scale it by src_w/src_h. The two agree
 * only when X2-X1 == src_w and Y2-Y1 == src_h; this looks inconsistent -
 * confirm the intent.
 */
333 float tx1=X1+(float)(pbox->x1 - dstBox->x1)*(X2-X1)/(float)(drw_w);
334 float tx2=X1+(float)(pbox->x2 - dstBox->x1)*(src_w)/(float)(drw_w);
335 float ty1=Y1+(float)(pbox->y1 - dstBox->y1)*(Y2-Y1)/(float)(drw_h);
336 float ty2=Y1+(float)(pbox->y2 - dstBox->y1)*(src_h)/(float)(drw_h);
/* Scissor words: sx2 and sy2 in the high halves, 0 in the low halves. */
342 BEGIN_RING(chan, curie, NV40TCL_SCISSOR_HORIZ, 2);
343 OUT_RING (chan, (sx2 << 16) | 0);
344 OUT_RING (chan, (sy2 << 16) | 0);
/*
 * One triangle per box: the second and third vertices are pushed out to
 * twice the box extent, so the triangle covers the whole box; the excess
 * is presumably clipped by the scissor set above.
 */
346 VERTEX_OUT(tx1, ty1, sx1, sy1);
347 VERTEX_OUT(tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1);
348 VERTEX_OUT(tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1));
353 BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
354 OUT_RING (chan, NV40TCL_BEGIN_END_STOP);
362 * NV40SetTexturePortAttribute
363 * sets the attribute "attribute" of port "data" to value "value"
364 * supported attributes:
368 * @param attribute attribute to set
369 * @param value value to which attribute is to be set
370 * @param data port from which the attribute is to be set
372 * @return Success, if setting is successful
373 * BadValue/BadMatch, if value/attribute are invalid
/*
 * NV40SetTexturePortAttribute - set an attribute on a textured video port.
 *
 * Two attributes are handled in the visible lines:
 *  - xvSyncToVBlank, only when pNv->WaitVSyncPossible is set: the value
 *    is range-checked against 0..1 and stored in pPriv->SyncToVBlank;
 *  - xvSetDefaults: pPriv->SyncToVBlank is reset to
 *    pNv->WaitVSyncPossible.
 *
 * NOTE(review): the return type, the statement executed when the range
 * check fails, the linkage between the two attribute tests and the
 * return statements are not visible in this excerpt - confirm against
 * the complete file.
 */
376 NV40SetTexturePortAttribute(ScrnInfoPtr pScrn, Atom attribute,
377 INT32 value, pointer data)
379 NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
380 NVPtr pNv = NVPTR(pScrn);
382 if ((attribute == xvSyncToVBlank) && pNv->WaitVSyncPossible) {
/* Only 0 and 1 are accepted for the sync-to-vblank switch. */
383 if ((value < 0) || (value > 1))
385 pPriv->SyncToVBlank = value;
387 if (attribute == xvSetDefaults) {
/* Default: sync to vblank exactly when the hardware path allows it. */
388 pPriv->SyncToVBlank = pNv->WaitVSyncPossible;
396 * NV40GetTexturePortAttribute
397 * reads the value of attribute "attribute" from port "data" into INT32 "*value"
400 * @param pScrn unused
401 * @param attribute attribute to be read
402 * @param value value of attribute will be stored here
403 * @param data port from which attribute will be read
404 * @return Success, if queried attribute exists
/*
 * NV40GetTexturePortAttribute - read an attribute of a textured video port.
 *
 * For xvSyncToVBlank, *value is set to 1 when pPriv->SyncToVBlank is
 * non-zero and to 0 otherwise. pScrn is not used in the visible lines.
 *
 * NOTE(review): the return type, the handling of other attributes and
 * the return statements are not visible in this excerpt - confirm
 * against the complete file.
 */
407 NV40GetTexturePortAttribute(ScrnInfoPtr pScrn, Atom attribute,
408 INT32 *value, pointer data)
410 NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
412 if(attribute == xvSyncToVBlank)
/* Normalise the stored flag to exactly 0 or 1. */
413 *value = (pPriv->SyncToVBlank) ? 1 : 0;