2 * Copyright 2007 Maarten Maathuis
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
29 #include "xf86_OSproc.h"
30 #include "xf86Resources.h"
32 #include "xf86PciInfo.h"
34 #include "xf86fbman.h"
35 #include "regionstr.h"
38 #include <X11/extensions/Xv.h>
41 #include "dixstruct.h"
44 #include "nv_include.h"
47 #include "nv_shaders.h"
/*
 * Vertex program used by the textured video path (loaded through
 * NV40_LoadVtxProg() in NV40PutTextureImage()).  It consists of three MOV
 * instructions that pass the vertex position and texture coordinates 0 and 1
 * straight through to the matching results; each instruction is four 32-bit
 * words.
 * NOTE(review): the embedded listing numbers jump from 51 to 54 and stop at
 * 59, so the remaining initializer lines and the closing brace are not
 * visible here - confirm against the complete file.
 */
49 static nv_shader_t nv40_video = {
/* Vertex program input/output register selections; the meaning of the
 * individual bits is not shown in this file - TODO confirm against the
 * hardware documentation. */
50 .card_priv.NV30VP.vp_in_reg = 0x00000309,
51 .card_priv.NV30VP.vp_out_reg = 0x0000c001,
54 /* MOV result.position, vertex.position */
55 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
56 /* MOV result.texcoord[0], vertex.texcoord[0] */
57 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
58 /* MOV result.texcoord[1], vertex.texcoord[1] */
59 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
/*
 * Fragment program used by the textured video path (loaded through
 * NV40_LoadFragProg() in NV40PutTextureImage()).
 *
 * Flow, as described by the per-instruction comments below:
 *   - sample texcoord[0] into R1 and texcoord[1] into R2;
 *   - gather R1.w, R2.y and R2.x into R3.xyz by dot products with unit
 *     vectors;
 *   - add { 0, -0.5, -0.5, 0 } to R3;
 *   - compute R0.x, R0.y and R0.z as three DP3s of R3 with constant
 *     coefficient rows (presumably a YUV-to-RGB conversion matrix - confirm);
 *   - copy R3.w into R0.w and end the program.
 *
 * Each instruction is four 32-bit words.  The uncommented rows that follow an
 * instruction hold its inline constant as IEEE-754 single-precision bit
 * patterns (0x3f800000 = 1.0, 0xBF000000 = -0.5, 0x3F957A78 ~= 1.1678), in the
 * same order as the braces in the instruction comment.
 * NOTE(review): the embedded listing numbers skip several lines (65-66, 73,
 * 77, ...), so part of the initializer and the closing brace are not visible
 * here.
 */
63 static nv_shader_t nv40_yv12 = {
64 .card_priv.NV30FP.num_regs = 4,
67 /* INST 0.0, TEX R1 (TR0.xyzw), attrib.texcoord[0] */
68 0x17009e02, 0x1c9dc811, 0x0001c801, 0x0001c801,
69 /* INST 1.0, TEX R2 (TR0.xyzw), attrib.texcoord[1] */
70 0x1702be04, 0x1c9dc815, 0x0001c801, 0x0001c801,
71 /* INST 2.0, DP4R R3.x (TR0.xyzw), R1, { 0.00, 0.00, 0.00, 1.00 } */
72 0x06000206, 0x1c9dc804, 0x0001c802, 0x0001c801,
/* Inline constant for INST 2.0: selects R1.w. */
74 0x00000000, 0x00000000, 0x00000000, 0x3f800000,
75 /* INST 3.0, DP4R R3.y (TR0.xyzw), R2, { 0.00, 1.00, 0.00, 0.00 } */
76 0x06000406, 0x1c9dc808, 0x0001c802, 0x0001c801,
/* Inline constant for INST 3.0: selects R2.y. */
78 0x00000000, 0x3f800000, 0x00000000, 0x00000000,
79 /* INST 4.0, DP4R R3.z (TR0.xyzw), R2, { 1.00, 0.00, 0.00, 0.00 } */
80 0x06000806, 0x1c9dc808, 0x0001c802, 0x0001c801,
/* Inline constant for INST 4.0: selects R2.x. */
82 0x3f800000, 0x00000000, 0x00000000, 0x00000000,
83 /* INST 5.0, ADDR R3 (TR0.xyzw), R3, { 0.00, -0.50, -0.50, 0.00 } */
84 0x03001e06, 0x1c9dc80c, 0x0001c802, 0x0001c801,
/* Inline constant for INST 5.0: subtracts 0.5 from the y and z components. */
86 0x00000000, 0xBF000000, 0xBF000000, 0x00000000,
87 /* INST 6.0, DP3R R0.x (TR0.xyzw), R3, { 1.1678, 0.00, 1.6007, 0.00 } */
88 0x05000280, 0x1c9dc80c, 0x0001c802, 0x0001c801,
/* Inline constant for INST 6.0: coefficient row for R0.x. */
90 0x3F957A78, 0x00000000, 0x3FCCE3BD, 0x00000000,
91 /* INST 7.0, DP3R R0.y (TR0.xyzw), R3, { 1.1678, -0.3929, -0.8154, 0.00 } */
92 0x05000480, 0x1c9dc80c, 0x0001c802, 0x0001c801,
/* Inline constant for INST 7.0: coefficient row for R0.y. */
94 0x3F957A78, 0xBEC92A30, 0xBF50BE0E, 0x00000000,
95 /* INST 8.0, DP3R R0.z (TR0.xyzw), R3, { 1.1678, 2.0232, 0.00, 0.00 } */
96 0x05000880, 0x1c9dc80c, 0x0001c802, 0x0001c801,
/* Inline constant for INST 8.0: coefficient row for R0.z. */
98 0x3F957A78, 0x40017C1C, 0x00000000, 0x00000000,
99 /* INST 9.0, MOVR R0.w (TR0.xyzw), R3.wwww + END */
100 0x01001081, 0x1c9dfe0c, 0x0001c801, 0x0001c801,
/*
 * SWIZZLE(ts0x, ts0y, ts0z, ts0w, ts1x, ts1y, ts1z, ts1w)
 *
 * Builds a texture swizzle word by OR-ing eight NV40TCL_TEX_SWIZZLE_* constants
 * whose names are formed by token-pasting the arguments: the first four
 * arguments pick the S0 selector for X/Y/Z/W, the last four pick the S1
 * selector for X/Y/Z/W.  Arguments must therefore be bare suffixes such as
 * ZERO, S1, X, Y, Z or W (the forms used in this file), not expressions.
 * NOTE(review): the embedded listing numbers skip 105 and 110; any enclosing
 * parentheses of the expansion are not visible here.
 */
104 #define SWIZZLE(ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \
106 NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \
107 NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \
108 NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \
109 NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w \
/*
 * Emit the texture state for one video plane on texture unit `unit`.
 *
 * pScrn     - screen whose NVPtr supplies the framebuffer object (pNv->FB).
 * offset    - absolute offset of the plane; it is rebased against
 *             pNv->FB->offset before being emitted as a relocation.
 * width     - texture width, packed into the upper 16 bits of the size word.
 * height    - texture height, packed into the lower 16 bits of the size word.
 * src_pitch - pitch, written to NV40TCL_TEX_SIZE1 together with a depth of 1.
 * unit      - texture unit index used to address the TEX_OFFSET and TEX_SIZE1
 *             methods.
 *
 * NOTE(review): the embedded listing numbers skip 112, 114, 119-120, 124,
 * 128-129, 138, 143-144, 147, 151-152 and 158 onward, so the return type, the
 * conditionals that choose between the two format/filter variants, some ring
 * writes and the return statement are not visible here.  Judging by the
 * comments and by the call sites in NV40PutTextureImage(), the first variant
 * is presumably for the luma plane (unit 0) and the second for the chroma
 * plane (unit 1) - confirm against the complete file.
 */
113 NV40VideoTexture(ScrnInfoPtr pScrn, int offset, uint16_t width, uint16_t height, uint16_t src_pitch, int unit)
115 NVPtr pNv = NVPTR(pScrn);
117 uint32_t card_fmt = 0;
118 uint32_t card_swz = 0;
121 /* Pretend we've got a normal 8 bits format. */
122 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_L8;
123 card_swz = SWIZZLE(ZERO, ZERO, ZERO, S1, X, X, X, X);
125 /* Pretend we've got a normal 2x8 bits format. */
126 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_A8L8;
127 card_swz = SWIZZLE(S1, S1, S1, S1, Y, X, W, Z); /* x = V, y = U */
/* Eight words of texture state follow, starting at NV40TCL_TEX_OFFSET(unit). */
130 BEGIN_RING(Nv3D, NV40TCL_TEX_OFFSET(unit), 8);
131 /* We get an absolute offset, which needs to be corrected. */
132 OUT_RELOCl(pNv->FB, (uint32_t)(offset - pNv->FB->offset), NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
/* Format word: linear 2D texture without border.  The raw 0x8000 bit has no
 * named constant here - TODO confirm its meaning. */
133 OUT_RELOCd(pNv->FB, card_fmt | NV40TCL_TEX_FORMAT_LINEAR |
134 NV40TCL_TEX_FORMAT_DIMS_2D | NV40TCL_TEX_FORMAT_NO_BORDER |
135 (0x8000) | (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT),
136 NOUVEAU_BO_VRAM | NOUVEAU_BO_RD,
137 NV40TCL_TEX_FORMAT_DMA0, 0);
/* All three wrap modes clamp to the border colour (set to 0 further down). */
139 OUT_RING(NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER |
140 NV40TCL_TEX_WRAP_T_CLAMP_TO_BORDER |
141 NV40TCL_TEX_WRAP_R_CLAMP_TO_BORDER);
142 OUT_RING(NV40TCL_TEX_ENABLE_ENABLE);
/* Filter word: linear filtering in the first branch, nearest in the else
 * branch (the condition and the tail of each expression are not visible). */
145 OUT_RING(NV40TCL_TEX_FILTER_MIN_LINEAR |
146 NV40TCL_TEX_FILTER_MAG_LINEAR |
148 } else { /* UV texture cannot be linearly filtered, because it's just offsets. */
149 OUT_RING(NV40TCL_TEX_FILTER_MIN_NEAREST |
150 NV40TCL_TEX_FILTER_MAG_NEAREST |
/* Size word: width in the high half, height in the low half. */
153 OUT_RING((width << 16) | height);
154 OUT_RING(0); /* border ARGB */
155 BEGIN_RING(Nv3D, NV40TCL_TEX_SIZE1(unit), 1);
/* src_pitch is already declared uint16_t, so the cast below is a no-op. */
156 OUT_RING((1 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) |
157 (uint16_t) src_pitch);
/*
 * Pick the render-target colour format for a destination pixmap.
 *
 * pPix    - pixmap whose drawable.bitsPerPixel selects the format.
 * fmt_ret - receives one of the NV40TCL_RT_FORMAT_COLOR_* constants.
 *
 * NOTE(review): the embedded listing numbers skip the lines holding the
 * return type, the case labels and the return statements, so the exact depth
 * handled by each assignment is not visible here (presumably 32, 24, 16 and
 * 8 bits per pixel, in that order - confirm).  The caller in
 * NV40PutTextureImage() treats a zero result as failure.
 */
163 NV40GetSurfaceFormat(PixmapPtr pPix, int *fmt_ret)
165 switch (pPix->drawable.bitsPerPixel) {
167 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
170 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_X8R8G8B8;
173 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_R5G6B5;
176 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_B8;
/*
 * Fallback prototypes for EXA entry points used by NV40PutTextureImage().
 * Each declaration is guarded by #ifndef on the function's own name, so it is
 * only emitted when no preprocessor macro of that name has been defined.
 * NOTE(review): the embedded listing numbers skip 187, 190, 193 and 197, which
 * is where the matching #endif lines would be - confirm against the complete
 * file.
 */
185 #ifndef ExaOffscreenMarkUsed
186 extern void ExaOffscreenMarkUsed(PixmapPtr);
188 #ifndef exaGetDrawablePixmap
189 extern PixmapPtr exaGetDrawablePixmap(DrawablePtr);
191 #ifndef exaPixmapIsOffscreen
192 extern Bool exaPixmapIsOffscreen(PixmapPtr p);
194 /* To support EXA 2.0, 2.1 has this in the header */
195 #ifndef exaMoveInPixmap
196 extern void exaMoveInPixmap(PixmapPtr pPixmap);
/*
 * SF(bf) / DF(bf): build a blend-function value that applies the same factor
 * `bf` to both the RGB and the alpha channel, for the source (SF) or the
 * destination (DF).  `bf` is token-pasted onto NV40TCL_BLEND_FUNC_*, so it must
 * be a bare constant suffix rather than an expression.
 */
199 #define SF(bf) (NV40TCL_BLEND_FUNC_SRC_RGB_##bf | \
200 NV40TCL_BLEND_FUNC_SRC_ALPHA_##bf)
201 #define DF(bf) (NV40TCL_BLEND_FUNC_DST_RGB_##bf | \
202 NV40TCL_BLEND_FUNC_DST_ALPHA_##bf)
/*
 * VERTEX_OUT(sx, sy, dx, dy): emit one vertex.
 *
 * Writes four floats starting at vertex attribute 8 - the pair (sx, sy) twice,
 * so both texture coordinate sets receive the same source coordinates - and
 * then one packed integer word to vertex attribute 0 holding the destination
 * position, with dy in the upper 16 bits and dx in the lower bits.
 *
 * sx and sy are each expanded twice, so arguments must be free of side
 * effects.  dx is OR-ed in without masking, so a negative dx would also set
 * the upper (dy) bits - presumably callers only pass non-negative clip
 * coordinates; verify against the caller.
 * NOTE(review): the embedded listing numbers skip 210, where the closing
 * "} while (0)" of the do-block would be.
 */
204 #define VERTEX_OUT(sx,sy,dx,dy) do { \
205 BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2F_X(8), 4); \
206 OUT_RINGf ((sx)); OUT_RINGf ((sy)); \
207 OUT_RINGf ((sx)); OUT_RINGf ((sy)); \
208 BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2I(0), 1); \
209 OUT_RING (((dy)<<16)|(dx)); \
/*
 * GET_TEXTURED_PRIVATE(pNv): fetch the private data pointer of the first port
 * of pNv->blitAdaptor, cast to NVPortPrivPtr.  Its only use in the visible
 * part of this file is a commented-out line in NV40PutTextureImage().
 */
212 #define GET_TEXTURED_PRIVATE(pNv) \
213 (NVPortPrivPtr)((pNv)->blitAdaptor->pPortPrivates[0].ptr)
/*
 * Draw a planar video frame onto a drawable using the NV40 3D engine.
 *
 * pScrn        - screen to render on.
 * src_offset   - offset of the first source plane, bound to texture unit 0.
 * src_offset2  - offset of the second source plane, bound to texture unit 1
 *                at half width and half height.
 * id           - not referenced in the visible lines.
 * src_pitch    - pitch passed unchanged to both texture setups.
 * dstBox       - destination box; it is modified in place when the target
 *                pixmap has a non-zero screen_x/screen_y.
 * x1,y1,x2,y2  - source rectangle; normalised by src_w/src_h into texture
 *                coordinates.
 * width,height - not referenced in the visible lines.
 * src_w,src_h  - source dimensions used for the textures and for scaling.
 * drw_w,drw_h  - draw size; values above 4096 are rejected with an error
 *                message.
 *
 * NOTE(review): the embedded listing numbers skip many lines (222-224,
 * 240-242, 247-249, 252-253, 264-267, 271-272, 287-289, 293, 306, 308,
 * 310-311, 313-317, 333-335, 348-351 and everything after 353).  The rest of
 * the parameter list (the body uses clipBoxes and pDraw), the declarations of
 * dst_format, pbox and nbox, the error returns, the loop over the clip boxes
 * and the end of the function are therefore not visible here; confirm them
 * against the complete file.
 * NOTE(review): src_pitch is an int here but NV40VideoTexture() takes a
 * uint16_t, so a pitch above 65535 would be truncated at the call.
 */
215 int NV40PutTextureImage(ScrnInfoPtr pScrn, int src_offset,
216 int src_offset2, int id,
217 int src_pitch, BoxPtr dstBox,
218 int x1, int y1, int x2, int y2,
219 uint16_t width, uint16_t height,
220 uint16_t src_w, uint16_t src_h,
221 uint16_t drw_w, uint16_t drw_h,
225 NVPtr pNv = NVPTR(pScrn);
226 //NVPortPrivPtr pPriv = GET_TEXTURED_PRIVATE(pNv);
228 /* Remove some warnings. */
229 /* This has to be done better at some point. */
/* Referencing the otherwise unused shader objects keeps the compiler quiet. */
230 (void)nv40_vp_exa_render;
231 (void)nv30_fp_pass_col0;
232 (void)nv30_fp_pass_tex0;
233 (void)nv30_fp_composite_mask;
234 (void)nv30_fp_composite_mask_sa_ca;
235 (void)nv30_fp_composite_mask_ca;
/* Reject draw sizes above 4096 in either direction. */
237 if (drw_w > 4096 || drw_h > 4096) {
238 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
239 "XV: Draw size too large.\n");
243 float X1, X2, Y1, Y2;
244 float scaleX1, scaleX2, scaleY1, scaleY2;
245 float scaleX, scaleY;
/* Resolve the pixmap that backs the target drawable. */
246 PixmapPtr pPix = exaGetDrawablePixmap(pDraw);
250 if (!NV40GetSurfaceFormat(pPix, &dst_format)) {
251 ErrorF("No surface format, bad.\n");
254 /* Try to get the dest drawable into vram */
255 if (!exaPixmapIsOffscreen(pPix)) {
256 exaMoveInPixmap(pPix);
257 ExaOffscreenMarkUsed(pPix);
260 /* Fail if we can't move the pixmap into memory. */
261 if (!exaPixmapIsOffscreen(pPix)) {
262 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
263 "XV: couldn't move dst surface into vram.\n");
268 /* Adjust coordinates if drawing to an offscreen pixmap */
269 if (pPix->screen_x || pPix->screen_y) {
270 REGION_TRANSLATE(pScrn->pScreen, clipBoxes,
/* The caller's dstBox is shifted in place by the pixmap's screen origin. */
273 dstBox->x1 -= pPix->screen_x;
274 dstBox->x2 -= pPix->screen_x;
275 dstBox->y1 -= pPix->screen_y;
276 dstBox->y2 -= pPix->screen_y;
/* Report the clip region as damaged on the destination pixmap. */
279 DamageDamageRegion((DrawablePtr)pPix, clipBoxes);
/* Fetch the rectangle array and count of the clip region. */
282 pbox = REGION_RECTS(clipBoxes);
283 nbox = REGION_NUM_RECTS(clipBoxes);
285 /* Disable blending */
286 BEGIN_RING(Nv3D, NV40TCL_BLEND_ENABLE, 1);
/* Render target setup: format word, pixmap pitch, then the pixmap offset. */
290 BEGIN_RING(Nv3D, NV40TCL_RT_FORMAT, 3);
291 OUT_RING (NV40TCL_RT_FORMAT_TYPE_LINEAR |
292 NV40TCL_RT_FORMAT_ZETA_Z24S8 |
294 OUT_RING (exaGetPixmapPitch(pPix));
295 OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
/* Unit 0 gets the first plane at full size. */
297 NV40VideoTexture(pScrn, src_offset, src_w, src_h, src_pitch, 0);
298 /* We've got NV12 format, which means half width and half height texture of chroma channels. */
299 NV40VideoTexture(pScrn, src_offset2, src_w/2, src_h/2, src_pitch, 1);
/* Load the pass-through vertex program and the video fragment program. */
301 NV40_LoadVtxProg(pScrn, &nv40_video);
302 NV40_LoadFragProg(pScrn, &nv40_yv12);
304 /* Appears to be some kind of cache flush, needed here at least
305 * sometimes.. funky text rendering otherwise :)
307 BEGIN_RING(Nv3D, NV40TCL_TEX_CACHE_CTL, 1);
309 BEGIN_RING(Nv3D, NV40TCL_TEX_CACHE_CTL, 1);
312 /* These are fixed point values in the 16.16 format. */
/* NOTE(review): the lines that follow the comment above are not visible in
 * this listing; presumably they convert x1, y1, x2 and y2 from 16.16 fixed
 * point to integers before the divisions below - confirm. */
/* Normalise the source rectangle to texture coordinates.  These are float
 * divisions with no guard against src_w or src_h being 0. */
318 X1 = (float)x1/(float)src_w;
319 Y1 = (float)y1/(float)src_h;
320 X2 = (float)x2/(float)src_w;
321 Y2 = (float)y2/(float)src_h;
323 /* The corrections here are empirical; I tried to explain them as best as possible. */
325 /* This correction is needed for when the image clips the screen at the right or bottom. */
326 /* In this case x2 and/or y2 is adjusted for the clipping, otherwise not. */
327 /* Otherwise the lower right coordinate stretches in the clipping direction. */
/* No guard against x2 == x1 or y2 == y1 is visible here. */
328 scaleX = (float)src_w/(float)(x2 - x1);
329 scaleY = (float)src_h/(float)(y2 - y1);
/* Start emitting quads; one quad is submitted per clip rectangle. */
331 BEGIN_RING(Nv3D, NV40TCL_BEGIN_END, 1);
332 OUT_RING (NV40TCL_BEGIN_END_QUADS);
336 /* The src coordinates needs to be scaled to the draw size. */
/* Position of the clip rectangle's edges inside dstBox, as a fraction of the
 * draw size.  No guard against drw_w or drw_h being 0 is visible here. */
337 scaleX1 = (float)(pbox->x1 - dstBox->x1)/(float)drw_w;
338 scaleX2 = (float)(pbox->x2 - dstBox->x1)/(float)drw_w;
339 scaleY1 = (float)(pbox->y1 - dstBox->y1)/(float)drw_h;
340 scaleY2 = (float)(pbox->y2 - dstBox->y1)/(float)drw_h;
342 /* Submit the appropriate vertices. */
343 /* This submits the same vertices for the Y and the UV texture. */
/* Corner order: top-left, top-right, bottom-right, bottom-left. */
344 VERTEX_OUT(X1 + (X2 - X1) * scaleX1 * scaleX, Y1 + (Y2 - Y1) * scaleY1 * scaleY, pbox->x1, pbox->y1);
345 VERTEX_OUT(X1 + (X2 - X1) * scaleX2 * scaleX, Y1 + (Y2 - Y1) * scaleY1 * scaleY, pbox->x2, pbox->y1);
346 VERTEX_OUT(X1 + (X2 - X1) * scaleX2 * scaleX, Y1 + (Y2 - Y1) * scaleY2 * scaleY, pbox->x2, pbox->y2);
347 VERTEX_OUT(X1 + (X2 - X1) * scaleX1 * scaleX, Y1 + (Y2 - Y1) * scaleY2 * scaleY, pbox->x1, pbox->y2);
/* Close the primitive opened with NV40TCL_BEGIN_END_QUADS. */
352 BEGIN_RING(Nv3D, NV40TCL_BEGIN_END, 1);
353 OUT_RING (NV40TCL_BEGIN_END_STOP);