NV40: Preliminary Xv Texture Adaptor.
[nouveau] / src / nv40_video_texture.c
1 /*
2  * Copyright 2007 Maarten Maathuis
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27
28 #include "xf86.h"
29 #include "xf86_OSproc.h"
30 #include "xf86Resources.h"
31 #include "compiler.h"
32 #include "xf86PciInfo.h"
33 #include "xf86Pci.h"
34 #include "xf86fbman.h"
35 #include "regionstr.h"
36
37 #include "xf86xv.h"
38 #include <X11/extensions/Xv.h>
39 #include "exa.h"
40 #include "damage.h"
41 #include "dixstruct.h"
42 #include "fourcc.h"
43
44 #include "nv_include.h"
45 #include "nv_dma.h"
46
47 #include "nv_shaders.h"
48
49 static nv_shader_t nv40_video = {
50         .card_priv.NV30VP.vp_in_reg  = 0x00000309,
51         .card_priv.NV30VP.vp_out_reg = 0x0000c001,
52         .size = (3*4),
53         .data = {
54                 /* MOV result.position, vertex.position */
55                 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
56                 /* MOV result.texcoord[0], vertex.texcoord[0] */
57                 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
58                 /* MOV result.texcoord[1], vertex.texcoord[1] */
59                 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
60         }
61 };
62
63 static nv_shader_t nv40_yv12 = {
64         .card_priv.NV30FP.num_regs = 4,
65         .size = (17*4),
66         .data = {
67                 /* INST 0.0, TEX R1 (TR0.xyzw), attrib.texcoord[0] */
68                 0x17009e02, 0x1c9dc811, 0x0001c801, 0x0001c801,
69                 /* INST 1.0, TEX R2 (TR0.xyzw), attrib.texcoord[1] */
70                 0x1702be04, 0x1c9dc815, 0x0001c801, 0x0001c801,
71                 /* INST 2.0, DP4R R3.x (TR0.xyzw), R1, { 0.00, 0.00, 0.00, 1.00 } */
72                 0x06000206, 0x1c9dc804, 0x0001c802, 0x0001c801,
73                 /* const */
74                 0x00000000, 0x00000000, 0x00000000, 0x3f800000,
75                 /* INST 3.0, DP4R R3.y (TR0.xyzw), R2, { 0.00, 1.00, 0.00, 0.00 } */
76                 0x06000406, 0x1c9dc808, 0x0001c802, 0x0001c801,
77                 /* const */
78                 0x00000000, 0x3f800000, 0x00000000, 0x00000000,
79                 /* INST 4.0, DP4R R3.z (TR0.xyzw), R2, { 1.00, 0.00, 0.00, 0.00 } */
80                 0x06000806, 0x1c9dc808, 0x0001c802, 0x0001c801,
81                 /* const */
82                 0x3f800000, 0x00000000, 0x00000000, 0x00000000,
83                 /* INST 5.0, ADDR R3 (TR0.xyzw), R3, { 0.00, -0.50, -0.50, 0.00 } */
84                 0x03001e06, 0x1c9dc80c, 0x0001c802, 0x0001c801,
85                 /* const */
86                 0x00000000, 0xBF000000, 0xBF000000, 0x00000000,
87                 /* INST 6.0, DP3R R0.x (TR0.xyzw), R3, { 1.00, 0.00, 1.4022, 0.00 } */
88                 0x05000280, 0x1c9dc80c, 0x0001c802, 0x0001c801,
89                 /* const */
90                 0x3f800000, 0x00000000, 0x3FB37B4A, 0x00000000,
91                 /* INST 7.0, DP3R R0.y (TR0.xyzw), R3, { 1.00, -0.3457, -0.7145, 0.00 } */
92                 0x05000480, 0x1c9dc80c, 0x0001c802, 0x0001c801,
93                 /* const */
94                 0x3f800000, 0xBEB0FF97, 0xBF36E979, 0x00000000,
95                 /* INST 8.0, DP3R R0.z (TR0.xyzw), R3, { 1.00, 1.7710, 0.00, 0.00 } */
96                 0x05000880, 0x1c9dc80c, 0x0001c802, 0x0001c801,
97                 /* const */
98                 0x3f800000, 0x3FE2B021, 0x00000000, 0x00000000,
99                 /* INST 9.0, MOVR R0.w (TR0.xyzw), R3.wwww + END */
100                 0x01001081, 0x1c9dfe0c, 0x0001c801, 0x0001c801,
101         }
102 };
103
/*
 * Assemble a TEX_SWIZZLE register value by token pasting.  The first four
 * arguments are the suffixes for the S0 X/Y/Z/W fields, the last four the
 * suffixes for the S1 X/Y/Z/W fields.
 */
#define SWIZZLE(s0x, s0y, s0z, s0w, s1x, s1y, s1z, s1w)	\
	(						\
	NV40TCL_TEX_SWIZZLE_S0_X_##s0x |		\
	NV40TCL_TEX_SWIZZLE_S0_Y_##s0y |		\
	NV40TCL_TEX_SWIZZLE_S0_Z_##s0z |		\
	NV40TCL_TEX_SWIZZLE_S0_W_##s0w |		\
	NV40TCL_TEX_SWIZZLE_S1_X_##s1x |		\
	NV40TCL_TEX_SWIZZLE_S1_Y_##s1y |		\
	NV40TCL_TEX_SWIZZLE_S1_Z_##s1z |		\
	NV40TCL_TEX_SWIZZLE_S1_W_##s1w			\
	)
111
112 static Bool
113 NV40VideoTexture(ScrnInfoPtr pScrn, int offset, uint16_t width, uint16_t height, uint16_t src_pitch, int unit)
114 {
115         NVPtr pNv = NVPTR(pScrn);
116
117         uint32_t card_fmt = 0;
118         uint32_t card_swz = 0;
119
120         if (unit == 0) {
121                 /* Pretend we've got a normal 8 bits format. */
122                 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_L8;
123                 card_swz = SWIZZLE(ZERO, ZERO, ZERO, S1, X, X, X, X);
124         } else {
125                 /* Pretend we've got a normal 2x8 bits format. */
126                 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_A8L8;
127                 card_swz = SWIZZLE(S1, S1, S1, S1, Y, X, W, Z); /* x = V, y = U */
128         }
129
130         BEGIN_RING(Nv3D, NV40TCL_TEX_OFFSET(unit), 8);
131         /* We get an obsolute offset, which needs to be corrected. */
132         OUT_RELOCl(pNv->FB, (uint32_t)(offset - pNv->FB->offset), NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
133         OUT_RELOCd(pNv->FB, card_fmt | NV40TCL_TEX_FORMAT_LINEAR |
134                         NV40TCL_TEX_FORMAT_DIMS_2D | NV40TCL_TEX_FORMAT_NO_BORDER |
135                         (0x8000) | (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT),
136                         NOUVEAU_BO_VRAM | NOUVEAU_BO_RD,
137                         NV40TCL_TEX_FORMAT_DMA0, 0);
138
139         OUT_RING(NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER |
140                         NV40TCL_TEX_WRAP_T_CLAMP_TO_BORDER |
141                         NV40TCL_TEX_WRAP_R_CLAMP_TO_BORDER);
142         OUT_RING(NV40TCL_TEX_ENABLE_ENABLE);
143         OUT_RING(card_swz);
144         if (unit == 0) {
145                 OUT_RING(NV40TCL_TEX_FILTER_MIN_LINEAR |
146                                 NV40TCL_TEX_FILTER_MAG_LINEAR |
147                                 0x3fd6);
148         } else { /* UV texture cannot be linearly filtered, because it's just offsets. */
149                 OUT_RING(NV40TCL_TEX_FILTER_MIN_NEAREST |
150                                 NV40TCL_TEX_FILTER_MAG_NEAREST |
151                                 0x3fd6);
152         }
153         OUT_RING((width << 16) | height);
154         OUT_RING(0); /* border ARGB */
155         BEGIN_RING(Nv3D, NV40TCL_TEX_SIZE1(unit), 1);
156         OUT_RING((1 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) |
157                         (uint16_t) src_pitch);
158
159         return TRUE;
160 }
161
162 Bool
163 NV40GetSurfaceFormat(PixmapPtr pPix, int *fmt_ret)
164 {
165         switch (pPix->drawable.bitsPerPixel) {
166                 case 32:
167                         *fmt_ret = NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
168                         break;
169                 case 24:
170                         *fmt_ret = NV40TCL_RT_FORMAT_COLOR_X8R8G8B8;
171                         break;
172                 case 16:
173                         *fmt_ret = NV40TCL_RT_FORMAT_COLOR_R5G6B5;
174                         break;
175                 case 8:
176                         *fmt_ret = NV40TCL_RT_FORMAT_COLOR_B8;
177                         break;
178                 default:
179                         return FALSE;
180         }
181
182         return TRUE;
183 }
184
185 #ifndef ExaOffscreenMarkUsed
186 extern void ExaOffscreenMarkUsed(PixmapPtr);
187 #endif
188 #ifndef exaGetDrawablePixmap
189 extern PixmapPtr exaGetDrawablePixmap(DrawablePtr);
190 #endif
191 #ifndef exaPixmapIsOffscreen
192 extern Bool exaPixmapIsOffscreen(PixmapPtr p);
193 #endif
194 /* To support EXA 2.0, 2.1 has this in the header */
195 #ifndef exaMoveInPixmap
196 extern void exaMoveInPixmap(PixmapPtr pPixmap);
197 #endif
198
/* Source blend factor: the same factor `bf` for the RGB and alpha fields. */
#define SF(bf)					\
	(NV40TCL_BLEND_FUNC_SRC_RGB_##bf |	\
	 NV40TCL_BLEND_FUNC_SRC_ALPHA_##bf)

/* Destination blend factor: the same factor `bf` for the RGB and alpha fields. */
#define DF(bf)					\
	(NV40TCL_BLEND_FUNC_DST_RGB_##bf |	\
	 NV40TCL_BLEND_FUNC_DST_ALPHA_##bf)
203
/*
 * Emit one vertex: the source coordinate pair (sx, sy) is written twice as
 * floats (four words starting at VTX_ATTR_2F_X(8)), followed by the
 * destination position packed as two 16-bit integers, dy in the upper half
 * and dx in the lower half.
 *
 * Both halves are masked to 16 bits in unsigned arithmetic so that a
 * negative dx cannot sign-extend over dy, and a negative dy is never
 * left-shifted as a signed value.
 */
#define VERTEX_OUT(sx,sy,dx,dy) do {                                        \
	BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2F_X(8), 4);                         \
	OUT_RINGf ((sx)); OUT_RINGf ((sy));                                    \
	OUT_RINGf ((sx)); OUT_RINGf ((sy));                                    \
	BEGIN_RING(Nv3D, NV40TCL_VTX_ATTR_2I(0), 1);                           \
	OUT_RING  ((((uint32_t)(dy) & 0xffff) << 16) |                         \
	           ((uint32_t)(dx) & 0xffff));                                 \
} while(0)
211
212 void NV40PutTextureImage(ScrnInfoPtr pScrn, int src_offset,
213                 int src_offset2, int id,
214                 int src_pitch, BoxPtr dstBox,
215                 int x1, int y1, int x2, int y2,
216                 uint16_t width, uint16_t height,
217                 uint16_t src_w, uint16_t src_h,
218                 uint16_t drw_w, uint16_t drw_h,
219                 RegionPtr clipBoxes,
220                 DrawablePtr pDraw)
221 {
222         /* Remove some warnings. */
223         /* This has to be done better at some point. */
224         (void)nv40_vp_exa_render;
225         (void)nv30_fp_pass_col0;
226         (void)nv30_fp_pass_tex0;
227         (void)nv30_fp_composite_mask;
228         (void)nv30_fp_composite_mask_sa_ca;
229         (void)nv30_fp_composite_mask_ca;
230
231         NVPtr pNv = NVPTR(pScrn);
232         float X1, X2, X3, X4, Y1, Y2, Y3, Y4;
233         ScreenPtr pScreen = pScrn->pScreen;
234         PixmapPtr pPix = exaGetDrawablePixmap(pDraw);
235         int dst_format = 0;
236         if (!NV40GetSurfaceFormat(pPix, &dst_format)) {
237                 ErrorF("No surface format, bad.\n");
238         }
239
240         /* Try to get the dest drawable into vram */
241         if (!exaPixmapIsOffscreen(pPix)) {
242                 exaMoveInPixmap(pPix);
243                 ExaOffscreenMarkUsed(pPix);
244         }
245
246         /* If we failed, draw directly onto the screen pixmap.
247          * Not sure if this is the best approach, maybe failing
248          * with BadAlloc would be better?
249          */
250         if (!exaPixmapIsOffscreen(pPix)) {
251                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
252                         "XV: couldn't move dst surface into vram\n");
253                 pPix = pScreen->GetScreenPixmap(pScreen);
254         }
255
256 #ifdef COMPOSITE
257         /* Adjust coordinates if drawing to an offscreen pixmap */
258         if (pPix->screen_x || pPix->screen_y) {
259                 REGION_TRANSLATE(pScrn->pScreen, clipBoxes,
260                                                         -pPix->screen_x,
261                                                         -pPix->screen_y);
262                 dstBox->x1 -= pPix->screen_x;
263                 dstBox->x2 -= pPix->screen_x;
264                 dstBox->y1 -= pPix->screen_y;
265                 dstBox->y2 -= pPix->screen_y;
266         }
267
268         DamageDamageRegion((DrawablePtr)pPix, clipBoxes);
269 #endif
270
271         /* Disable blending */
272         BEGIN_RING(Nv3D, NV40TCL_BLEND_ENABLE, 1);
273         OUT_RING(0);
274
275         /* Setup surface */
276         BEGIN_RING(Nv3D, NV40TCL_RT_FORMAT, 3);
277         OUT_RING  (NV40TCL_RT_FORMAT_TYPE_LINEAR |
278                         NV40TCL_RT_FORMAT_ZETA_Z24S8 |
279                         dst_format);
280         OUT_RING  (exaGetPixmapPitch(pPix));
281         OUT_PIXMAPl(pPix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
282
283         NV40VideoTexture(pScrn, src_offset, width, height, src_pitch, 0);
284         /* We've got NV12 format, which means half width and half height texture of chroma channels. */
285         NV40VideoTexture(pScrn, src_offset2, width/2, height/2, src_pitch, 1);
286
287         NV40_LoadVtxProg(pScrn, &nv40_video);
288         NV40_LoadFragProg(pScrn, &nv40_yv12);
289
290         /* Appears to be some kind of cache flush, needed here at least
291          * sometimes.. funky text rendering otherwise :)
292          */
293         BEGIN_RING(Nv3D, NV40TCL_TEX_CACHE_CTL, 1);
294         OUT_RING  (2);
295         BEGIN_RING(Nv3D, NV40TCL_TEX_CACHE_CTL, 1);
296         OUT_RING  (1);
297
298         BEGIN_RING(Nv3D, NV40TCL_BEGIN_END, 1);
299         OUT_RING  (NV40TCL_BEGIN_END_QUADS);
300
301         /* These are fixed point values in the 16.16 format. */
302         x1 >>= 16;
303         x2 >>= 16;
304         y1 >>= 16;
305         y2 >>= 16;
306
307         X1 = (float)x1/(float)src_w;
308         Y1 = (float)y1/(float)src_h;
309         X2 = (float)x2/(float)src_w;
310         Y2 = (float)y1/(float)src_h;
311         X3 = (float)x2/(float)src_w;
312         Y3 = (float)y2/(float)src_h;
313         X4 = (float)x1/(float)src_w;
314         Y4 = (float)y2/(float)src_h;
315
316         /* Submit the appropriate vertices. */
317         /* This submits the same vertices for the Y and the UV texture. */
318         VERTEX_OUT(X1, Y1, dstBox->x1, dstBox->y1);
319         VERTEX_OUT(X2, Y2, dstBox->x2, dstBox->y1);
320         VERTEX_OUT(X3, Y3, dstBox->x2, dstBox->y2);
321         VERTEX_OUT(X4, Y4, dstBox->x1, dstBox->y2);
322
323         FIRE_RING();
324
325         BEGIN_RING(Nv3D, NV40TCL_BEGIN_END, 1);
326         OUT_RING  (NV40TCL_BEGIN_END_STOP);
327 }