fence: ref could destroy the object it was referencing, fix that!
[nouveau] / src / nv40_xv_tex.c
1 /*
2  * Copyright 2007-2008 Maarten Maathuis
3  * Copyright 2008 Stephane Marchesin
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28
29 #include "xf86xv.h"
30 #include <X11/extensions/Xv.h>
31 #include "exa.h"
32 #include "damage.h"
33 #include "dixstruct.h"
34 #include "fourcc.h"
35
36 #include "nv_include.h"
37 #include "nv_dma.h"
38
39 #include "nv30_shaders.h"
40
41 extern Atom xvSyncToVBlank, xvSetDefaults;
42
/*
 * The filtering function used for video scaling. We use a cubic filter as
 * defined in "Reconstruction Filters in Computer Graphics",
 * Mitchell & Netravali, SIGGRAPH '88, with B = 0.75 and C = (1 - B) / 2.
 *
 * x is the signed distance from the filter centre; only its magnitude is
 * used, so the function is symmetric around 0.
 *
 * Returns the filter weight at x. The kernel is made of two cubic pieces
 * (|x| < 1 and 1 <= |x| < 2) and is zero everywhere else; outside [-2, 2]
 * we return 0 instead of extrapolating the outer cubic.
 */
static float filter_func(float x)
{
        const double B = 0.75;
        const double C = (1.0 - B) / 2.0;
        const double x1 = fabs(x);
        const double x2 = x1 * x1;
        const double x3 = x1 * x2;

        /* Inner piece of the kernel. */
        if (x1 < 1.0)
                return ( (12.0-9.0*B-6.0*C)*x3+(-18.0+12.0*B+6.0*C)*x2+(6.0-2.0*B) )/6.0;

        /* Outer piece of the kernel. */
        if (x1 < 2.0)
                return ( (-B-6.0*C)*x3+(6.0*B+30.0*C)*x2+(-12.0*B-48.0*C)*x1+(8.0*B+24.0*C) )/6.0;

        /* Outside the support of the filter. */
        return 0.0f;
}
61
/*
 * Scales a float by 127 and converts the product to a signed 8-bit integer.
 * The integer conversion discards the fractional part (truncation toward
 * zero), so e.g. 0.5 becomes 63 and -0.5 becomes -63.
 */
static int8_t f32tosb8(float v)
{
        const double scaled = v * 127.0;

        return (int8_t)scaled;
}
66
/*
 * Fills the lookup table used for bicubic filtering, implemented as
 * described in "Fast Third-Order Texture Filtering",
 * Sigg & Hadwiger, GPU Gems 2.
 *
 * t must have room for TABLE_SIZE entries of four int8_t each. Entry i is
 * computed for x = (i + 0.5) / TABLE_SIZE from the four filter weights
 * w0..w3 = filter_func(x + 1), filter_func(x), filter_func(x - 1),
 * filter_func(x - 2) and holds, in byte order:
 *   [0] w0 + w1
 *   [1] 1 - x + w3 / (w2 + w3)
 *   [2] 1 + x - w1 / (w0 + w1)
 *   [3] 0
 * each converted with f32tosb8().
 */
#define TABLE_SIZE 512
static void compute_filter_table(int8_t *t) {
        int8_t *entry = t;
        int idx;

        for (idx = 0; idx < TABLE_SIZE; idx++, entry += 4) {
                float x = (idx+0.5)/TABLE_SIZE;

                float w0 = filter_func(x+1.0);
                float w1 = filter_func(x);
                float w2 = filter_func(x-1.0);
                float w3 = filter_func(x-2.0);

                entry[0] = f32tosb8(w0+w1);
                entry[1] = f32tosb8(1.0-x+w3/(w2+w3));
                entry[2] = f32tosb8(1.0+x-w1/(w0+w1));
                entry[3] = f32tosb8(0.0);
        }
}
90
91 static struct nouveau_bo *table_mem = NULL;
92 static void
93 NV40_LoadFilterTable(ScrnInfoPtr pScrn)
94 {
95         NVPtr pNv = NVPTR(pScrn);
96
97         if (!table_mem) {
98                 if (nouveau_bo_new(pNv->dev, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART,
99                                 0, TABLE_SIZE*sizeof(float)*4, &table_mem)) {
100                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
101                                 "Couldn't alloc filter table!\n");
102                         return;
103                 }
104
105                 if (nouveau_bo_map(table_mem, NOUVEAU_BO_RDWR)) {
106                         xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
107                                    "Couldn't map filter table!\n");
108                         return;
109                 }
110
111                 int8_t *t=table_mem->map;
112                 compute_filter_table(t);
113
114                 nouveau_bo_unmap(table_mem);
115         }
116 }
117
/*
 * Builds a texture swizzle word by OR-ing together eight
 * NV40TCL_TEX_SWIZZLE_* constants selected by token pasting.
 * The first four arguments are appended to the S0_X_/S0_Y_/S0_Z_/S0_W_
 * constant names, the last four to the S1_X_/S1_Y_/S1_Z_/S1_W_ names,
 * e.g. SWIZZLE(S1, S1, S1, S1, X, Y, Z, W).
 */
#define SWIZZLE(ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w)                                \
        (                                                                               \
        NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y       |       \
        NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w       |       \
        NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y       |       \
        NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w               \
        )
125
126 /*
127  * Texture 0 : filter table
128  * Texture 1 : Y data
129  * Texture 2 : UV data
130  */
131 static Bool
132 NV40VideoTexture(ScrnInfoPtr pScrn, struct nouveau_bo *src, int offset,
133                  uint16_t width, uint16_t height, uint16_t src_pitch, int unit)
134 {
135         NVPtr pNv = NVPTR(pScrn);
136         struct nouveau_channel *chan = pNv->chan;
137         struct nouveau_grobj *curie = pNv->Nv3D;
138
139         uint32_t card_fmt = 0;
140         uint32_t card_swz = 0;
141
142         switch(unit) {
143         case 0:
144                 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_A8R8G8B8;
145                 card_swz = SWIZZLE(S1, S1, S1, S1, X, Y, Z, W);
146                 break;
147         case 1:
148                 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_L8;
149                 card_swz = SWIZZLE(S1, S1, S1, S1, X, X, X, X);
150                 break;
151         case 2:
152                 card_fmt = NV40TCL_TEX_FORMAT_FORMAT_A8L8;
153 #if X_BYTE_ORDER == X_BIG_ENDIAN
154                 card_swz = SWIZZLE(S1, S1, S1, S1, Z, W, X, Y); /* x = V, y = U */
155 #else
156                 card_swz = SWIZZLE(S1, S1, S1, S1, W, Z, Y, X); /* x = V, y = U */
157 #endif
158                 break;
159         }
160
161         BEGIN_RING(chan, curie, NV40TCL_TEX_OFFSET(unit), 8);
162         OUT_RELOCl(chan, src, offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
163         if (unit==0) {
164                 OUT_RELOCd(chan, pNv->FB, card_fmt | 
165                                  NV40TCL_TEX_FORMAT_DIMS_1D | 0x8000 |
166                                  NV40TCL_TEX_FORMAT_NO_BORDER |
167                                  (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT),
168                                  NOUVEAU_BO_VRAM | NOUVEAU_BO_RD,
169                                  NV40TCL_TEX_FORMAT_DMA0, 0);
170                 OUT_RING  (chan, NV40TCL_TEX_WRAP_S_REPEAT |
171                                  NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE |
172                                  NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE);
173         } else {
174                 OUT_RELOCd(chan, pNv->FB, card_fmt | NV40TCL_TEX_FORMAT_LINEAR |
175                                  NV40TCL_TEX_FORMAT_RECT | 0x8000 |
176                                  NV40TCL_TEX_FORMAT_DIMS_2D |
177                                  NV40TCL_TEX_FORMAT_NO_BORDER |
178                                  (1 << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT),
179                                  NOUVEAU_BO_VRAM | NOUVEAU_BO_RD,
180                                  NV40TCL_TEX_FORMAT_DMA0, 0);
181                 OUT_RING  (chan, NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE |
182                                  NV40TCL_TEX_WRAP_T_CLAMP_TO_EDGE |
183                                  NV40TCL_TEX_WRAP_R_CLAMP_TO_EDGE);
184         }
185
186         OUT_RING  (chan, NV40TCL_TEX_ENABLE_ENABLE);
187         OUT_RING  (chan, card_swz);
188         if (unit == 0)
189                 OUT_RING  (chan, NV40TCL_TEX_FILTER_SIGNED_ALPHA |
190                                  NV40TCL_TEX_FILTER_SIGNED_RED |
191                                  NV40TCL_TEX_FILTER_SIGNED_GREEN |
192                                  NV40TCL_TEX_FILTER_SIGNED_BLUE |
193                                  NV40TCL_TEX_FILTER_MIN_LINEAR |
194                                  NV40TCL_TEX_FILTER_MAG_LINEAR | 0x3fd6);
195         else
196                 OUT_RING  (chan, NV40TCL_TEX_FILTER_MIN_LINEAR |
197                                  NV40TCL_TEX_FILTER_MAG_LINEAR | 0x3fd6);
198         OUT_RING  (chan, (width << 16) | height);
199         OUT_RING  (chan, 0); /* border ARGB */
200
201         BEGIN_RING(chan, curie, NV40TCL_TEX_SIZE1(unit), 1);
202         OUT_RING  (chan, (1 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) |
203                          (uint16_t) src_pitch);
204
205         return TRUE;
206 }
207
208 Bool
209 NV40GetSurfaceFormat(PixmapPtr ppix, int *fmt_ret)
210 {
211         switch (ppix->drawable.bitsPerPixel) {
212         case 32:
213                 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
214                 break;
215         case 24:
216                 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_X8R8G8B8;
217                 break;
218         case 16:
219                 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_R5G6B5;
220                 break;
221         case 8:
222                 *fmt_ret = NV40TCL_RT_FORMAT_COLOR_B8;
223                 break;
224         default:
225                 return FALSE;
226         }
227
228         return TRUE;
229 }
230
/*
 * Stop entry point for textured video. The body is intentionally empty:
 * it performs no work and ignores pScrn, data and Exit.
 * NOTE(review): presumably installed as the Xv StopVideo callback of the
 * textured adaptor -- confirm where the adaptor is registered.
 */
void
NV40StopTexturedVideo(ScrnInfoPtr pScrn, pointer data, Bool Exit)
{
}
235
/*
 * Emits one vertex into the ring.
 *
 * Vertex attribute 8 receives four floats: (sx, sy, sx/2, sy/2).
 * NOTE(review): the halved pair presumably addresses the half-size chroma
 * texture -- confirm against the vertex/fragment programs.
 * Vertex attribute 0 receives dx and dy packed as (dy << 16) | dx.
 *
 * The macro uses `chan` and `curie` from the scope it is expanded in, and
 * evaluates sx and sy twice each, so arguments must be free of side effects.
 */
#define VERTEX_OUT(sx,sy,dx,dy) do {                                           \
        BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2F_X(8), 4);                  \
        OUT_RINGf (chan, (sx)); OUT_RINGf (chan, (sy));                        \
        OUT_RINGf (chan, (sx)/2.0); OUT_RINGf (chan, (sy)/2.0);                \
        BEGIN_RING(chan, curie, NV40TCL_VTX_ATTR_2I(0), 1);                    \
        OUT_RING  (chan, ((dy)<<16)|(dx));                                     \
} while(0)
243
244 int
245 NV40PutTextureImage(ScrnInfoPtr pScrn,
246                     struct nouveau_bo *src, int src_offset, int src_offset2,
247                     int id, int src_pitch, BoxPtr dstBox,
248                     int x1, int y1, int x2, int y2,
249                     uint16_t width, uint16_t height,
250                     uint16_t src_w, uint16_t src_h,
251                     uint16_t drw_w, uint16_t drw_h,
252                     RegionPtr clipBoxes, PixmapPtr ppix,
253                     NVPortPrivPtr pPriv)
254 {
255         NVPtr pNv = NVPTR(pScrn);
256         struct nouveau_channel *chan = pNv->chan;
257         struct nouveau_grobj *curie = pNv->Nv3D;
258         Bool redirected = FALSE;
259         float X1, X2, Y1, Y2;
260         BoxPtr pbox;
261         int nbox;
262         int dst_format = 0;
263
264         if (drw_w > 4096 || drw_h > 4096) {
265                 xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
266                         "XV: Draw size too large.\n");
267                 return BadAlloc;
268         }
269
270         if (!NV40GetSurfaceFormat(ppix, &dst_format)) {
271                 ErrorF("No surface format, bad.\n");
272         }
273
274 #ifdef COMPOSITE
275         if (!NVExaPixmapIsOnscreen(ppix))
276                 redirected = TRUE;
277 #endif
278
279         pbox = REGION_RECTS(clipBoxes);
280         nbox = REGION_NUM_RECTS(clipBoxes);
281
282         /* Disable blending */
283         BEGIN_RING(chan, curie, NV40TCL_BLEND_ENABLE, 1);
284         OUT_RING  (chan, 0);
285
286         /* Setup surface */
287         BEGIN_RING(chan, curie, NV40TCL_RT_FORMAT, 3);
288         OUT_RING  (chan, NV40TCL_RT_FORMAT_TYPE_LINEAR |
289                          NV40TCL_RT_FORMAT_ZETA_Z24S8 | dst_format);
290         OUT_RING  (chan, exaGetPixmapPitch(ppix));
291         OUT_PIXMAPl(chan, ppix, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
292
293         NV40_LoadFilterTable(pScrn);
294
295         NV40VideoTexture(pScrn, table_mem, 0, TABLE_SIZE, 1, 0 , 0);
296         NV40VideoTexture(pScrn, src, src_offset, src_w, src_h, src_pitch, 1);
297         /* We've got NV12 format, which means half width and half height texture of chroma channels. */
298         NV40VideoTexture(pScrn, src, src_offset2, src_w/2, src_h/2, src_pitch, 2);
299
300         NV40_LoadVtxProg(pScrn, &nv40_vp_video);
301         if (pPriv->bicubic)
302                 NV40_LoadFragProg(pScrn, &nv40_fp_yv12_bicubic);
303         else
304                 NV40_LoadFragProg(pScrn, &nv30_fp_yv12_bilinear);
305
306         /* Appears to be some kind of cache flush, needed here at least
307          * sometimes.. funky text rendering otherwise :)
308          */
309         BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
310         OUT_RING  (chan, 2);
311         BEGIN_RING(chan, curie, NV40TCL_TEX_CACHE_CTL, 1);
312         OUT_RING  (chan, 1);
313
314         /* Just before rendering we wait for vblank in the non-composited case. */
315         if (pPriv->SyncToVBlank && !redirected) {
316                 uint8_t crtcs = nv_window_belongs_to_crtc(pScrn, dstBox->x1, dstBox->y1,
317                         dstBox->x2 - dstBox->x1, dstBox->y2 - dstBox->y1);
318
319                 FIRE_RING (chan);
320                 if (crtcs & 0x1)
321                         NVWaitVSync(pScrn, 0);
322                 else if (crtcs & 0x2)
323                         NVWaitVSync(pScrn, 1);
324         }
325
326         /* These are fixed point values in the 16.16 format. */
327         X1 = (float)(x1>>16)+(float)(x1&0xFFFF)/(float)0x10000;
328         Y1 = (float)(y1>>16)+(float)(y1&0xFFFF)/(float)0x10000;
329         X2 = (float)(x2>>16)+(float)(x2&0xFFFF)/(float)0x10000;
330         Y2 = (float)(y2>>16)+(float)(y2&0xFFFF)/(float)0x10000;
331
332         BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
333         OUT_RING  (chan, NV40TCL_BEGIN_END_TRIANGLES);
334
335         while(nbox--) {
336                 float tx1=X1+(float)(pbox->x1 - dstBox->x1)*(X2-X1)/(float)(drw_w);
337                 float tx2=X1+(float)(pbox->x2 - dstBox->x1)*(src_w)/(float)(drw_w);
338                 float ty1=Y1+(float)(pbox->y1 - dstBox->y1)*(Y2-Y1)/(float)(drw_h);
339                 float ty2=Y1+(float)(pbox->y2 - dstBox->y1)*(src_h)/(float)(drw_h);
340                 int sx1=pbox->x1;
341                 int sx2=pbox->x2;
342                 int sy1=pbox->y1;
343                 int sy2=pbox->y2;
344
345                 BEGIN_RING(chan, curie, NV40TCL_SCISSOR_HORIZ, 2);
346                 OUT_RING  (chan, (sx2 << 16) | 0);
347                 OUT_RING  (chan, (sy2 << 16) | 0);
348
349                 VERTEX_OUT(tx1, ty1, sx1, sy1);
350                 VERTEX_OUT(tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1);
351                 VERTEX_OUT(tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1));
352
353                 pbox++;
354         }
355
356         BEGIN_RING(chan, curie, NV40TCL_BEGIN_END, 1);
357         OUT_RING  (chan, NV40TCL_BEGIN_END_STOP);
358
359         FIRE_RING (chan);
360
361         return Success;
362 }
363
364 /**
365  * NV40SetTexturePortAttribute
366  * sets the attribute "attribute" of port "data" to value "value"
367  * supported attributes:
368  * Sync to vblank.
369  * 
370  * @param pScrenInfo
371  * @param attribute attribute to set
372  * @param value value to which attribute is to be set
373  * @param data port from which the attribute is to be set
374  * 
375  * @return Success, if setting is successful
376  * BadValue/BadMatch, if value/attribute are invalid
377  */
378 int
379 NV40SetTexturePortAttribute(ScrnInfoPtr pScrn, Atom attribute,
380                        INT32 value, pointer data)
381 {
382         NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
383         NVPtr           pNv = NVPTR(pScrn);
384
385         if ((attribute == xvSyncToVBlank) && pNv->WaitVSyncPossible) {
386                 if ((value < 0) || (value > 1))
387                         return BadValue;
388                 pPriv->SyncToVBlank = value;
389         } else
390         if (attribute == xvSetDefaults) {
391                 pPriv->SyncToVBlank = pNv->WaitVSyncPossible;
392         } else
393                 return BadMatch;
394
395         return Success;
396 }
397
398 /**
399  * NV40GetTexturePortAttribute
400  * reads the value of attribute "attribute" from port "data" into INT32 "*value"
401  * Sync to vblank.
402  * 
403  * @param pScrn unused
404  * @param attribute attribute to be read
405  * @param value value of attribute will be stored here
406  * @param data port from which attribute will be read
407  * @return Success, if queried attribute exists
408  */
409 int
410 NV40GetTexturePortAttribute(ScrnInfoPtr pScrn, Atom attribute,
411                        INT32 *value, pointer data)
412 {
413         NVPortPrivPtr pPriv = (NVPortPrivPtr)data;
414
415         if(attribute == xvSyncToVBlank)
416                 *value = (pPriv->SyncToVBlank) ? 1 : 0;
417         else
418                 return BadMatch;
419
420         return Success;
421 }
422