/* drivers/video/nvidia/nv_accel.c */
 /***************************************************************************\
|*                                                                           *|
|*       Copyright 1993-2003 NVIDIA, Corporation.  All rights reserved.      *|
|*                                                                           *|
|*     NOTICE TO USER:   The source code  is copyrighted under  U.S. and     *|
|*     international laws.  Users and possessors of this source code are     *|
|*     hereby granted a nonexclusive,  royalty-free copyright license to     *|
|*     use this code in individual and commercial software.                  *|
|*                                                                           *|
|*     Any use of this source code must include,  in the user documenta-     *|
|*     tion and  internal comments to the code,  notices to the end user     *|
|*     as follows:                                                           *|
|*                                                                           *|
|*       Copyright 1993-2003 NVIDIA, Corporation.  All rights reserved.      *|
|*                                                                           *|
|*     NVIDIA, CORPORATION MAKES NO REPRESENTATION ABOUT THE SUITABILITY     *|
|*     OF  THIS SOURCE  CODE  FOR ANY PURPOSE.  IT IS  PROVIDED  "AS IS"     *|
|*     WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.  NVIDIA, CORPOR-     *|
|*     ATION DISCLAIMS ALL WARRANTIES  WITH REGARD  TO THIS SOURCE CODE,     *|
|*     INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, NONINFRINGE-     *|
|*     MENT,  AND FITNESS  FOR A PARTICULAR PURPOSE.   IN NO EVENT SHALL     *|
|*     NVIDIA, CORPORATION  BE LIABLE FOR ANY SPECIAL,  INDIRECT,  INCI-     *|
|*     DENTAL, OR CONSEQUENTIAL DAMAGES,  OR ANY DAMAGES  WHATSOEVER RE-     *|
|*     SULTING FROM LOSS OF USE,  DATA OR PROFITS,  WHETHER IN AN ACTION     *|
|*     OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF     *|
|*     OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOURCE CODE.     *|
|*                                                                           *|
|*     U.S. Government  End  Users.   This source code  is a "commercial     *|
|*     item,"  as that  term is  defined at  48 C.F.R. 2.101 (OCT 1995),     *|
|*     consisting  of "commercial  computer  software"  and  "commercial     *|
|*     computer  software  documentation,"  as such  terms  are  used in     *|
|*     48 C.F.R. 12.212 (SEPT 1995)  and is provided to the U.S. Govern-     *|
|*     ment only as  a commercial end item.   Consistent with  48 C.F.R.     *|
|*     12.212 and  48 C.F.R. 227.7202-1 through  227.7202-4 (JUNE 1995),     *|
|*     all U.S. Government End Users  acquire the source code  with only     *|
|*     those rights set forth herein.                                        *|
|*                                                                           *|
 \***************************************************************************/

/*
 * GPL Licensing Note - According to Mark Vojkovich, author of the Xorg/
 * XFree86 'nv' driver, this source code is provided under MIT-style licensing
 * where the source code is provided "as is" without warranty of any kind.
 * The only usage restriction is for the copyright notices to be retained
 * whenever code is used.
 *
 * Antonino Daplas <adaplas@pol.net> 2005-03-11
 */

#include <linux/fb.h>
#include "nv_type.h"
#include "nv_proto.h"
#include "nv_dma.h"
#include "nv_local.h"

/*
 * There is a HW race condition with videoram command buffers: you
 * cannot jump to the location of your put offset.  To work around
 * this, we write put at the jump offset + SKIPS dwords, with no-op
 * padding in between.
 */
#define SKIPS  8

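/*
 * ROP3 codes used for solid fills and blits, indexed by the fbdev
 * raster-op (ROP_COPY = 0, ROP_XOR = 1): 0xCC copies the source and
 * 0x55 inverts the destination.  NVCopyROP_PM holds the equivalents
 * that only touch pixels where the pattern (used here as a plane
 * mask) has bits set.
 */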
static const int NVCopyROP[16] = {
        0xCC,                   /* copy   */
        0x55                    /* invert */
};

static const int NVCopyROP_PM[16] = {
        0xCA,                   /* copy   */
        0x5A,                   /* invert */
};

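/*
 * Wait for the hardware GET pointer to catch up with our PUT pointer,
 * i.e. for every queued command to have been fetched from the DMA
 * ring, flagging a lockup if that never happens.
 */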
static inline void NVFlush(struct nvidia_par *par)
{
        int count = 1000000000;

        while (--count && READ_GET(par) != par->dmaPut) ;

        if (!count) {
                printk(KERN_ERR "nvidiafb: DMA Flush lockup\n");
                par->lockup = 1;
        }
}

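/*
 * Wait for the graphics engine to go idle by polling what is assumed
 * to be the PGRAPH status register at offset 0x0700.
 */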
static inline void NVSync(struct nvidia_par *par)
{
        int count = 1000000000;

        while (--count && NV_RD32(par->PGRAPH, 0x0700)) ;

        if (!count) {
                printk(KERN_ERR "nvidiafb: DMA Sync lockup\n");
                par->lockup = 1;
        }
}

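/*
 * Publish the software PUT pointer to the hardware so that it starts
 * fetching whatever has been queued since the last kickoff.
 */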
static void NVDmaKickoff(struct nvidia_par *par)
{
        if (par->dmaCurrent != par->dmaPut) {
                par->dmaPut = par->dmaCurrent;
                WRITE_PUT(par, par->dmaPut);
        }
}

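/*
 * Make sure at least 'size' entries are free in the command ring.
 * When the end of the buffer is reached, a jump back to the start of
 * the buffer (0x20000000) is emitted and PUT/CURRENT wrap to just
 * past the SKIPS no-op words.
 */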
static void NVDmaWait(struct nvidia_par *par, int size)
{
        int dmaGet;
        int count = 1000000000, cnt;
        size++;

        while (par->dmaFree < size && --count && !par->lockup) {
                dmaGet = READ_GET(par);

                if (par->dmaPut >= dmaGet) {
                        par->dmaFree = par->dmaMax - par->dmaCurrent;
                        if (par->dmaFree < size) {
                                NVDmaNext(par, 0x20000000);
                                if (dmaGet <= SKIPS) {
                                        if (par->dmaPut <= SKIPS)
                                                WRITE_PUT(par, SKIPS + 1);
                                        cnt = 1000000000;
                                        do {
                                                dmaGet = READ_GET(par);
                                        } while (--cnt && dmaGet <= SKIPS);
                                        if (!cnt) {
                                                printk(KERN_ERR "nvidiafb: DMA Get lockup\n");
                                                par->lockup = 1;
                                        }
                                }
                                WRITE_PUT(par, SKIPS);
                                par->dmaCurrent = par->dmaPut = SKIPS;
                                par->dmaFree = dmaGet - (SKIPS + 1);
                        }
                } else
                        par->dmaFree = dmaGet - par->dmaCurrent - 1;
        }

        if (!count) {
                printk(KERN_ERR "nvidiafb: DMA Wait lockup\n");
                par->lockup = 1;
        }
}

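/*
 * Program the pattern object: the two pattern colours followed by the
 * two 32-bit words of what appears to be the 8x8 monochrome pattern
 * itself.
 */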
static void NVSetPattern(struct nvidia_par *par, u32 clr0, u32 clr1,
                         u32 pat0, u32 pat1)
{
        NVDmaStart(par, PATTERN_COLOR_0, 4);
        NVDmaNext(par, clr0);
        NVDmaNext(par, clr1);
        NVDmaNext(par, pat0);
        NVDmaNext(par, pat1);
}

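/*
 * Select the raster operation for solid fills/blits.  A plane mask
 * other than ~0 is implemented by loading the mask into the pattern
 * and switching to the pattern-aware ROP table; par->currentRop
 * caches the last programmed value (offset by 32 for the masked
 * variants) so redundant updates are skipped.
 */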
static void NVSetRopSolid(struct nvidia_par *par, u32 rop, u32 planemask)
{
        if (planemask != ~0) {
                NVSetPattern(par, 0, planemask, ~0, ~0);
                if (par->currentRop != (rop + 32)) {
                        NVDmaStart(par, ROP_SET, 1);
                        NVDmaNext(par, NVCopyROP_PM[rop]);
                        par->currentRop = rop + 32;
                }
        } else if (par->currentRop != rop) {
                if (par->currentRop >= 16)
                        NVSetPattern(par, ~0, ~0, ~0, ~0);
                NVDmaStart(par, ROP_SET, 1);
                NVDmaNext(par, NVCopyROP[rop]);
                par->currentRop = rop;
        }
}

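/* Set the hardware clipping rectangle from two corner points. */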
static void NVSetClippingRectangle(struct fb_info *info, int x1, int y1,
                                   int x2, int y2)
{
        struct nvidia_par *par = info->par;
        int h = y2 - y1 + 1;
        int w = x2 - x1 + 1;

        NVDmaStart(par, CLIP_POINT, 2);
        NVDmaNext(par, (y1 << 16) | x1);
        NVDmaNext(par, (h << 16) | w);
}

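/*
 * (Re)initialize the acceleration engine: place the DMA command
 * buffer at the end of the usable framebuffer memory, bind the
 * drawing objects, program the surface/pattern/rect/line formats for
 * the current depth, and reset the ROP and clipping state.
 */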
void NVResetGraphics(struct fb_info *info)
{
        struct nvidia_par *par = info->par;
        u32 surfaceFormat, patternFormat, rectFormat, lineFormat;
        int pitch, i;

        pitch = info->fix.line_length;

        par->dmaBase = (u32 __iomem *) (&par->FbStart[par->FbUsableSize]);

        for (i = 0; i < SKIPS; i++)
                NV_WR32(&par->dmaBase[i], 0, 0x00000000);

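        /*
         * Bind the drawing objects to the FIFO subchannels used by the
         * helpers in this file.  Each pair is assumed to be a set-object
         * method on one subchannel followed by the object handle
         * (0x8000001x) created during hardware initialization.
         */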
        NV_WR32(&par->dmaBase[0x0 + SKIPS], 0, 0x00040000);
        NV_WR32(&par->dmaBase[0x1 + SKIPS], 0, 0x80000010);
        NV_WR32(&par->dmaBase[0x2 + SKIPS], 0, 0x00042000);
        NV_WR32(&par->dmaBase[0x3 + SKIPS], 0, 0x80000011);
        NV_WR32(&par->dmaBase[0x4 + SKIPS], 0, 0x00044000);
        NV_WR32(&par->dmaBase[0x5 + SKIPS], 0, 0x80000012);
        NV_WR32(&par->dmaBase[0x6 + SKIPS], 0, 0x00046000);
        NV_WR32(&par->dmaBase[0x7 + SKIPS], 0, 0x80000013);
        NV_WR32(&par->dmaBase[0x8 + SKIPS], 0, 0x00048000);
        NV_WR32(&par->dmaBase[0x9 + SKIPS], 0, 0x80000014);
        NV_WR32(&par->dmaBase[0xA + SKIPS], 0, 0x0004A000);
        NV_WR32(&par->dmaBase[0xB + SKIPS], 0, 0x80000015);
        NV_WR32(&par->dmaBase[0xC + SKIPS], 0, 0x0004C000);
        NV_WR32(&par->dmaBase[0xD + SKIPS], 0, 0x80000016);
        NV_WR32(&par->dmaBase[0xE + SKIPS], 0, 0x0004E000);
        NV_WR32(&par->dmaBase[0xF + SKIPS], 0, 0x80000017);

        par->dmaPut = 0;
        par->dmaCurrent = 16 + SKIPS;
        par->dmaMax = 8191;
        par->dmaFree = par->dmaMax - par->dmaCurrent;

        switch (info->var.bits_per_pixel) {
        case 32:
        case 24:
                surfaceFormat = SURFACE_FORMAT_DEPTH24;
                patternFormat = PATTERN_FORMAT_DEPTH24;
                rectFormat = RECT_FORMAT_DEPTH24;
                lineFormat = LINE_FORMAT_DEPTH24;
                break;
        case 16:
                surfaceFormat = SURFACE_FORMAT_DEPTH16;
                patternFormat = PATTERN_FORMAT_DEPTH16;
                rectFormat = RECT_FORMAT_DEPTH16;
                lineFormat = LINE_FORMAT_DEPTH16;
                break;
        default:
                surfaceFormat = SURFACE_FORMAT_DEPTH8;
                patternFormat = PATTERN_FORMAT_DEPTH8;
                rectFormat = RECT_FORMAT_DEPTH8;
                lineFormat = LINE_FORMAT_DEPTH8;
                break;
        }

        NVDmaStart(par, SURFACE_FORMAT, 4);
        NVDmaNext(par, surfaceFormat);
        NVDmaNext(par, pitch | (pitch << 16));
        NVDmaNext(par, 0);
        NVDmaNext(par, 0);

        NVDmaStart(par, PATTERN_FORMAT, 1);
        NVDmaNext(par, patternFormat);

        NVDmaStart(par, RECT_FORMAT, 1);
        NVDmaNext(par, rectFormat);

        NVDmaStart(par, LINE_FORMAT, 1);
        NVDmaNext(par, lineFormat);

        par->currentRop = ~0;   /* set to something invalid */
        NVSetRopSolid(par, ROP_COPY, ~0);

        NVSetClippingRectangle(info, 0, 0, info->var.xres_virtual,
                               info->var.yres_virtual);

        NVDmaKickoff(par);
}

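/*
 * Lookup table of bytes with their bit order reversed (byte_rev[i] is
 * i with its eight bits mirrored).  It is used via reverse_order()
 * when monochrome bitmap data is streamed to the colour-expansion
 * object below.
 */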
u8 byte_rev[256] = {
        0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
        0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
        0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
        0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
        0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
        0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
        0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
        0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
        0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
        0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
        0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
        0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
        0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
        0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
        0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
        0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
        0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
        0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
        0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
        0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
        0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
        0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
        0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
        0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
        0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
        0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
        0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
        0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
        0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
        0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
        0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
        0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
};

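/*
 * fbdev sync hook: flush the command ring and wait for the engine to
 * go idle, unless a lockup has already been detected.
 */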
int nvidiafb_sync(struct fb_info *info)
{
        struct nvidia_par *par = info->par;

        if (info->state != FBINFO_STATE_RUNNING)
                return 0;

        if (!par->lockup)
                NVFlush(par);

        if (!par->lockup)
                NVSync(par);

        return 0;
}

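/*
 * Accelerated screen-to-screen copy; falls back to the generic
 * cfb_copyarea() once the engine is locked up.
 */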
void nvidiafb_copyarea(struct fb_info *info, const struct fb_copyarea *region)
{
        struct nvidia_par *par = info->par;

        if (info->state != FBINFO_STATE_RUNNING)
                return;

        if (par->lockup)
                return cfb_copyarea(info, region);

        NVDmaStart(par, BLIT_POINT_SRC, 3);
        NVDmaNext(par, (region->sy << 16) | region->sx);
        NVDmaNext(par, (region->dy << 16) | region->dx);
        NVDmaNext(par, (region->height << 16) | region->width);

        NVDmaKickoff(par);
}

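/*
 * Accelerated solid fill.  At depths above 8 bpp the colour comes
 * from the pseudo palette; a non-COPY raster op is programmed for the
 * duration of the fill and restored afterwards.  Falls back to
 * cfb_fillrect() once the engine is locked up.
 */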
void nvidiafb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
{
        struct nvidia_par *par = info->par;
        u32 color;

        if (info->state != FBINFO_STATE_RUNNING)
                return;

        if (par->lockup)
                return cfb_fillrect(info, rect);

        if (info->var.bits_per_pixel == 8)
                color = rect->color;
        else
                color = ((u32 *) info->pseudo_palette)[rect->color];

        if (rect->rop != ROP_COPY)
                NVSetRopSolid(par, rect->rop, ~0);

        NVDmaStart(par, RECT_SOLID_COLOR, 1);
        NVDmaNext(par, color);

        NVDmaStart(par, RECT_SOLID_RECTS(0), 2);
        NVDmaNext(par, (rect->dx << 16) | rect->dy);
        NVDmaNext(par, (rect->width << 16) | rect->height);

        NVDmaKickoff(par);

        if (rect->rop != ROP_COPY)
                NVSetRopSolid(par, ROP_COPY, ~0);
}

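/*
 * Draw a 1 bpp image using the hardware two-colour expansion object:
 * program the clip, colours and size, then stream the bitmap dwords
 * (bit order reversed within each byte) in chunks of at most
 * RECT_EXPAND_TWO_COLOR_DATA_MAX_DWORDS.
 */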
static void nvidiafb_mono_color_expand(struct fb_info *info,
                                       const struct fb_image *image)
{
        struct nvidia_par *par = info->par;
        u32 fg, bg, mask = ~(~0 >> (32 - info->var.bits_per_pixel));
        u32 dsize, width, *data = (u32 *) image->data, tmp;
        int j, k = 0;

        width = (image->width + 31) & ~31;
        dsize = (width * image->height) >> 5;

        if (info->var.bits_per_pixel == 8) {
                fg = image->fg_color | mask;
                bg = image->bg_color | mask;
        } else {
                fg = ((u32 *) info->pseudo_palette)[image->fg_color] | mask;
                bg = ((u32 *) info->pseudo_palette)[image->bg_color] | mask;
        }

        NVDmaStart(par, RECT_EXPAND_TWO_COLOR_CLIP, 7);
        NVDmaNext(par, (image->dy << 16) | (image->dx & 0xffff));
        NVDmaNext(par, ((image->dy + image->height) << 16) |
                  ((image->dx + image->width) & 0xffff));
        NVDmaNext(par, bg);
        NVDmaNext(par, fg);
        NVDmaNext(par, (image->height << 16) | width);
        NVDmaNext(par, (image->height << 16) | width);
        NVDmaNext(par, (image->dy << 16) | (image->dx & 0xffff));

        while (dsize >= RECT_EXPAND_TWO_COLOR_DATA_MAX_DWORDS) {
                NVDmaStart(par, RECT_EXPAND_TWO_COLOR_DATA(0),
                           RECT_EXPAND_TWO_COLOR_DATA_MAX_DWORDS);

                for (j = RECT_EXPAND_TWO_COLOR_DATA_MAX_DWORDS; j--;) {
                        tmp = data[k++];
                        reverse_order(&tmp);
                        NVDmaNext(par, tmp);
                }

                dsize -= RECT_EXPAND_TWO_COLOR_DATA_MAX_DWORDS;
        }

        if (dsize) {
                NVDmaStart(par, RECT_EXPAND_TWO_COLOR_DATA(0), dsize);

                for (j = dsize; j--;) {
                        tmp = data[k++];
                        reverse_order(&tmp);
                        NVDmaNext(par, tmp);
                }
        }

        NVDmaKickoff(par);
}

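/*
 * fbdev imageblit hook: use the hardware colour expansion for
 * monochrome images while the engine is healthy, otherwise fall back
 * to cfb_imageblit().
 */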
void nvidiafb_imageblit(struct fb_info *info, const struct fb_image *image)
{
        struct nvidia_par *par = info->par;

        if (info->state != FBINFO_STATE_RUNNING)
                return;

        if (image->depth == 1 && !par->lockup)
                nvidiafb_mono_color_expand(info, image);
        else
                cfb_imageblit(info, image);
}