Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
[linux-2.6] / drivers / char / drm / r300_cmdbuf.c
1 /* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
2  *
3  * Copyright (C) The Weather Channel, Inc.  2002.
4  * Copyright (C) 2004 Nicolai Haehnle.
5  * All Rights Reserved.
6  *
7  * The Weather Channel (TM) funded Tungsten Graphics to develop the
8  * initial release of the Radeon 8500 driver under the XFree86 license.
9  * This notice must be preserved.
10  *
11  * Permission is hereby granted, free of charge, to any person obtaining a
12  * copy of this software and associated documentation files (the "Software"),
13  * to deal in the Software without restriction, including without limitation
14  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15  * and/or sell copies of the Software, and to permit persons to whom the
16  * Software is furnished to do so, subject to the following conditions:
17  *
18  * The above copyright notice and this permission notice (including the next
19  * paragraph) shall be included in all copies or substantial portions of the
20  * Software.
21  *
22  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
25  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
28  * DEALINGS IN THE SOFTWARE.
29  *
30  * Authors:
31  *    Nicolai Haehnle <prefect_@gmx.net>
32  */
33
34 #include "drmP.h"
35 #include "drm.h"
36 #include "radeon_drm.h"
37 #include "radeon_drv.h"
38 #include "r300_reg.h"
39
/* Upper bound on the number of cliprects emitted by one call to
 * r300_emit_cliprects().
 */
#define R300_SIMULTANEOUS_CLIPRECTS             4

/* R300_RE_CLIPRECT_CNTL values, indexed by (number of cliprects - 1).
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
        0xAAAA,                 /* 1 cliprect  */
        0xEEEE,                 /* 2 cliprects */
        0xFEFE,                 /* 3 cliprects */
        0xFFFE                  /* 4 cliprects */
};
50
51 /**
52  * Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
53  * buffer, starting with index n.
54  */
55 static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
56                                drm_radeon_kcmd_buffer_t *cmdbuf, int n)
57 {
58         struct drm_clip_rect box;
59         int nr;
60         int i;
61         RING_LOCALS;
62
63         nr = cmdbuf->nbox - n;
64         if (nr > R300_SIMULTANEOUS_CLIPRECTS)
65                 nr = R300_SIMULTANEOUS_CLIPRECTS;
66
67         DRM_DEBUG("%i cliprects\n", nr);
68
69         if (nr) {
70                 BEGIN_RING(6 + nr * 2);
71                 OUT_RING(CP_PACKET0(R300_RE_CLIPRECT_TL_0, nr * 2 - 1));
72
73                 for (i = 0; i < nr; ++i) {
74                         if (DRM_COPY_FROM_USER_UNCHECKED
75                             (&box, &cmdbuf->boxes[n + i], sizeof(box))) {
76                                 DRM_ERROR("copy cliprect faulted\n");
77                                 return -EFAULT;
78                         }
79
80                         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
81                                 box.x1 = (box.x1) &
82                                         R300_CLIPRECT_MASK;
83                                 box.y1 = (box.y1) &
84                                         R300_CLIPRECT_MASK;
85                                 box.x2 = (box.x2) &
86                                         R300_CLIPRECT_MASK;
87                                 box.y2 = (box.y2) &
88                                         R300_CLIPRECT_MASK;
89                         } else {
90                                 box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) &
91                                         R300_CLIPRECT_MASK;
92                                 box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) &
93                                         R300_CLIPRECT_MASK;
94                                 box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) &
95                                         R300_CLIPRECT_MASK;
96                                 box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) &
97                                         R300_CLIPRECT_MASK;
98
99                         }
100                         OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
101                                  (box.y1 << R300_CLIPRECT_Y_SHIFT));
102                         OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
103                                  (box.y2 << R300_CLIPRECT_Y_SHIFT));
104
105                 }
106
107                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr - 1]);
108
109                 /* TODO/SECURITY: Force scissors to a safe value, otherwise the
110                  * client might be able to trample over memory.
111                  * The impact should be very limited, but I'd rather be safe than
112                  * sorry.
113                  */
114                 OUT_RING(CP_PACKET0(R300_RE_SCISSORS_TL, 1));
115                 OUT_RING(0);
116                 OUT_RING(R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK);
117                 ADVANCE_RING();
118         } else {
119                 /* Why we allow zero cliprect rendering:
120                  * There are some commands in a command buffer that must be submitted
121                  * even when there are no cliprects, e.g. DMA buffer discard
122                  * or state setting (though state setting could be avoided by
123                  * simulating a loss of context).
124                  *
125                  * Now since the cmdbuf interface is so chaotic right now (and is
126                  * bound to remain that way for a bit until things settle down),
127                  * it is basically impossible to filter out the commands that are
128                  * necessary and those that aren't.
129                  *
130                  * So I choose the safe way and don't do any filtering at all;
131                  * instead, I simply set up the engine so that all rendering
132                  * can't produce any fragments.
133                  */
134                 BEGIN_RING(2);
135                 OUT_RING_REG(R300_RE_CLIPRECT_CNTL, 0);
136                 ADVANCE_RING();
137         }
138
139         return 0;
140 }
141
142 static u8 r300_reg_flags[0x10000 >> 2];
143
144 void r300_init_reg_flags(struct drm_device *dev)
145 {
146         int i;
147         drm_radeon_private_t *dev_priv = dev->dev_private;
148
149         memset(r300_reg_flags, 0, 0x10000 >> 2);
150 #define ADD_RANGE_MARK(reg, count,mark) \
151                 for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
152                         r300_reg_flags[i]|=(mark);
153
154 #define MARK_SAFE               1
155 #define MARK_CHECK_OFFSET       2
156
157 #define ADD_RANGE(reg, count)   ADD_RANGE_MARK(reg, count, MARK_SAFE)
158
159         /* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
160         ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
161         ADD_RANGE(R300_VAP_CNTL, 1);
162         ADD_RANGE(R300_SE_VTE_CNTL, 2);
163         ADD_RANGE(0x2134, 2);
164         ADD_RANGE(R300_VAP_CNTL_STATUS, 1);
165         ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
166         ADD_RANGE(0x21DC, 1);
167         ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
168         ADD_RANGE(R300_VAP_CLIP_X_0, 4);
169         ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
170         ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
171         ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
172         ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
173         ADD_RANGE(R300_GB_ENABLE, 1);
174         ADD_RANGE(R300_GB_MSPOS0, 5);
175         ADD_RANGE(R300_TX_CNTL, 1);
176         ADD_RANGE(R300_TX_ENABLE, 1);
177         ADD_RANGE(0x4200, 4);
178         ADD_RANGE(0x4214, 1);
179         ADD_RANGE(R300_RE_POINTSIZE, 1);
180         ADD_RANGE(0x4230, 3);
181         ADD_RANGE(R300_RE_LINE_CNT, 1);
182         ADD_RANGE(R300_RE_UNK4238, 1);
183         ADD_RANGE(0x4260, 3);
184         ADD_RANGE(R300_RE_SHADE, 4);
185         ADD_RANGE(R300_RE_POLYGON_MODE, 5);
186         ADD_RANGE(R300_RE_ZBIAS_CNTL, 1);
187         ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
188         ADD_RANGE(R300_RE_OCCLUSION_CNTL, 1);
189         ADD_RANGE(R300_RE_CULL_CNTL, 1);
190         ADD_RANGE(0x42C0, 2);
191         ADD_RANGE(R300_RS_CNTL_0, 2);
192         ADD_RANGE(R300_RS_INTERP_0, 8);
193         ADD_RANGE(R300_RS_ROUTE_0, 8);
194         ADD_RANGE(0x43A4, 2);
195         ADD_RANGE(0x43E8, 1);
196         ADD_RANGE(R300_PFS_CNTL_0, 3);
197         ADD_RANGE(R300_PFS_NODE_0, 4);
198         ADD_RANGE(R300_PFS_TEXI_0, 64);
199         ADD_RANGE(0x46A4, 5);
200         ADD_RANGE(R300_PFS_INSTR0_0, 64);
201         ADD_RANGE(R300_PFS_INSTR1_0, 64);
202         ADD_RANGE(R300_PFS_INSTR2_0, 64);
203         ADD_RANGE(R300_PFS_INSTR3_0, 64);
204         ADD_RANGE(R300_RE_FOG_STATE, 1);
205         ADD_RANGE(R300_FOG_COLOR_R, 3);
206         ADD_RANGE(R300_PP_ALPHA_TEST, 2);
207         ADD_RANGE(0x4BD8, 1);
208         ADD_RANGE(R300_PFS_PARAM_0_X, 64);
209         ADD_RANGE(0x4E00, 1);
210         ADD_RANGE(R300_RB3D_CBLEND, 2);
211         ADD_RANGE(R300_RB3D_COLORMASK, 1);
212         ADD_RANGE(R300_RB3D_BLEND_COLOR, 3);
213         ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET);   /* check offset */
214         ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
215         ADD_RANGE(0x4E50, 9);
216         ADD_RANGE(0x4E88, 1);
217         ADD_RANGE(0x4EA0, 2);
218         ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
219         ADD_RANGE(R300_RB3D_ZSTENCIL_FORMAT, 4);
220         ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET);    /* check offset */
221         ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
222         ADD_RANGE(0x4F28, 1);
223         ADD_RANGE(0x4F30, 2);
224         ADD_RANGE(0x4F44, 1);
225         ADD_RANGE(0x4F54, 1);
226
227         ADD_RANGE(R300_TX_FILTER_0, 16);
228         ADD_RANGE(R300_TX_FILTER1_0, 16);
229         ADD_RANGE(R300_TX_SIZE_0, 16);
230         ADD_RANGE(R300_TX_FORMAT_0, 16);
231         ADD_RANGE(R300_TX_PITCH_0, 16);
232         /* Texture offset is dangerous and needs more checking */
233         ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
234         ADD_RANGE(R300_TX_CHROMA_KEY_0, 16);
235         ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
236
237         /* Sporadic registers used as primitives are emitted */
238         ADD_RANGE(R300_RB3D_ZCACHE_CTLSTAT, 1);
239         ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
240         ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
241         ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
242
243         if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_RV515) {
244                 ADD_RANGE(0x4074, 16);
245         }
246 }
247
248 static __inline__ int r300_check_range(unsigned reg, int count)
249 {
250         int i;
251         if (reg & ~0xffff)
252                 return -1;
253         for (i = (reg >> 2); i < (reg >> 2) + count; i++)
254                 if (r300_reg_flags[i] != MARK_SAFE)
255                         return 1;
256         return 0;
257 }
258
259 static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t *
260                                                           dev_priv,
261                                                           drm_radeon_kcmd_buffer_t
262                                                           * cmdbuf,
263                                                           drm_r300_cmd_header_t
264                                                           header)
265 {
266         int reg;
267         int sz;
268         int i;
269         int values[64];
270         RING_LOCALS;
271
272         sz = header.packet0.count;
273         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
274
275         if ((sz > 64) || (sz < 0)) {
276                 DRM_ERROR
277                     ("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n",
278                      reg, sz);
279                 return -EINVAL;
280         }
281         for (i = 0; i < sz; i++) {
282                 values[i] = ((int *)cmdbuf->buf)[i];
283                 switch (r300_reg_flags[(reg >> 2) + i]) {
284                 case MARK_SAFE:
285                         break;
286                 case MARK_CHECK_OFFSET:
287                         if (!radeon_check_offset(dev_priv, (u32) values[i])) {
288                                 DRM_ERROR
289                                     ("Offset failed range check (reg=%04x sz=%d)\n",
290                                      reg, sz);
291                                 return -EINVAL;
292                         }
293                         break;
294                 default:
295                         DRM_ERROR("Register %04x failed check as flag=%02x\n",
296                                   reg + i * 4, r300_reg_flags[(reg >> 2) + i]);
297                         return -EINVAL;
298                 }
299         }
300
301         BEGIN_RING(1 + sz);
302         OUT_RING(CP_PACKET0(reg, sz - 1));
303         OUT_RING_TABLE(values, sz);
304         ADVANCE_RING();
305
306         cmdbuf->buf += sz * 4;
307         cmdbuf->bufsz -= sz * 4;
308
309         return 0;
310 }
311
312 /**
313  * Emits a packet0 setting arbitrary registers.
314  * Called by r300_do_cp_cmdbuf.
315  *
316  * Note that checks are performed on contents and addresses of the registers
317  */
318 static __inline__ int r300_emit_packet0(drm_radeon_private_t *dev_priv,
319                                         drm_radeon_kcmd_buffer_t *cmdbuf,
320                                         drm_r300_cmd_header_t header)
321 {
322         int reg;
323         int sz;
324         RING_LOCALS;
325
326         sz = header.packet0.count;
327         reg = (header.packet0.reghi << 8) | header.packet0.reglo;
328
329         if (!sz)
330                 return 0;
331
332         if (sz * 4 > cmdbuf->bufsz)
333                 return -EINVAL;
334
335         if (reg + sz * 4 >= 0x10000) {
336                 DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg,
337                           sz);
338                 return -EINVAL;
339         }
340
341         if (r300_check_range(reg, sz)) {
342                 /* go and check everything */
343                 return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf,
344                                                            header);
345         }
346         /* the rest of the data is safe to emit, whatever the values the user passed */
347
348         BEGIN_RING(1 + sz);
349         OUT_RING(CP_PACKET0(reg, sz - 1));
350         OUT_RING_TABLE((int *)cmdbuf->buf, sz);
351         ADVANCE_RING();
352
353         cmdbuf->buf += sz * 4;
354         cmdbuf->bufsz -= sz * 4;
355
356         return 0;
357 }
358
359 /**
360  * Uploads user-supplied vertex program instructions or parameters onto
361  * the graphics card.
362  * Called by r300_do_cp_cmdbuf.
363  */
364 static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
365                                     drm_radeon_kcmd_buffer_t *cmdbuf,
366                                     drm_r300_cmd_header_t header)
367 {
368         int sz;
369         int addr;
370         RING_LOCALS;
371
372         sz = header.vpu.count;
373         addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
374
375         if (!sz)
376                 return 0;
377         if (sz * 16 > cmdbuf->bufsz)
378                 return -EINVAL;
379
380         BEGIN_RING(5 + sz * 4);
381         /* Wait for VAP to come to senses.. */
382         /* there is no need to emit it multiple times, (only once before VAP is programmed,
383            but this optimization is for later */
384         OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
385         OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
386         OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
387         OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
388
389         ADVANCE_RING();
390
391         cmdbuf->buf += sz * 16;
392         cmdbuf->bufsz -= sz * 16;
393
394         return 0;
395 }
396
397 /**
398  * Emit a clear packet from userspace.
399  * Called by r300_emit_packet3.
400  */
401 static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
402                                       drm_radeon_kcmd_buffer_t *cmdbuf)
403 {
404         RING_LOCALS;
405
406         if (8 * 4 > cmdbuf->bufsz)
407                 return -EINVAL;
408
409         BEGIN_RING(10);
410         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
411         OUT_RING(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
412                  (1 << R300_PRIM_NUM_VERTICES_SHIFT));
413         OUT_RING_TABLE((int *)cmdbuf->buf, 8);
414         ADVANCE_RING();
415
416         cmdbuf->buf += 8 * 4;
417         cmdbuf->bufsz -= 8 * 4;
418
419         return 0;
420 }
421
422 static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t *dev_priv,
423                                                drm_radeon_kcmd_buffer_t *cmdbuf,
424                                                u32 header)
425 {
426         int count, i, k;
427 #define MAX_ARRAY_PACKET  64
428         u32 payload[MAX_ARRAY_PACKET];
429         u32 narrays;
430         RING_LOCALS;
431
432         count = (header >> 16) & 0x3fff;
433
434         if ((count + 1) > MAX_ARRAY_PACKET) {
435                 DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
436                           count);
437                 return -EINVAL;
438         }
439         memset(payload, 0, MAX_ARRAY_PACKET * 4);
440         memcpy(payload, cmdbuf->buf + 4, (count + 1) * 4);
441
442         /* carefully check packet contents */
443
444         narrays = payload[0];
445         k = 0;
446         i = 1;
447         while ((k < narrays) && (i < (count + 1))) {
448                 i++;            /* skip attribute field */
449                 if (!radeon_check_offset(dev_priv, payload[i])) {
450                         DRM_ERROR
451                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
452                              k, i);
453                         return -EINVAL;
454                 }
455                 k++;
456                 i++;
457                 if (k == narrays)
458                         break;
459                 /* have one more to process, they come in pairs */
460                 if (!radeon_check_offset(dev_priv, payload[i])) {
461                         DRM_ERROR
462                             ("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n",
463                              k, i);
464                         return -EINVAL;
465                 }
466                 k++;
467                 i++;
468         }
469         /* do the counts match what we expect ? */
470         if ((k != narrays) || (i != (count + 1))) {
471                 DRM_ERROR
472                     ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
473                      k, i, narrays, count + 1);
474                 return -EINVAL;
475         }
476
477         /* all clear, output packet */
478
479         BEGIN_RING(count + 2);
480         OUT_RING(header);
481         OUT_RING_TABLE(payload, count + 1);
482         ADVANCE_RING();
483
484         cmdbuf->buf += (count + 2) * 4;
485         cmdbuf->bufsz -= (count + 2) * 4;
486
487         return 0;
488 }
489
490 static __inline__ int r300_emit_bitblt_multi(drm_radeon_private_t *dev_priv,
491                                              drm_radeon_kcmd_buffer_t *cmdbuf)
492 {
493         u32 *cmd = (u32 *) cmdbuf->buf;
494         int count, ret;
495         RING_LOCALS;
496
497         count=(cmd[0]>>16) & 0x3fff;
498
499         if (cmd[0] & 0x8000) {
500                 u32 offset;
501
502                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
503                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
504                         offset = cmd[2] << 10;
505                         ret = !radeon_check_offset(dev_priv, offset);
506                         if (ret) {
507                                 DRM_ERROR("Invalid bitblt first offset is %08X\n", offset);
508                                 return -EINVAL;
509                         }
510                 }
511
512                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
513                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
514                         offset = cmd[3] << 10;
515                         ret = !radeon_check_offset(dev_priv, offset);
516                         if (ret) {
517                                 DRM_ERROR("Invalid bitblt second offset is %08X\n", offset);
518                                 return -EINVAL;
519                         }
520
521                 }
522         }
523
524         BEGIN_RING(count+2);
525         OUT_RING(cmd[0]);
526         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
527         ADVANCE_RING();
528
529         cmdbuf->buf += (count+2)*4;
530         cmdbuf->bufsz -= (count+2)*4;
531
532         return 0;
533 }
534
535 static __inline__ int r300_emit_indx_buffer(drm_radeon_private_t *dev_priv,
536                                              drm_radeon_kcmd_buffer_t *cmdbuf)
537 {
538         u32 *cmd = (u32 *) cmdbuf->buf;
539         int count, ret;
540         RING_LOCALS;
541
542         count=(cmd[0]>>16) & 0x3fff;
543
544         if ((cmd[1] & 0x8000ffff) != 0x80000810) {
545                 DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
546                 return -EINVAL;
547         }
548         ret = !radeon_check_offset(dev_priv, cmd[2]);
549         if (ret) {
550                 DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
551                 return -EINVAL;
552         }
553
554         BEGIN_RING(count+2);
555         OUT_RING(cmd[0]);
556         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
557         ADVANCE_RING();
558
559         cmdbuf->buf += (count+2)*4;
560         cmdbuf->bufsz -= (count+2)*4;
561
562         return 0;
563 }
564
565 static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
566                                             drm_radeon_kcmd_buffer_t *cmdbuf)
567 {
568         u32 header;
569         int count;
570         RING_LOCALS;
571
572         if (4 > cmdbuf->bufsz)
573                 return -EINVAL;
574
575         /* Fixme !! This simply emits a packet without much checking.
576            We need to be smarter. */
577
578         /* obtain first word - actual packet3 header */
579         header = *(u32 *) cmdbuf->buf;
580
581         /* Is it packet 3 ? */
582         if ((header >> 30) != 0x3) {
583                 DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
584                 return -EINVAL;
585         }
586
587         count = (header >> 16) & 0x3fff;
588
589         /* Check again now that we know how much data to expect */
590         if ((count + 2) * 4 > cmdbuf->bufsz) {
591                 DRM_ERROR
592                     ("Expected packet3 of length %d but have only %d bytes left\n",
593                      (count + 2) * 4, cmdbuf->bufsz);
594                 return -EINVAL;
595         }
596
597         /* Is it a packet type we know about ? */
598         switch (header & 0xff00) {
599         case RADEON_3D_LOAD_VBPNTR:     /* load vertex array pointers */
600                 return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
601
602         case RADEON_CNTL_BITBLT_MULTI:
603                 return r300_emit_bitblt_multi(dev_priv, cmdbuf);
604
605         case RADEON_CP_INDX_BUFFER:     /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
606                 return r300_emit_indx_buffer(dev_priv, cmdbuf);
607         case RADEON_CP_3D_DRAW_IMMD_2:  /* triggers drawing using in-packet vertex data */
608         case RADEON_CP_3D_DRAW_VBUF_2:  /* triggers drawing of vertex buffers setup elsewhere */
609         case RADEON_CP_3D_DRAW_INDX_2:  /* triggers drawing using indices to vertex buffer */
610         case RADEON_WAIT_FOR_IDLE:
611         case RADEON_CP_NOP:
612                 /* these packets are safe */
613                 break;
614         default:
615                 DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
616                 return -EINVAL;
617         }
618
619         BEGIN_RING(count + 2);
620         OUT_RING(header);
621         OUT_RING_TABLE((int *)(cmdbuf->buf + 4), count + 1);
622         ADVANCE_RING();
623
624         cmdbuf->buf += (count + 2) * 4;
625         cmdbuf->bufsz -= (count + 2) * 4;
626
627         return 0;
628 }
629
630 /**
631  * Emit a rendering packet3 from userspace.
632  * Called by r300_do_cp_cmdbuf.
633  */
634 static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
635                                         drm_radeon_kcmd_buffer_t *cmdbuf,
636                                         drm_r300_cmd_header_t header)
637 {
638         int n;
639         int ret;
640         char *orig_buf = cmdbuf->buf;
641         int orig_bufsz = cmdbuf->bufsz;
642
643         /* This is a do-while-loop so that we run the interior at least once,
644          * even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
645          */
646         n = 0;
647         do {
648                 if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
649                         ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
650                         if (ret)
651                                 return ret;
652
653                         cmdbuf->buf = orig_buf;
654                         cmdbuf->bufsz = orig_bufsz;
655                 }
656
657                 switch (header.packet3.packet) {
658                 case R300_CMD_PACKET3_CLEAR:
659                         DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
660                         ret = r300_emit_clear(dev_priv, cmdbuf);
661                         if (ret) {
662                                 DRM_ERROR("r300_emit_clear failed\n");
663                                 return ret;
664                         }
665                         break;
666
667                 case R300_CMD_PACKET3_RAW:
668                         DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
669                         ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
670                         if (ret) {
671                                 DRM_ERROR("r300_emit_raw_packet3 failed\n");
672                                 return ret;
673                         }
674                         break;
675
676                 default:
677                         DRM_ERROR("bad packet3 type %i at %p\n",
678                                   header.packet3.packet,
679                                   cmdbuf->buf - sizeof(header));
680                         return -EINVAL;
681                 }
682
683                 n += R300_SIMULTANEOUS_CLIPRECTS;
684         } while (n < cmdbuf->nbox);
685
686         return 0;
687 }
688
689 /* Some of the R300 chips seem to be extremely touchy about the two registers
690  * that are configured in r300_pacify.
691  * Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
692  * sends a command buffer that contains only state setting commands and a
693  * vertex program/parameter upload sequence, this will eventually lead to a
694  * lockup, unless the sequence is bracketed by calls to r300_pacify.
695  * So we should take great care to *always* call r300_pacify before
696  * *anything* 3D related, and again afterwards. This is what the
697  * call bracket in r300_do_cp_cmdbuf is for.
698  */
699
700 /**
701  * Emit the sequence to pacify R300.
702  */
703 static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
704 {
705         RING_LOCALS;
706
707         BEGIN_RING(6);
708         OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
709         OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
710         OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));
711         OUT_RING(R300_RB3D_ZCACHE_UNKNOWN_03);
712         OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
713         OUT_RING(0x0);
714         ADVANCE_RING();
715 }
716
717 /**
718  * Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
719  * The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
720  * be careful about how this function is called.
721  */
722 static void r300_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
723 {
724         drm_radeon_private_t *dev_priv = dev->dev_private;
725         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
726
727         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
728         buf->pending = 1;
729         buf->used = 0;
730 }
731
732 static int r300_scratch(drm_radeon_private_t *dev_priv,
733                         drm_radeon_kcmd_buffer_t *cmdbuf,
734                         drm_r300_cmd_header_t header)
735 {
736         u32 *ref_age_base;
737         u32 i, buf_idx, h_pending;
738         RING_LOCALS;
739
740         if (cmdbuf->bufsz <
741             (sizeof(u64) + header.scratch.n_bufs * sizeof(buf_idx))) {
742                 return -EINVAL;
743         }
744
745         if (header.scratch.reg >= 5) {
746                 return -EINVAL;
747         }
748
749         dev_priv->scratch_ages[header.scratch.reg]++;
750
751         ref_age_base =  (u32 *)(unsigned long)*((uint64_t *)cmdbuf->buf);
752
753         cmdbuf->buf += sizeof(u64);
754         cmdbuf->bufsz -= sizeof(u64);
755
756         for (i=0; i < header.scratch.n_bufs; i++) {
757                 buf_idx = *(u32 *)cmdbuf->buf;
758                 buf_idx *= 2; /* 8 bytes per buf */
759
760                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx, &dev_priv->scratch_ages[header.scratch.reg], sizeof(u32))) {
761                         return -EINVAL;
762                 }
763
764                 if (DRM_COPY_FROM_USER(&h_pending, ref_age_base + buf_idx + 1, sizeof(u32))) {
765                         return -EINVAL;
766                 }
767
768                 if (h_pending == 0) {
769                         return -EINVAL;
770                 }
771
772                 h_pending--;
773
774                 if (DRM_COPY_TO_USER(ref_age_base + buf_idx + 1, &h_pending, sizeof(u32))) {
775                         return -EINVAL;
776                 }
777
778                 cmdbuf->buf += sizeof(buf_idx);
779                 cmdbuf->bufsz -= sizeof(buf_idx);
780         }
781
782         BEGIN_RING(2);
783         OUT_RING( CP_PACKET0( RADEON_SCRATCH_REG0 + header.scratch.reg * 4, 0 ) );
784         OUT_RING( dev_priv->scratch_ages[header.scratch.reg] );
785         ADVANCE_RING();
786
787         return 0;
788 }
789
790 /**
791  * Parses and validates a user-supplied command buffer and emits appropriate
792  * commands on the DMA ring buffer.
793  * Called by the ioctl handler function radeon_cp_cmdbuf.
794  */
795 int r300_do_cp_cmdbuf(struct drm_device *dev,
796                       struct drm_file *file_priv,
797                       drm_radeon_kcmd_buffer_t *cmdbuf)
798 {
799         drm_radeon_private_t *dev_priv = dev->dev_private;
800         struct drm_device_dma *dma = dev->dma;
801         struct drm_buf *buf = NULL;
802         int emit_dispatch_age = 0;
803         int ret = 0;
804
805         DRM_DEBUG("\n");
806
807         /* See the comment above r300_emit_begin3d for why this call must be here,
808          * and what the cleanup gotos are for. */
809         r300_pacify(dev_priv);
810
811         if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
812                 ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
813                 if (ret)
814                         goto cleanup;
815         }
816
817         while (cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
818                 int idx;
819                 drm_r300_cmd_header_t header;
820
821                 header.u = *(unsigned int *)cmdbuf->buf;
822
823                 cmdbuf->buf += sizeof(header);
824                 cmdbuf->bufsz -= sizeof(header);
825
826                 switch (header.header.cmd_type) {
827                 case R300_CMD_PACKET0:
828                         DRM_DEBUG("R300_CMD_PACKET0\n");
829                         ret = r300_emit_packet0(dev_priv, cmdbuf, header);
830                         if (ret) {
831                                 DRM_ERROR("r300_emit_packet0 failed\n");
832                                 goto cleanup;
833                         }
834                         break;
835
836                 case R300_CMD_VPU:
837                         DRM_DEBUG("R300_CMD_VPU\n");
838                         ret = r300_emit_vpu(dev_priv, cmdbuf, header);
839                         if (ret) {
840                                 DRM_ERROR("r300_emit_vpu failed\n");
841                                 goto cleanup;
842                         }
843                         break;
844
845                 case R300_CMD_PACKET3:
846                         DRM_DEBUG("R300_CMD_PACKET3\n");
847                         ret = r300_emit_packet3(dev_priv, cmdbuf, header);
848                         if (ret) {
849                                 DRM_ERROR("r300_emit_packet3 failed\n");
850                                 goto cleanup;
851                         }
852                         break;
853
854                 case R300_CMD_END3D:
855                         DRM_DEBUG("R300_CMD_END3D\n");
856                         /* TODO:
857                            Ideally userspace driver should not need to issue this call,
858                            i.e. the drm driver should issue it automatically and prevent
859                            lockups.
860
861                            In practice, we do not understand why this call is needed and what
862                            it does (except for some vague guesses that it has to do with cache
863                            coherence) and so the user space driver does it.
864
865                            Once we are sure which uses prevent lockups the code could be moved
866                            into the kernel and the userspace driver will not
867                            need to use this command.
868
869                            Note that issuing this command does not hurt anything
870                            except, possibly, performance */
871                         r300_pacify(dev_priv);
872                         break;
873
874                 case R300_CMD_CP_DELAY:
875                         /* simple enough, we can do it here */
876                         DRM_DEBUG("R300_CMD_CP_DELAY\n");
877                         {
878                                 int i;
879                                 RING_LOCALS;
880
881                                 BEGIN_RING(header.delay.count);
882                                 for (i = 0; i < header.delay.count; i++)
883                                         OUT_RING(RADEON_CP_PACKET2);
884                                 ADVANCE_RING();
885                         }
886                         break;
887
888                 case R300_CMD_DMA_DISCARD:
889                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
890                         idx = header.dma.buf_idx;
891                         if (idx < 0 || idx >= dma->buf_count) {
892                                 DRM_ERROR("buffer index %d (of %d max)\n",
893                                           idx, dma->buf_count - 1);
894                                 ret = -EINVAL;
895                                 goto cleanup;
896                         }
897
898                         buf = dma->buflist[idx];
899                         if (buf->file_priv != file_priv || buf->pending) {
900                                 DRM_ERROR("bad buffer %p %p %d\n",
901                                           buf->file_priv, file_priv,
902                                           buf->pending);
903                                 ret = -EINVAL;
904                                 goto cleanup;
905                         }
906
907                         emit_dispatch_age = 1;
908                         r300_discard_buffer(dev, buf);
909                         break;
910
911                 case R300_CMD_WAIT:
912                         /* simple enough, we can do it here */
913                         DRM_DEBUG("R300_CMD_WAIT\n");
914                         if (header.wait.flags == 0)
915                                 break;  /* nothing to do */
916
917                         {
918                                 RING_LOCALS;
919
920                                 BEGIN_RING(2);
921                                 OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
922                                 OUT_RING((header.wait.flags & 0xf) << 14);
923                                 ADVANCE_RING();
924                         }
925                         break;
926
927                 case R300_CMD_SCRATCH:
928                         DRM_DEBUG("R300_CMD_SCRATCH\n");
929                         ret = r300_scratch(dev_priv, cmdbuf, header);
930                         if (ret) {
931                                 DRM_ERROR("r300_scratch failed\n");
932                                 goto cleanup;
933                         }
934                         break;
935
936                 default:
937                         DRM_ERROR("bad cmd_type %i at %p\n",
938                                   header.header.cmd_type,
939                                   cmdbuf->buf - sizeof(header));
940                         ret = -EINVAL;
941                         goto cleanup;
942                 }
943         }
944
945         DRM_DEBUG("END\n");
946
947       cleanup:
948         r300_pacify(dev_priv);
949
950         /* We emit the vertex buffer age here, outside the pacifier "brackets"
951          * for two reasons:
952          *  (1) This may coalesce multiple age emissions into a single one and
953          *  (2) more importantly, some chips lock up hard when scratch registers
954          *      are written inside the pacifier bracket.
955          */
956         if (emit_dispatch_age) {
957                 RING_LOCALS;
958
959                 /* Emit the vertex buffer age */
960                 BEGIN_RING(2);
961                 RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
962                 ADVANCE_RING();
963         }
964
965         COMMIT_RING();
966
967         return ret;
968 }