V4L/DVB (5809): Use mutex instead of semaphore in Philips webcam driver
[linux-2.6] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     struct drm_file * filp_priv,
43                                                     u32 *offset)
44 {
45         u64 off = *offset;
46         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
47         struct drm_radeon_driver_file_fields *radeon_priv;
48
49         /* Hrm ... the story of the offset ... So this function converts
50          * the various ideas of what userland clients might have for an
51          * offset in the card address space into an offset into the card
52          * address space :) So with a sane client, it should just keep
53          * the value intact and just do some boundary checking. However,
54          * not all clients are sane. Some older clients pass us 0 based
55          * offsets relative to the start of the framebuffer and some may
56          * assume the AGP aperture it appended to the framebuffer, so we
57          * try to detect those cases and fix them up.
58          *
59          * Note: It might be a good idea here to make sure the offset lands
60          * in some "allowed" area to protect things like the PCIE GART...
61          */
62
63         /* First, the best case, the offset already lands in either the
64          * framebuffer or the GART mapped space
65          */
66         if (radeon_check_offset(dev_priv, off))
67                 return 0;
68
69         /* Ok, that didn't happen... now check if we have a zero based
70          * offset that fits in the framebuffer + gart space, apply the
71          * magic offset we get from SETPARAM or calculated from fb_location
72          */
73         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
74                 radeon_priv = filp_priv->driver_priv;
75                 off += radeon_priv->radeon_fb_delta;
76         }
77
78         /* Finally, assume we aimed at a GART offset if beyond the fb */
79         if (off > fb_end)
80                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
81
82         /* Now recheck and fail if out of bounds */
83         if (radeon_check_offset(dev_priv, off)) {
84                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
85                 *offset = off;
86                 return 0;
87         }
88         return DRM_ERR(EINVAL);
89 }
90
91 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
92                                                      dev_priv,
93                                                      struct drm_file * filp_priv,
94                                                      int id, u32 *data)
95 {
96         switch (id) {
97
98         case RADEON_EMIT_PP_MISC:
99                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
100                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
101                         DRM_ERROR("Invalid depth buffer offset\n");
102                         return DRM_ERR(EINVAL);
103                 }
104                 break;
105
106         case RADEON_EMIT_PP_CNTL:
107                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
108                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
109                         DRM_ERROR("Invalid colour buffer offset\n");
110                         return DRM_ERR(EINVAL);
111                 }
112                 break;
113
114         case R200_EMIT_PP_TXOFFSET_0:
115         case R200_EMIT_PP_TXOFFSET_1:
116         case R200_EMIT_PP_TXOFFSET_2:
117         case R200_EMIT_PP_TXOFFSET_3:
118         case R200_EMIT_PP_TXOFFSET_4:
119         case R200_EMIT_PP_TXOFFSET_5:
120                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
121                                                   &data[0])) {
122                         DRM_ERROR("Invalid R200 texture offset\n");
123                         return DRM_ERR(EINVAL);
124                 }
125                 break;
126
127         case RADEON_EMIT_PP_TXFILTER_0:
128         case RADEON_EMIT_PP_TXFILTER_1:
129         case RADEON_EMIT_PP_TXFILTER_2:
130                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
131                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
132                         DRM_ERROR("Invalid R100 texture offset\n");
133                         return DRM_ERR(EINVAL);
134                 }
135                 break;
136
137         case R200_EMIT_PP_CUBIC_OFFSETS_0:
138         case R200_EMIT_PP_CUBIC_OFFSETS_1:
139         case R200_EMIT_PP_CUBIC_OFFSETS_2:
140         case R200_EMIT_PP_CUBIC_OFFSETS_3:
141         case R200_EMIT_PP_CUBIC_OFFSETS_4:
142         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
143                         int i;
144                         for (i = 0; i < 5; i++) {
145                                 if (radeon_check_and_fixup_offset(dev_priv,
146                                                                   filp_priv,
147                                                                   &data[i])) {
148                                         DRM_ERROR
149                                             ("Invalid R200 cubic texture offset\n");
150                                         return DRM_ERR(EINVAL);
151                                 }
152                         }
153                         break;
154                 }
155
156         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
157         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
158         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
159                         int i;
160                         for (i = 0; i < 5; i++) {
161                                 if (radeon_check_and_fixup_offset(dev_priv,
162                                                                   filp_priv,
163                                                                   &data[i])) {
164                                         DRM_ERROR
165                                             ("Invalid R100 cubic texture offset\n");
166                                         return DRM_ERR(EINVAL);
167                                 }
168                         }
169                 }
170                 break;
171
172         case R200_EMIT_VAP_CTL:{
173                         RING_LOCALS;
174                         BEGIN_RING(2);
175                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
176                         ADVANCE_RING();
177                 }
178                 break;
179
180         case RADEON_EMIT_RB3D_COLORPITCH:
181         case RADEON_EMIT_RE_LINE_PATTERN:
182         case RADEON_EMIT_SE_LINE_WIDTH:
183         case RADEON_EMIT_PP_LUM_MATRIX:
184         case RADEON_EMIT_PP_ROT_MATRIX_0:
185         case RADEON_EMIT_RB3D_STENCILREFMASK:
186         case RADEON_EMIT_SE_VPORT_XSCALE:
187         case RADEON_EMIT_SE_CNTL:
188         case RADEON_EMIT_SE_CNTL_STATUS:
189         case RADEON_EMIT_RE_MISC:
190         case RADEON_EMIT_PP_BORDER_COLOR_0:
191         case RADEON_EMIT_PP_BORDER_COLOR_1:
192         case RADEON_EMIT_PP_BORDER_COLOR_2:
193         case RADEON_EMIT_SE_ZBIAS_FACTOR:
194         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
195         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
196         case R200_EMIT_PP_TXCBLEND_0:
197         case R200_EMIT_PP_TXCBLEND_1:
198         case R200_EMIT_PP_TXCBLEND_2:
199         case R200_EMIT_PP_TXCBLEND_3:
200         case R200_EMIT_PP_TXCBLEND_4:
201         case R200_EMIT_PP_TXCBLEND_5:
202         case R200_EMIT_PP_TXCBLEND_6:
203         case R200_EMIT_PP_TXCBLEND_7:
204         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
205         case R200_EMIT_TFACTOR_0:
206         case R200_EMIT_VTX_FMT_0:
207         case R200_EMIT_MATRIX_SELECT_0:
208         case R200_EMIT_TEX_PROC_CTL_2:
209         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
210         case R200_EMIT_PP_TXFILTER_0:
211         case R200_EMIT_PP_TXFILTER_1:
212         case R200_EMIT_PP_TXFILTER_2:
213         case R200_EMIT_PP_TXFILTER_3:
214         case R200_EMIT_PP_TXFILTER_4:
215         case R200_EMIT_PP_TXFILTER_5:
216         case R200_EMIT_VTE_CNTL:
217         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
218         case R200_EMIT_PP_TAM_DEBUG3:
219         case R200_EMIT_PP_CNTL_X:
220         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
221         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
222         case R200_EMIT_RE_SCISSOR_TL_0:
223         case R200_EMIT_RE_SCISSOR_TL_1:
224         case R200_EMIT_RE_SCISSOR_TL_2:
225         case R200_EMIT_SE_VAP_CNTL_STATUS:
226         case R200_EMIT_SE_VTX_STATE_CNTL:
227         case R200_EMIT_RE_POINTSIZE:
228         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
229         case R200_EMIT_PP_CUBIC_FACES_0:
230         case R200_EMIT_PP_CUBIC_FACES_1:
231         case R200_EMIT_PP_CUBIC_FACES_2:
232         case R200_EMIT_PP_CUBIC_FACES_3:
233         case R200_EMIT_PP_CUBIC_FACES_4:
234         case R200_EMIT_PP_CUBIC_FACES_5:
235         case RADEON_EMIT_PP_TEX_SIZE_0:
236         case RADEON_EMIT_PP_TEX_SIZE_1:
237         case RADEON_EMIT_PP_TEX_SIZE_2:
238         case R200_EMIT_RB3D_BLENDCOLOR:
239         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
240         case RADEON_EMIT_PP_CUBIC_FACES_0:
241         case RADEON_EMIT_PP_CUBIC_FACES_1:
242         case RADEON_EMIT_PP_CUBIC_FACES_2:
243         case R200_EMIT_PP_TRI_PERF_CNTL:
244         case R200_EMIT_PP_AFS_0:
245         case R200_EMIT_PP_AFS_1:
246         case R200_EMIT_ATF_TFACTOR:
247         case R200_EMIT_PP_TXCTLALL_0:
248         case R200_EMIT_PP_TXCTLALL_1:
249         case R200_EMIT_PP_TXCTLALL_2:
250         case R200_EMIT_PP_TXCTLALL_3:
251         case R200_EMIT_PP_TXCTLALL_4:
252         case R200_EMIT_PP_TXCTLALL_5:
253         case R200_EMIT_VAP_PVS_CNTL:
254                 /* These packets don't contain memory offsets */
255                 break;
256
257         default:
258                 DRM_ERROR("Unknown state packet ID %d\n", id);
259                 return DRM_ERR(EINVAL);
260         }
261
262         return 0;
263 }
264
265 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
266                                                      dev_priv,
267                                                      struct drm_file *filp_priv,
268                                                      drm_radeon_kcmd_buffer_t *
269                                                      cmdbuf,
270                                                      unsigned int *cmdsz)
271 {
272         u32 *cmd = (u32 *) cmdbuf->buf;
273         u32 offset, narrays;
274         int count, i, k;
275
276         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
277
278         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
279                 DRM_ERROR("Not a type 3 packet\n");
280                 return DRM_ERR(EINVAL);
281         }
282
283         if (4 * *cmdsz > cmdbuf->bufsz) {
284                 DRM_ERROR("Packet size larger than size of data provided\n");
285                 return DRM_ERR(EINVAL);
286         }
287
288         switch(cmd[0] & 0xff00) {
289         /* XXX Are there old drivers needing other packets? */
290
291         case RADEON_3D_DRAW_IMMD:
292         case RADEON_3D_DRAW_VBUF:
293         case RADEON_3D_DRAW_INDX:
294         case RADEON_WAIT_FOR_IDLE:
295         case RADEON_CP_NOP:
296         case RADEON_3D_CLEAR_ZMASK:
297 /*      case RADEON_CP_NEXT_CHAR:
298         case RADEON_CP_PLY_NEXTSCAN:
299         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
300                 /* these packets are safe */
301                 break;
302
303         case RADEON_CP_3D_DRAW_IMMD_2:
304         case RADEON_CP_3D_DRAW_VBUF_2:
305         case RADEON_CP_3D_DRAW_INDX_2:
306         case RADEON_3D_CLEAR_HIZ:
307                 /* safe but r200 only */
308                 if (dev_priv->microcode_version != UCODE_R200) {
309                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
310                         return DRM_ERR(EINVAL);
311                 }
312                 break;
313
314         case RADEON_3D_LOAD_VBPNTR:
315                 count = (cmd[0] >> 16) & 0x3fff;
316
317                 if (count > 18) { /* 12 arrays max */
318                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
319                                   count);
320                         return DRM_ERR(EINVAL);
321                 }
322
323                 /* carefully check packet contents */
324                 narrays = cmd[1] & ~0xc000;
325                 k = 0;
326                 i = 2;
327                 while ((k < narrays) && (i < (count + 2))) {
328                         i++;            /* skip attribute field */
329                         if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &cmd[i])) {
330                                 DRM_ERROR
331                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
332                                      k, i);
333                                 return DRM_ERR(EINVAL);
334                         }
335                         k++;
336                         i++;
337                         if (k == narrays)
338                                 break;
339                         /* have one more to process, they come in pairs */
340                         if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &cmd[i])) {
341                                 DRM_ERROR
342                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
343                                      k, i);
344                                 return DRM_ERR(EINVAL);
345                         }
346                         k++;
347                         i++;
348                 }
349                 /* do the counts match what we expect ? */
350                 if ((k != narrays) || (i != (count + 2))) {
351                         DRM_ERROR
352                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
353                               k, i, narrays, count + 1);
354                         return DRM_ERR(EINVAL);
355                 }
356                 break;
357
358         case RADEON_3D_RNDR_GEN_INDX_PRIM:
359                 if (dev_priv->microcode_version != UCODE_R100) {
360                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
361                         return DRM_ERR(EINVAL);
362                 }
363                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &cmd[1])) {
364                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
365                                 return DRM_ERR(EINVAL);
366                 }
367                 break;
368
369         case RADEON_CP_INDX_BUFFER:
370                 if (dev_priv->microcode_version != UCODE_R200) {
371                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
372                         return DRM_ERR(EINVAL);
373                 }
374                 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
375                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
376                         return DRM_ERR(EINVAL);
377                 }
378                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &cmd[2])) {
379                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
380                         return DRM_ERR(EINVAL);
381                 }
382                 break;
383
384         case RADEON_CNTL_HOSTDATA_BLT:
385         case RADEON_CNTL_PAINT_MULTI:
386         case RADEON_CNTL_BITBLT_MULTI:
387                 /* MSB of opcode: next DWORD GUI_CNTL */
388                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
389                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
390                         offset = cmd[2] << 10;
391                         if (radeon_check_and_fixup_offset
392                             (dev_priv, filp_priv, &offset)) {
393                                 DRM_ERROR("Invalid first packet offset\n");
394                                 return DRM_ERR(EINVAL);
395                         }
396                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
397                 }
398
399                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
400                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
401                         offset = cmd[3] << 10;
402                         if (radeon_check_and_fixup_offset
403                             (dev_priv, filp_priv, &offset)) {
404                                 DRM_ERROR("Invalid second packet offset\n");
405                                 return DRM_ERR(EINVAL);
406                         }
407                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
408                 }
409                 break;
410
411         default:
412                 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
413                 return DRM_ERR(EINVAL);
414         }
415
416         return 0;
417 }
418
419 /* ================================================================
420  * CP hardware state programming functions
421  */
422
423 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
424                                              struct drm_clip_rect * box)
425 {
426         RING_LOCALS;
427
428         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
429                   box->x1, box->y1, box->x2, box->y2);
430
431         BEGIN_RING(4);
432         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
433         OUT_RING((box->y1 << 16) | box->x1);
434         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
435         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
436         ADVANCE_RING();
437 }
438
439 /* Emit 1.1 state
440  */
441 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
442                              struct drm_file * filp_priv,
443                              drm_radeon_context_regs_t * ctx,
444                              drm_radeon_texture_regs_t * tex,
445                              unsigned int dirty)
446 {
447         RING_LOCALS;
448         DRM_DEBUG("dirty=0x%08x\n", dirty);
449
450         if (dirty & RADEON_UPLOAD_CONTEXT) {
451                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
452                                                   &ctx->rb3d_depthoffset)) {
453                         DRM_ERROR("Invalid depth buffer offset\n");
454                         return DRM_ERR(EINVAL);
455                 }
456
457                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
458                                                   &ctx->rb3d_coloroffset)) {
459                         DRM_ERROR("Invalid depth buffer offset\n");
460                         return DRM_ERR(EINVAL);
461                 }
462
463                 BEGIN_RING(14);
464                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
465                 OUT_RING(ctx->pp_misc);
466                 OUT_RING(ctx->pp_fog_color);
467                 OUT_RING(ctx->re_solid_color);
468                 OUT_RING(ctx->rb3d_blendcntl);
469                 OUT_RING(ctx->rb3d_depthoffset);
470                 OUT_RING(ctx->rb3d_depthpitch);
471                 OUT_RING(ctx->rb3d_zstencilcntl);
472                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
473                 OUT_RING(ctx->pp_cntl);
474                 OUT_RING(ctx->rb3d_cntl);
475                 OUT_RING(ctx->rb3d_coloroffset);
476                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
477                 OUT_RING(ctx->rb3d_colorpitch);
478                 ADVANCE_RING();
479         }
480
481         if (dirty & RADEON_UPLOAD_VERTFMT) {
482                 BEGIN_RING(2);
483                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
484                 OUT_RING(ctx->se_coord_fmt);
485                 ADVANCE_RING();
486         }
487
488         if (dirty & RADEON_UPLOAD_LINE) {
489                 BEGIN_RING(5);
490                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
491                 OUT_RING(ctx->re_line_pattern);
492                 OUT_RING(ctx->re_line_state);
493                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
494                 OUT_RING(ctx->se_line_width);
495                 ADVANCE_RING();
496         }
497
498         if (dirty & RADEON_UPLOAD_BUMPMAP) {
499                 BEGIN_RING(5);
500                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
501                 OUT_RING(ctx->pp_lum_matrix);
502                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
503                 OUT_RING(ctx->pp_rot_matrix_0);
504                 OUT_RING(ctx->pp_rot_matrix_1);
505                 ADVANCE_RING();
506         }
507
508         if (dirty & RADEON_UPLOAD_MASKS) {
509                 BEGIN_RING(4);
510                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
511                 OUT_RING(ctx->rb3d_stencilrefmask);
512                 OUT_RING(ctx->rb3d_ropcntl);
513                 OUT_RING(ctx->rb3d_planemask);
514                 ADVANCE_RING();
515         }
516
517         if (dirty & RADEON_UPLOAD_VIEWPORT) {
518                 BEGIN_RING(7);
519                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
520                 OUT_RING(ctx->se_vport_xscale);
521                 OUT_RING(ctx->se_vport_xoffset);
522                 OUT_RING(ctx->se_vport_yscale);
523                 OUT_RING(ctx->se_vport_yoffset);
524                 OUT_RING(ctx->se_vport_zscale);
525                 OUT_RING(ctx->se_vport_zoffset);
526                 ADVANCE_RING();
527         }
528
529         if (dirty & RADEON_UPLOAD_SETUP) {
530                 BEGIN_RING(4);
531                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
532                 OUT_RING(ctx->se_cntl);
533                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
534                 OUT_RING(ctx->se_cntl_status);
535                 ADVANCE_RING();
536         }
537
538         if (dirty & RADEON_UPLOAD_MISC) {
539                 BEGIN_RING(2);
540                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
541                 OUT_RING(ctx->re_misc);
542                 ADVANCE_RING();
543         }
544
545         if (dirty & RADEON_UPLOAD_TEX0) {
546                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
547                                                   &tex[0].pp_txoffset)) {
548                         DRM_ERROR("Invalid texture offset for unit 0\n");
549                         return DRM_ERR(EINVAL);
550                 }
551
552                 BEGIN_RING(9);
553                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
554                 OUT_RING(tex[0].pp_txfilter);
555                 OUT_RING(tex[0].pp_txformat);
556                 OUT_RING(tex[0].pp_txoffset);
557                 OUT_RING(tex[0].pp_txcblend);
558                 OUT_RING(tex[0].pp_txablend);
559                 OUT_RING(tex[0].pp_tfactor);
560                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
561                 OUT_RING(tex[0].pp_border_color);
562                 ADVANCE_RING();
563         }
564
565         if (dirty & RADEON_UPLOAD_TEX1) {
566                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
567                                                   &tex[1].pp_txoffset)) {
568                         DRM_ERROR("Invalid texture offset for unit 1\n");
569                         return DRM_ERR(EINVAL);
570                 }
571
572                 BEGIN_RING(9);
573                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
574                 OUT_RING(tex[1].pp_txfilter);
575                 OUT_RING(tex[1].pp_txformat);
576                 OUT_RING(tex[1].pp_txoffset);
577                 OUT_RING(tex[1].pp_txcblend);
578                 OUT_RING(tex[1].pp_txablend);
579                 OUT_RING(tex[1].pp_tfactor);
580                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
581                 OUT_RING(tex[1].pp_border_color);
582                 ADVANCE_RING();
583         }
584
585         if (dirty & RADEON_UPLOAD_TEX2) {
586                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
587                                                   &tex[2].pp_txoffset)) {
588                         DRM_ERROR("Invalid texture offset for unit 2\n");
589                         return DRM_ERR(EINVAL);
590                 }
591
592                 BEGIN_RING(9);
593                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
594                 OUT_RING(tex[2].pp_txfilter);
595                 OUT_RING(tex[2].pp_txformat);
596                 OUT_RING(tex[2].pp_txoffset);
597                 OUT_RING(tex[2].pp_txcblend);
598                 OUT_RING(tex[2].pp_txablend);
599                 OUT_RING(tex[2].pp_tfactor);
600                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
601                 OUT_RING(tex[2].pp_border_color);
602                 ADVANCE_RING();
603         }
604
605         return 0;
606 }
607
608 /* Emit 1.2 state
609  */
610 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
611                               struct drm_file * filp_priv,
612                               drm_radeon_state_t * state)
613 {
614         RING_LOCALS;
615
616         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
617                 BEGIN_RING(3);
618                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
619                 OUT_RING(state->context2.se_zbias_factor);
620                 OUT_RING(state->context2.se_zbias_constant);
621                 ADVANCE_RING();
622         }
623
624         return radeon_emit_state(dev_priv, filp_priv, &state->context,
625                                  state->tex, state->dirty);
626 }
627
628 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
629  * 1.3 cmdbuffers allow all previous state to be updated as well as
630  * the tcl scalar and vector areas.
631  */
632 static struct {
633         int start;
634         int len;
635         const char *name;
636 } packet[RADEON_MAX_STATE_PACKETS] = {
637         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
638         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
639         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
640         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
641         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
642         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
643         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
644         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
645         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
646         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
647         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
648         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
649         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
650         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
651         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
652         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
653         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
654         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
655         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
656         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
657         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
658                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
659         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
660         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
661         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
662         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
663         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
664         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
665         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
666         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
667         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
668         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
669         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
670         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
671         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
672         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
673         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
674         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
675         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
676         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
677         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
678         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
679         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
680         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
681         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
682         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
683         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
684         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
685         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
686         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
687         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
688          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
689         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
690         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
691         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
692         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
693         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
694         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
695         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
696         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
697         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
698         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
699         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
700                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
701         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
702         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
703         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
704         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
705         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
706         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
707         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
708         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
709         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
710         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
711         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
712         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
713         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
714         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
715         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
716         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
717         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
718         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
719         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
720         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
721         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
722         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
723         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
724         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
725         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
726         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
727         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
728         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
729         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
730         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
731         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
732         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
733         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
734         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
735 };
736
737 /* ================================================================
738  * Performance monitoring functions
739  */
740
741 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
742                              int x, int y, int w, int h, int r, int g, int b)
743 {
744         u32 color;
745         RING_LOCALS;
746
747         x += dev_priv->sarea_priv->boxes[0].x1;
748         y += dev_priv->sarea_priv->boxes[0].y1;
749
750         switch (dev_priv->color_fmt) {
751         case RADEON_COLOR_FORMAT_RGB565:
752                 color = (((r & 0xf8) << 8) |
753                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
754                 break;
755         case RADEON_COLOR_FORMAT_ARGB8888:
756         default:
757                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
758                 break;
759         }
760
761         BEGIN_RING(4);
762         RADEON_WAIT_UNTIL_3D_IDLE();
763         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
764         OUT_RING(0xffffffff);
765         ADVANCE_RING();
766
767         BEGIN_RING(6);
768
769         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
770         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
771                  RADEON_GMC_BRUSH_SOLID_COLOR |
772                  (dev_priv->color_fmt << 8) |
773                  RADEON_GMC_SRC_DATATYPE_COLOR |
774                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
775
776         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
777                 OUT_RING(dev_priv->front_pitch_offset);
778         } else {
779                 OUT_RING(dev_priv->back_pitch_offset);
780         }
781
782         OUT_RING(color);
783
784         OUT_RING((x << 16) | y);
785         OUT_RING((w << 16) | h);
786
787         ADVANCE_RING();
788 }
789
790 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
791 {
792         /* Collapse various things into a wait flag -- trying to
793          * guess if userspase slept -- better just to have them tell us.
794          */
795         if (dev_priv->stats.last_frame_reads > 1 ||
796             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
797                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
798         }
799
800         if (dev_priv->stats.freelist_loops) {
801                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
802         }
803
804         /* Purple box for page flipping
805          */
806         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
807                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
808
809         /* Red box if we have to wait for idle at any point
810          */
811         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
812                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
813
814         /* Blue box: lost context?
815          */
816
817         /* Yellow box for texture swaps
818          */
819         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
820                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
821
822         /* Green box if hardware never idles (as far as we can tell)
823          */
824         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
825                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
826
827         /* Draw bars indicating number of buffers allocated
828          * (not a great measure, easily confused)
829          */
830         if (dev_priv->stats.requested_bufs) {
831                 if (dev_priv->stats.requested_bufs > 100)
832                         dev_priv->stats.requested_bufs = 100;
833
834                 radeon_clear_box(dev_priv, 4, 16,
835                                  dev_priv->stats.requested_bufs, 4,
836                                  196, 128, 128);
837         }
838
839         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
840
841 }
842
843 /* ================================================================
844  * CP command dispatch functions
845  */
846
/*
 * radeon_cp_dispatch_clear - queue the CP commands that clear the buffers
 * selected by clear->flags, once per cliprect currently in the SAREA.
 *
 * dev:         DRM device; dev->dev_private supplies the radeon state.
 * clear:       clear request; flags, clear_color, color_mask, clear_depth
 *              and depth_mask are read here.
 * depth_boxes: per-cliprect words (CLEAR_X1, CLEAR_Y1, CLEAR_X2, CLEAR_Y2,
 *              CLEAR_DEPTH) used as vertex data by the quad-based depth
 *              clears.  It is indexed in parallel with sarea_priv->boxes,
 *              so it must hold at least sarea_priv->nbox entries whenever
 *              one of those paths runs.
 *
 * Sequence:
 *  1. Count the clear in dev_priv->stats.clears.
 *  2. If sarea_priv->pfCurrentPage is 1, swap the RADEON_FRONT and
 *     RADEON_BACK bits in the local copy of the flags.
 *  3. Colour buffers: wait for 3D idle, load RADEON_DP_WRITE_MASK with
 *     clear->color_mask and emit one RADEON_CNTL_PAINT_MULTI fill per
 *     cliprect for each selected buffer.
 *  4. Depth/stencil: at most one of the following, chosen by an
 *     if / else-if chain:
 *       - RADEON_CLEAR_FASTZ requested: RADEON_3D_CLEAR_ZMASK packets per
 *         cliprect, followed by one RADEON_3D_CLEAR_HIZ packet when the
 *         chip has RADEON_HAS_HIERZ, runs UCODE_R200 and RADEON_USE_HIERZ
 *         was requested;
 *       - UCODE_R200: program 3D state and draw one R200_3D_DRAW_IMMD_2
 *         rectangle per cliprect;
 *       - otherwise: program 3D state and draw one RADEON_3D_DRAW_IMMD
 *         rectangle per cliprect.
 *  5. Increment sarea_priv->last_clear, emit it via RADEON_CLEAR_AGE and
 *     wait for idle.
 *
 * Every branch that emits clear commands also sets sarea_priv->ctx_owner
 * to 0 so that the 3D state is restored next time.
 */
847 static void radeon_cp_dispatch_clear(struct drm_device * dev,
848                                      drm_radeon_clear_t * clear,
849                                      drm_radeon_clear_rect_t * depth_boxes)
850 {
851         drm_radeon_private_t *dev_priv = dev->dev_private;
852         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
853         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
854         int nbox = sarea_priv->nbox;    /* cliprect count, read once from the SAREA */
855         struct drm_clip_rect *pbox = sarea_priv->boxes;
856         unsigned int flags = clear->flags;      /* local copy; may be modified below */
857         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
858         int i;
859         RING_LOCALS;
860         DRM_DEBUG("flags = 0x%x\n", flags);
861
862         dev_priv->stats.clears++;
863
            /* With page 1 current, exchange the FRONT and BACK request bits. */
864         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
865                 unsigned int tmp = flags;
866
867                 flags &= ~(RADEON_FRONT | RADEON_BACK);
868                 if (tmp & RADEON_FRONT)
869                         flags |= RADEON_BACK;
870                 if (tmp & RADEON_BACK)
871                         flags |= RADEON_FRONT;
872         }
873
            /* Colour buffer clear: one 2D solid fill per cliprect and buffer. */
874         if (flags & (RADEON_FRONT | RADEON_BACK)) {
875
876                 BEGIN_RING(4);
877
878                 /* Ensure the 3D stream is idle before doing a
879                  * 2D fill to clear the front or back buffer.
880                  */
881                 RADEON_WAIT_UNTIL_3D_IDLE();
882
883                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
884                 OUT_RING(clear->color_mask);
885
886                 ADVANCE_RING();
887
888                 /* Make sure we restore the 3D state next time.
889                  */
890                 dev_priv->sarea_priv->ctx_owner = 0;
891
892                 for (i = 0; i < nbox; i++) {
893                         int x = pbox[i].x1;
894                         int y = pbox[i].y1;
895                         int w = pbox[i].x2 - x;
896                         int h = pbox[i].y2 - y;
897
898                         DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
899                                   x, y, w, h, flags);
900
901                         if (flags & RADEON_FRONT) {
902                                 BEGIN_RING(6);
903
904                                 OUT_RING(CP_PACKET3
905                                          (RADEON_CNTL_PAINT_MULTI, 4));
906                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
907                                          RADEON_GMC_BRUSH_SOLID_COLOR |
908                                          (dev_priv->
909                                           color_fmt << 8) |
910                                          RADEON_GMC_SRC_DATATYPE_COLOR |
911                                          RADEON_ROP3_P |
912                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
913
914                                 OUT_RING(dev_priv->front_pitch_offset);
915                                 OUT_RING(clear->clear_color);
916
917                                 OUT_RING((x << 16) | y);
918                                 OUT_RING((w << 16) | h);
919
920                                 ADVANCE_RING();
921                         }
922
923                         if (flags & RADEON_BACK) {
924                                 BEGIN_RING(6);
925
926                                 OUT_RING(CP_PACKET3
927                                          (RADEON_CNTL_PAINT_MULTI, 4));
928                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
929                                          RADEON_GMC_BRUSH_SOLID_COLOR |
930                                          (dev_priv->
931                                           color_fmt << 8) |
932                                          RADEON_GMC_SRC_DATATYPE_COLOR |
933                                          RADEON_ROP3_P |
934                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
935
936                                 OUT_RING(dev_priv->back_pitch_offset);
937                                 OUT_RING(clear->clear_color);
938
939                                 OUT_RING((x << 16) | y);
940                                 OUT_RING((w << 16) | h);
941
942                                 ADVANCE_RING();
943                         }
944                 }
945         }
946
947         /* hyper z clear */
948         /* no docs available, based on reverse engineering by Stephane Marchesin */
949         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
950             && (flags & RADEON_CLEAR_FASTZ)) {
951
952                 int i;  /* NOTE(review): shadows the function-scope i declared above */
                    /* Depth pitch divided by 2 for 16-bit integer Z, by 4 otherwise. */
953                 int depthpixperline =
954                     dev_priv->depth_fmt ==
955                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
956                                                        2) : (dev_priv->
957                                                              depth_pitch / 4);
958
959                 u32 clearmask;
960
                    /* Clear depth OR-ed with the low 8 bits of depth_mask in bits 24-31. */
961                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
962                     ((clear->depth_mask & 0xff) << 24);
963
964                 /* Make sure we restore the 3D state next time.
965                  * we haven't touched any "normal" state - still need this?
966                  */
967                 dev_priv->sarea_priv->ctx_owner = 0;
968
969                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
970                     && (flags & RADEON_USE_HIERZ)) {
971                         /* FIXME : reverse engineer that for Rx00 cards */
972                         /* FIXME : the mask supposedly contains low-res z values. So can't set
973                            just to the max (0xff? or actually 0x3fff?), need to take z clear
974                            value into account? */
975                         /* pattern seems to work for r100, though get slight
976                            rendering errors with glxgears. If hierz is not enabled for r100,
977                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
978                            other ones are ignored, and the same clear mask can be used. That's
979                            very different behaviour than R200 which needs different clear mask
980                            and different number of tiles to clear if hierz is enabled or not !?!
981                          */
982                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
983                 } else {
984                         /* clear mask : chooses the clearing pattern.
985                            rv250: could be used to clear only parts of macrotiles
986                            (but that would get really complicated...)?
987                            bit 0 and 1 (either or both of them ?!?!) are used to
988                            not clear tile (or maybe one of the bits indicates if the tile is
989                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
990                            Pattern is as follows:
991                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
992                            bits -------------------------------------------------
993                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
994                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
995                            covers 256 pixels ?!?
996                          */
997                         clearmask = 0x0;
998                 }
999
1000                 BEGIN_RING(8);
1001                 RADEON_WAIT_UNTIL_2D_IDLE();
1002                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
1003                              tempRB3D_DEPTHCLEARVALUE);
1004                 /* what offset is this exactly ? */
1005                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
1006                 /* need ctlstat, otherwise get some strange black flickering */
1007                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
1008                              RADEON_RB3D_ZC_FLUSH_ALL);
1009                 ADVANCE_RING();
1010
                     /* Per cliprect: one CLEAR_ZMASK packet per tile row; the three
                      * branches differ only in tile geometry (shifts and multipliers).
                      */
1011                 for (i = 0; i < nbox; i++) {
1012                         int tileoffset, nrtilesx, nrtilesy, j;
1013                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
1014                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
1015                             && !(dev_priv->microcode_version == UCODE_R200)) {
1016                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
1017                                    maybe r200 actually doesn't need to put the low-res z value into
1018                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
1019                                    Works for R100, both with hierz and without.
1020                                    R100 seems to operate on 2x1 8x8 tiles, but...
1021                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
1022                                    problematic with resolutions which are not 64 pix aligned? */
1023                                 tileoffset =
1024                                     ((pbox[i].y1 >> 3) * depthpixperline +
1025                                      pbox[i].x1) >> 6;
1026                                 nrtilesx =
1027                                     ((pbox[i].x2 & ~63) -
1028                                      (pbox[i].x1 & ~63)) >> 4;
1029                                 nrtilesy =
1030                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1031                                 for (j = 0; j <= nrtilesy; j++) {       /* inclusive: nrtilesy + 1 packets */
1032                                         BEGIN_RING(4);
1033                                         OUT_RING(CP_PACKET3
1034                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1035                                         /* first tile */
1036                                         OUT_RING(tileoffset * 8);
1037                                         /* the number of tiles to clear */
1038                                         OUT_RING(nrtilesx + 4);
1039                                         /* clear mask : chooses the clearing pattern. */
1040                                         OUT_RING(clearmask);
1041                                         ADVANCE_RING();
1042                                         tileoffset += depthpixperline >> 6;
1043                                 }
1044                         } else if (dev_priv->microcode_version == UCODE_R200) {
1045                                 /* works for rv250. */
1046                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
1047                                 tileoffset =
1048                                     ((pbox[i].y1 >> 3) * depthpixperline +
1049                                      pbox[i].x1) >> 5;
1050                                 nrtilesx =
1051                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
1052                                 nrtilesy =
1053                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
1054                                 for (j = 0; j <= nrtilesy; j++) {       /* inclusive: nrtilesy + 1 packets */
1055                                         BEGIN_RING(4);
1056                                         OUT_RING(CP_PACKET3
1057                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1058                                         /* first tile */
1059                                         /* judging by the first tile offset needed, could possibly
1060                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
1061                                            macro tiles, though would still need clear mask for
1062                                            right/bottom if truly 4x4 granularity is desired ? */
1063                                         OUT_RING(tileoffset * 16);
1064                                         /* the number of tiles to clear */
1065                                         OUT_RING(nrtilesx + 1);
1066                                         /* clear mask : chooses the clearing pattern. */
1067                                         OUT_RING(clearmask);
1068                                         ADVANCE_RING();
1069                                         tileoffset += depthpixperline >> 5;
1070                                 }
1071                         } else {        /* rv 100 */
1072                                 /* rv100 might not need 64 pix alignment, who knows */
1073                                 /* offsets are, hmm, weird */
1074                                 tileoffset =
1075                                     ((pbox[i].y1 >> 4) * depthpixperline +
1076                                      pbox[i].x1) >> 6;
1077                                 nrtilesx =
1078                                     ((pbox[i].x2 & ~63) -
1079                                      (pbox[i].x1 & ~63)) >> 4;
1080                                 nrtilesy =
1081                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
1082                                 for (j = 0; j <= nrtilesy; j++) {       /* inclusive: nrtilesy + 1 packets */
1083                                         BEGIN_RING(4);
1084                                         OUT_RING(CP_PACKET3
1085                                                  (RADEON_3D_CLEAR_ZMASK, 2));
1086                                         OUT_RING(tileoffset * 128);
1087                                         /* the number of tiles to clear */
1088                                         OUT_RING(nrtilesx + 4);
1089                                         /* clear mask : chooses the clearing pattern. */
1090                                         OUT_RING(clearmask);
1091                                         ADVANCE_RING();
1092                                         tileoffset += depthpixperline >> 6;
1093                                 }
1094                         }
1095                 }
1096
1097                 /* TODO don't always clear all hi-level z tiles */
1098                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1099                     && (dev_priv->microcode_version == UCODE_R200)
1100                     && (flags & RADEON_USE_HIERZ))
1101                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1102                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1103                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1104                            value into account? */
1105                 {
1106                         BEGIN_RING(4);
1107                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1108                         OUT_RING(0x0);  /* First tile */
1109                         OUT_RING(0x3cc0);
1110                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1111                         ADVANCE_RING();
1112                 }
1113         }
1114
1115         /* We have to clear the depth and/or stencil buffers by
1116          * rendering a quad into just those buffers.  Thus, we have to
1117          * make sure the 3D engine is configured correctly.
1118          */
             /* This else-if continues the fast-Z "if" above, so the quad-based
              * clears below run only when the fast-Z path was not taken.
              */
1119         else if ((dev_priv->microcode_version == UCODE_R200) &&
1120                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1121
1122                 int tempPP_CNTL;
1123                 int tempRE_CNTL;
1124                 int tempRB3D_CNTL;
1125                 int tempRB3D_ZSTENCILCNTL;
1126                 int tempRB3D_STENCILREFMASK;
1127                 int tempRB3D_PLANEMASK;
1128                 int tempSE_CNTL;
1129                 int tempSE_VTE_CNTL;
1130                 int tempSE_VTX_FMT_0;
1131                 int tempSE_VTX_FMT_1;
1132                 int tempSE_VAP_CNTL;
1133                 int tempRE_AUX_SCISSOR_CNTL;
1134
1135                 tempPP_CNTL = 0;
1136                 tempRE_CNTL = 0;
1137
1138                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1139
1140                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1141                 tempRB3D_STENCILREFMASK = 0x0;
1142
1143                 tempSE_CNTL = depth_clear->se_cntl;
1144
1145                 /* Disable TCL */
1146
1147                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1148                                           (0x9 <<
1149                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1150
1151                 tempRB3D_PLANEMASK = 0x0;
1152
1153                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1154
1155                 tempSE_VTE_CNTL =
1156                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1157
1158                 /* Vertex format (X, Y, Z, W) */
1159                 tempSE_VTX_FMT_0 =
1160                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1161                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1162                 tempSE_VTX_FMT_1 = 0x0;
1163
1164                 /*
1165                  * Depth buffer specific enables
1166                  */
1167                 if (flags & RADEON_DEPTH) {
1168                         /* Enable depth buffer */
1169                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1170                 } else {
1171                         /* Disable depth buffer */
1172                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1173                 }
1174
1175                 /*
1176                  * Stencil buffer specific enables
1177                  */
1178                 if (flags & RADEON_STENCIL) {
1179                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1180                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1181                 } else {
1182                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1183                         tempRB3D_STENCILREFMASK = 0x00000000;
1184                 }
1185
1186                 if (flags & RADEON_USE_COMP_ZBUF) {
1187                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1188                             RADEON_Z_DECOMPRESSION_ENABLE;
1189                 }
1190                 if (flags & RADEON_USE_HIERZ) {
1191                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1192                 }
1193
1194                 BEGIN_RING(26);
1195                 RADEON_WAIT_UNTIL_2D_IDLE();
1196
1197                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1198                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1199                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1200                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1201                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1202                              tempRB3D_STENCILREFMASK);
1203                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1204                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1205                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1206                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1207                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1208                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1209                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1210                 ADVANCE_RING();
1211
1212                 /* Make sure we restore the 3D state next time.
1213                  */
1214                 dev_priv->sarea_priv->ctx_owner = 0;
1215
1216                 for (i = 0; i < nbox; i++) {
1217
1218                         /* Funny that this should be required --
1219                          *  sets top-left?
1220                          */
1221                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1222
                             /* Three vertices of four words each; 0x3f800000 is the
                              * IEEE-754 bit pattern of 1.0f (presumably the W component
                              * of the X, Y, Z, W format programmed above).
                              */
1223                         BEGIN_RING(14);
1224                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1225                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1226                                   RADEON_PRIM_WALK_RING |
1227                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1228                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1229                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1230                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1231                         OUT_RING(0x3f800000);
1232                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1233                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1234                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1235                         OUT_RING(0x3f800000);
1236                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1237                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1238                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1239                         OUT_RING(0x3f800000);
1240                         ADVANCE_RING();
1241                 }
1242         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1243
1244                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1245
1246                 rb3d_cntl = depth_clear->rb3d_cntl;
1247
1248                 if (flags & RADEON_DEPTH) {
1249                         rb3d_cntl |= RADEON_Z_ENABLE;
1250                 } else {
1251                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1252                 }
1253
1254                 if (flags & RADEON_STENCIL) {
1255                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1256                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1257                 } else {
1258                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1259                         rb3d_stencilrefmask = 0x00000000;
1260                 }
1261
1262                 if (flags & RADEON_USE_COMP_ZBUF) {
1263                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1264                             RADEON_Z_DECOMPRESSION_ENABLE;
1265                 }
1266                 if (flags & RADEON_USE_HIERZ) {
1267                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1268                 }
1269
1270                 BEGIN_RING(13);
1271                 RADEON_WAIT_UNTIL_2D_IDLE();
1272
                     /* One PACKET0 header followed by two data words: 0 and rb3d_cntl. */
1273                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1274                 OUT_RING(0x00000000);
1275                 OUT_RING(rb3d_cntl);
1276
1277                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1278                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1279                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1280                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1281                 ADVANCE_RING();
1282
1283                 /* Make sure we restore the 3D state next time.
1284                  */
1285                 dev_priv->sarea_priv->ctx_owner = 0;
1286
1287                 for (i = 0; i < nbox; i++) {
1288
1289                         /* Funny that this should be required --
1290                          *  sets top-left?
1291                          */
1292                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1293
1294                         BEGIN_RING(15);
1295
1296                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1297                         OUT_RING(RADEON_VTX_Z_PRESENT |
1298                                  RADEON_VTX_PKCOLOR_PRESENT);
1299                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1300                                   RADEON_PRIM_WALK_RING |
1301                                   RADEON_MAOS_ENABLE |
1302                                   RADEON_VTX_FMT_RADEON_MODE |
1303                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1304
                             /* Three vertices of four words each; the trailing 0x0 is
                              * presumably the packed colour enabled by
                              * RADEON_VTX_PKCOLOR_PRESENT -- TODO confirm.
                              */
1305                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1306                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1307                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1308                         OUT_RING(0x0);
1309
1310                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1311                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1312                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1313                         OUT_RING(0x0);
1314
1315                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1316                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1317                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1318                         OUT_RING(0x0);
1319
1320                         ADVANCE_RING();
1321                 }
1322         }
1323
1324         /* Increment the clear counter.  The client-side 3D driver must
1325          * wait on this value before performing the clear ioctl.  We
1326          * need this because the card's so damned fast...
1327          */
1328         dev_priv->sarea_priv->last_clear++;
1329
1330         BEGIN_RING(4);
1331
1332         RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
1333         RADEON_WAIT_UNTIL_IDLE();
1334
1335         ADVANCE_RING();
1336 }
1337
1338 static void radeon_cp_dispatch_swap(struct drm_device * dev)
1339 {
1340         drm_radeon_private_t *dev_priv = dev->dev_private;
1341         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1342         int nbox = sarea_priv->nbox;
1343         struct drm_clip_rect *pbox = sarea_priv->boxes;
1344         int i;
1345         RING_LOCALS;
1346         DRM_DEBUG("\n");
1347
1348         /* Do some trivial performance monitoring...
1349          */
1350         if (dev_priv->do_boxes)
1351                 radeon_cp_performance_boxes(dev_priv);
1352
1353         /* Wait for the 3D stream to idle before dispatching the bitblt.
1354          * This will prevent data corruption between the two streams.
1355          */
1356         BEGIN_RING(2);
1357
1358         RADEON_WAIT_UNTIL_3D_IDLE();
1359
1360         ADVANCE_RING();
1361
1362         for (i = 0; i < nbox; i++) {
1363                 int x = pbox[i].x1;
1364                 int y = pbox[i].y1;
1365                 int w = pbox[i].x2 - x;
1366                 int h = pbox[i].y2 - y;
1367
1368                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1369
1370                 BEGIN_RING(9);
1371
1372                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1373                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1374                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1375                          RADEON_GMC_BRUSH_NONE |
1376                          (dev_priv->color_fmt << 8) |
1377                          RADEON_GMC_SRC_DATATYPE_COLOR |
1378                          RADEON_ROP3_S |
1379                          RADEON_DP_SRC_SOURCE_MEMORY |
1380                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1381
1382                 /* Make this work even if front & back are flipped:
1383                  */
1384                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1385                 if (dev_priv->sarea_priv->pfCurrentPage == 0) {
1386                         OUT_RING(dev_priv->back_pitch_offset);
1387                         OUT_RING(dev_priv->front_pitch_offset);
1388                 } else {
1389                         OUT_RING(dev_priv->front_pitch_offset);
1390                         OUT_RING(dev_priv->back_pitch_offset);
1391                 }
1392
1393                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1394                 OUT_RING((x << 16) | y);
1395                 OUT_RING((x << 16) | y);
1396                 OUT_RING((w << 16) | h);
1397
1398                 ADVANCE_RING();
1399         }
1400
1401         /* Increment the frame counter.  The client-side 3D driver must
1402          * throttle the framerate by waiting for this value before
1403          * performing the swapbuffer ioctl.
1404          */
1405         dev_priv->sarea_priv->last_frame++;
1406
1407         BEGIN_RING(4);
1408
1409         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1410         RADEON_WAIT_UNTIL_2D_IDLE();
1411
1412         ADVANCE_RING();
1413 }
1414
1415 static void radeon_cp_dispatch_flip(struct drm_device * dev)
1416 {
1417         drm_radeon_private_t *dev_priv = dev->dev_private;
1418         struct drm_sarea *sarea = (struct drm_sarea *) dev_priv->sarea->handle;
1419         int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
1420             ? dev_priv->front_offset : dev_priv->back_offset;
1421         RING_LOCALS;
1422         DRM_DEBUG("%s: pfCurrentPage=%d\n",
1423                   __FUNCTION__,
1424                   dev_priv->sarea_priv->pfCurrentPage);
1425
1426         /* Do some trivial performance monitoring...
1427          */
1428         if (dev_priv->do_boxes) {
1429                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1430                 radeon_cp_performance_boxes(dev_priv);
1431         }
1432
1433         /* Update the frame offsets for both CRTCs
1434          */
1435         BEGIN_RING(6);
1436
1437         RADEON_WAIT_UNTIL_3D_IDLE();
1438         OUT_RING_REG(RADEON_CRTC_OFFSET,
1439                      ((sarea->frame.y * dev_priv->front_pitch +
1440                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1441                      + offset);
1442         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1443                      + offset);
1444
1445         ADVANCE_RING();
1446
1447         /* Increment the frame counter.  The client-side 3D driver must
1448          * throttle the framerate by waiting for this value before
1449          * performing the swapbuffer ioctl.
1450          */
1451         dev_priv->sarea_priv->last_frame++;
1452         dev_priv->sarea_priv->pfCurrentPage =
1453                 1 - dev_priv->sarea_priv->pfCurrentPage;
1454
1455         BEGIN_RING(2);
1456
1457         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1458
1459         ADVANCE_RING();
1460 }
1461
1462 static int bad_prim_vertex_nr(int primitive, int nr)
1463 {
1464         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1465         case RADEON_PRIM_TYPE_NONE:
1466         case RADEON_PRIM_TYPE_POINT:
1467                 return nr < 1;
1468         case RADEON_PRIM_TYPE_LINE:
1469                 return (nr & 1) || nr == 0;
1470         case RADEON_PRIM_TYPE_LINE_STRIP:
1471                 return nr < 2;
1472         case RADEON_PRIM_TYPE_TRI_LIST:
1473         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1474         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1475         case RADEON_PRIM_TYPE_RECT_LIST:
1476                 return nr % 3 || nr == 0;
1477         case RADEON_PRIM_TYPE_TRI_FAN:
1478         case RADEON_PRIM_TYPE_TRI_STRIP:
1479                 return nr < 3;
1480         default:
1481                 return 1;
1482         }
1483 }
1484
/* One primitive to be dispatched by radeon_cp_dispatch_vertex() or
 * radeon_cp_dispatch_indices().  The field notes describe how those two
 * helpers consume the values.
 */
typedef struct {
        unsigned int start;     /* offset of the primitive's data inside the DMA buffer */
        unsigned int finish;    /* end offset of that data inside the DMA buffer */
        unsigned int prim;      /* primitive word; type bits are checked, then OR'ed into the draw command */
        unsigned int numverts;  /* vertex count written into the draw packet */
        unsigned int offset;    /* added to gart_buffers_offset for indexed draws */
        unsigned int vc_format; /* vertex format word, emitted unchanged */
} drm_radeon_tcl_prim_t;
1493
1494 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
1495                                       struct drm_buf * buf,
1496                                       drm_radeon_tcl_prim_t * prim)
1497 {
1498         drm_radeon_private_t *dev_priv = dev->dev_private;
1499         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1500         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1501         int numverts = (int)prim->numverts;
1502         int nbox = sarea_priv->nbox;
1503         int i = 0;
1504         RING_LOCALS;
1505
1506         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1507                   prim->prim,
1508                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1509
1510         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1511                 DRM_ERROR("bad prim %x numverts %d\n",
1512                           prim->prim, prim->numverts);
1513                 return;
1514         }
1515
1516         do {
1517                 /* Emit the next cliprect */
1518                 if (i < nbox) {
1519                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1520                 }
1521
1522                 /* Emit the vertex buffer rendering commands */
1523                 BEGIN_RING(5);
1524
1525                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1526                 OUT_RING(offset);
1527                 OUT_RING(numverts);
1528                 OUT_RING(prim->vc_format);
1529                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1530                          RADEON_COLOR_ORDER_RGBA |
1531                          RADEON_VTX_FMT_RADEON_MODE |
1532                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1533
1534                 ADVANCE_RING();
1535
1536                 i++;
1537         } while (i < nbox);
1538 }
1539
1540 static void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
1541 {
1542         drm_radeon_private_t *dev_priv = dev->dev_private;
1543         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1544         RING_LOCALS;
1545
1546         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1547
1548         /* Emit the vertex buffer age */
1549         BEGIN_RING(2);
1550         RADEON_DISPATCH_AGE(buf_priv->age);
1551         ADVANCE_RING();
1552
1553         buf->pending = 1;
1554         buf->used = 0;
1555 }
1556
1557 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
1558                                         struct drm_buf * buf, int start, int end)
1559 {
1560         drm_radeon_private_t *dev_priv = dev->dev_private;
1561         RING_LOCALS;
1562         DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1563
1564         if (start != end) {
1565                 int offset = (dev_priv->gart_buffers_offset
1566                               + buf->offset + start);
1567                 int dwords = (end - start + 3) / sizeof(u32);
1568
1569                 /* Indirect buffer data must be an even number of
1570                  * dwords, so if we've been given an odd number we must
1571                  * pad the data with a Type-2 CP packet.
1572                  */
1573                 if (dwords & 1) {
1574                         u32 *data = (u32 *)
1575                             ((char *)dev->agp_buffer_map->handle
1576                              + buf->offset + start);
1577                         data[dwords++] = RADEON_CP_PACKET2;
1578                 }
1579
1580                 /* Fire off the indirect buffer */
1581                 BEGIN_RING(3);
1582
1583                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1584                 OUT_RING(offset);
1585                 OUT_RING(dwords);
1586
1587                 ADVANCE_RING();
1588         }
1589 }
1590
1591 static void radeon_cp_dispatch_indices(struct drm_device * dev,
1592                                        struct drm_buf * elt_buf,
1593                                        drm_radeon_tcl_prim_t * prim)
1594 {
1595         drm_radeon_private_t *dev_priv = dev->dev_private;
1596         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1597         int offset = dev_priv->gart_buffers_offset + prim->offset;
1598         u32 *data;
1599         int dwords;
1600         int i = 0;
1601         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1602         int count = (prim->finish - start) / sizeof(u16);
1603         int nbox = sarea_priv->nbox;
1604
1605         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1606                   prim->prim,
1607                   prim->vc_format,
1608                   prim->start, prim->finish, prim->offset, prim->numverts);
1609
1610         if (bad_prim_vertex_nr(prim->prim, count)) {
1611                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1612                 return;
1613         }
1614
1615         if (start >= prim->finish || (prim->start & 0x7)) {
1616                 DRM_ERROR("buffer prim %d\n", prim->prim);
1617                 return;
1618         }
1619
1620         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1621
1622         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1623                         elt_buf->offset + prim->start);
1624
1625         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1626         data[1] = offset;
1627         data[2] = prim->numverts;
1628         data[3] = prim->vc_format;
1629         data[4] = (prim->prim |
1630                    RADEON_PRIM_WALK_IND |
1631                    RADEON_COLOR_ORDER_RGBA |
1632                    RADEON_VTX_FMT_RADEON_MODE |
1633                    (count << RADEON_NUM_VERTICES_SHIFT));
1634
1635         do {
1636                 if (i < nbox)
1637                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1638
1639                 radeon_cp_dispatch_indirect(dev, elt_buf,
1640                                             prim->start, prim->finish);
1641
1642                 i++;
1643         } while (i < nbox);
1644
1645 }
1646
/* Upper bound, in bytes, on the data uploaded in one pass of
 * radeon_cp_dispatch_texture(): one DMA buffer's worth.
 */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

/* Upload a texture (sub)image from user space with a blit.
 *
 * filp:  file handle of the caller; used to look up the per-file private
 *        data and recorded as owner of each buffer used.
 * dev:   DRM device.
 * tex:   texture description (offset, pitch, format, width, height, and
 *        the user pointer tex->image).  tex->offset is validated and may
 *        be fixed up in place.
 * image: kernel copy of the image description.  It is updated in place
 *        (y, height, data, and possibly x) as passes complete.
 *
 * The image is copied from user space into a DMA buffer and blitted to
 * tex->offset, at most RADEON_MAX_TEXTURE_SIZE bytes per pass, looping
 * until image->height reaches zero.
 *
 * Returns 0 on success (including when a pass finds nothing left to
 * copy), DRM_ERR(EINVAL) for a bad offset, an unknown format or a zero
 * source pitch with more than one row, DRM_ERR(EFAULT) when a user copy
 * fails, and DRM_ERR(EAGAIN) when no DMA buffer is available -- in that
 * case the current state of *image is first written back to tex->image.
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
                                      struct drm_device * dev,
                                      drm_radeon_texture_t * tex,
                                      drm_radeon_tex_image_t * image)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        struct drm_file *filp_priv;
        struct drm_buf *buf;
        u32 format;
        u32 *buffer;
        const u8 __user *data;
        int size, dwords, tex_width, blit_width, spitch;
        u32 height;
        int i;
        u32 texpitch, microtile;
        u32 offset;
        RING_LOCALS;

        DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

        if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
                DRM_ERROR("Invalid destination offset\n");
                return DRM_ERR(EINVAL);
        }

        dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

        /* Flush the pixel cache.  This ensures no pixel data gets mixed
         * up with the texture data from the host data blit, otherwise
         * part of the texture image may be corrupted.
         */
        BEGIN_RING(4);
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_IDLE();
        ADVANCE_RING();

        /* Pick the blit colour format and convert both widths from
         * texels to bytes (4, 2 or 1 bytes per texel depending on the
         * texture format).
         */
        switch (tex->format) {
        case RADEON_TXFORMAT_ARGB8888:
        case RADEON_TXFORMAT_RGBA8888:
                format = RADEON_COLOR_FORMAT_ARGB8888;
                tex_width = tex->width * 4;
                blit_width = image->width * 4;
                break;
        case RADEON_TXFORMAT_AI88:
        case RADEON_TXFORMAT_ARGB1555:
        case RADEON_TXFORMAT_RGB565:
        case RADEON_TXFORMAT_ARGB4444:
        case RADEON_TXFORMAT_VYUY422:
        case RADEON_TXFORMAT_YVYU422:
                format = RADEON_COLOR_FORMAT_RGB565;
                tex_width = tex->width * 2;
                blit_width = image->width * 2;
                break;
        case RADEON_TXFORMAT_I8:
        case RADEON_TXFORMAT_RGB332:
                format = RADEON_COLOR_FORMAT_CI8;
                tex_width = tex->width * 1;
                blit_width = image->width * 1;
                break;
        default:
                DRM_ERROR("invalid texture format %d\n", tex->format);
                return DRM_ERR(EINVAL);
        }
        /* Source pitch is blit_width / 64; it is packed into the source
         * pitch/offset word of the blit below.  A zero pitch is only
         * accepted for a single-row image.
         */
        spitch = blit_width >> 6;
        if (spitch == 0 && image->height > 1)
                return DRM_ERR(EINVAL);

        /* Detect micro-tiling from the tile bit carried in tex->pitch.
         * For rows narrower than 64 bytes the bit is stripped from the
         * destination pitch and the data is tiled by hand further down.
         */
        texpitch = tex->pitch;
        if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
                microtile = 1;
                if (tex_width < 64) {
                        texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
                        /* we got tiled coordinates, untile them */
                        image->x *= 2;
                }
        } else
                microtile = 0;

        DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

        do {
                DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
                          tex->offset >> 10, tex->pitch, tex->format,
                          image->x, image->y, image->width, image->height);

                /* Make a copy of some parameters in case we have to
                 * update them for a multi-pass texture blit.
                 */
                height = image->height;
                data = (const u8 __user *)image->data;

                size = height * blit_width;

                /* Clamp this pass to one buffer's worth of whole rows,
                 * round tiny uploads up to one dword, and stop when
                 * there is nothing to copy.
                 */
                if (size > RADEON_MAX_TEXTURE_SIZE) {
                        height = RADEON_MAX_TEXTURE_SIZE / blit_width;
                        size = height * blit_width;
                } else if (size < 4 && size > 0) {
                        size = 4;
                } else if (size == 0) {
                        return 0;
                }

                buf = radeon_freelist_get(dev);
                /* The "0 &&" makes this retry-after-idle path dead code. */
                if (0 && !buf) {
                        radeon_do_cp_idle(dev_priv);
                        buf = radeon_freelist_get(dev);
                }
                if (!buf) {
                        /* Hand the progress made so far back to user space
                         * so the ioctl can be retried from this point.
                         */
                        DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
                        if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
                                return DRM_ERR(EFAULT);
                        return DRM_ERR(EAGAIN);
                }

                /* Dispatch the indirect buffer.
                 */
                buffer =
                    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
                dwords = size / 4;

/* Copy _width bytes from user space into the DMA buffer; on a fault this
 * returns DRM_ERR(EFAULT) straight out of the enclosing function.
 * NOTE(review): on that early return `buf` has not been handed to
 * radeon_cp_discard_buffer() -- confirm how the buffer gets reclaimed.
 */
#define RADEON_COPY_MT(_buf, _data, _width) \
        do { \
                if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
                        DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
                        return DRM_ERR(EFAULT); \
                } \
        } while(0)

                if (microtile) {
                        /* texture micro tiling in use, minimum texture width is thus 16 bytes.
                           however, we cannot use blitter directly for texture width < 64 bytes,
                           since minimum tex pitch is 64 bytes and we need this to match
                           the texture width, otherwise the blitter will tile it wrong.
                           Thus, tiling manually in this case. Additionally, need to special
                           case tex height = 1, since our actual image will have height 2
                           and we need to ensure we don't read beyond the texture size
                           from user space. */
                        if (tex->height == 1) {
                                /* NOTE(review): tex_width is already a byte
                                 * count here, yet it is multiplied by
                                 * sizeof(u32) -- confirm this is intended.
                                 */
                                if (tex_width >= 64 || tex_width <= 16) {
                                        RADEON_COPY_MT(buffer, data,
                                                (int)(tex_width * sizeof(u32)));
                                } else if (tex_width == 32) {
                                        RADEON_COPY_MT(buffer, data, 16);
                                        RADEON_COPY_MT(buffer + 8,
                                                       data + 16, 16);
                                }
                        } else if (tex_width >= 64 || tex_width == 16) {
                                RADEON_COPY_MT(buffer, data,
                                               (int)(dwords * sizeof(u32)));
                        } else if (tex_width < 16) {
                                /* One row per 16 bytes (4 dwords) of buffer.
                                 * NOTE(review): the loop bound is tex->height,
                                 * not the per-pass `height` -- confirm.
                                 */
                                for (i = 0; i < tex->height; i++) {
                                        RADEON_COPY_MT(buffer, data, tex_width);
                                        buffer += 4;
                                        data += tex_width;
                                }
                        } else if (tex_width == 32) {
                                /* TODO: make sure this works when not fitting in one buffer
                                   (i.e. 32bytes x 2048...) */
                                /* Two 32-byte rows per iteration, interleaved
                                 * in 16-byte pieces into a 64-byte block.
                                 */
                                for (i = 0; i < tex->height; i += 2) {
                                        RADEON_COPY_MT(buffer, data, 16);
                                        data += 16;
                                        RADEON_COPY_MT(buffer + 8, data, 16);
                                        data += 16;
                                        RADEON_COPY_MT(buffer + 4, data, 16);
                                        data += 16;
                                        RADEON_COPY_MT(buffer + 12, data, 16);
                                        data += 16;
                                        buffer += 16;
                                }
                        }
                } else {
                        if (tex_width >= 32) {
                                /* Texture image width is larger than the minimum, so we
                                 * can upload it directly.
                                 */
                                RADEON_COPY_MT(buffer, data,
                                               (int)(dwords * sizeof(u32)));
                        } else {
                                /* Texture image width is less than the minimum, so we
                                 * need to pad out each image scanline to the minimum
                                 * width.
                                 * NOTE(review): as above, the loop bound is
                                 * tex->height rather than `height` -- confirm.
                                 */
                                for (i = 0; i < tex->height; i++) {
                                        RADEON_COPY_MT(buffer, data, tex_width);
                                        buffer += 8;
                                        data += tex_width;
                                }
                        }
                }

#undef RADEON_COPY_MT
                /* Blit this pass from the DMA buffer to the texture. */
                buf->filp = filp;
                buf->used = size;
                offset = dev_priv->gart_buffers_offset + buf->offset;
                BEGIN_RING(9);
                OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
                OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                         RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                         RADEON_GMC_BRUSH_NONE |
                         (format << 8) |
                         RADEON_GMC_SRC_DATATYPE_COLOR |
                         RADEON_ROP3_S |
                         RADEON_DP_SRC_SOURCE_MEMORY |
                         RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
                OUT_RING((spitch << 22) | (offset >> 10));
                OUT_RING((texpitch << 22) | (tex->offset >> 10));
                OUT_RING(0);
                OUT_RING((image->x << 16) | image->y);
                OUT_RING((image->width << 16) | height);
                RADEON_WAIT_UNTIL_2D_IDLE();
                ADVANCE_RING();

                radeon_cp_discard_buffer(dev, buf);

                /* Update the input parameters for next time */
                image->y += height;
                image->height -= height;
                image->data = (const u8 __user *)image->data + size;
        } while (image->height > 0);

        /* Flush the pixel cache after the blit completes.  This ensures
         * the texture data is written out to memory before rendering
         * continues.
         */
        BEGIN_RING(4);
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_2D_IDLE();
        ADVANCE_RING();
        return 0;
}
1883
1884 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
1885 {
1886         drm_radeon_private_t *dev_priv = dev->dev_private;
1887         int i;
1888         RING_LOCALS;
1889         DRM_DEBUG("\n");
1890
1891         BEGIN_RING(35);
1892
1893         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1894         OUT_RING(0x00000000);
1895
1896         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1897         for (i = 0; i < 32; i++) {
1898                 OUT_RING(stipple[i]);
1899         }
1900
1901         ADVANCE_RING();
1902 }
1903
1904 static void radeon_apply_surface_regs(int surf_index,
1905                                       drm_radeon_private_t *dev_priv)
1906 {
1907         if (!dev_priv->mmio)
1908                 return;
1909
1910         radeon_do_cp_idle(dev_priv);
1911
1912         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1913                      dev_priv->surfaces[surf_index].flags);
1914         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1915                      dev_priv->surfaces[surf_index].lower);
1916         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1917                      dev_priv->surfaces[surf_index].upper);
1918 }
1919
1920 /* Allocates a virtual surface
1921  * doesn't always allocate a real surface, will stretch an existing
1922  * surface when possible.
1923  *
1924  * Note that refcount can be at most 2, since during a free refcount=3
1925  * might mean we have to allocate a new surface which might not always
1926  * be available.
1927  * For example : we allocate three contigous surfaces ABC. If B is
1928  * freed, we suddenly need two surfaces to store A and C, which might
1929  * not always be available.
1930  */
1931 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1932                          drm_radeon_private_t *dev_priv, DRMFILE filp)
1933 {
1934         struct radeon_virt_surface *s;
1935         int i;
1936         int virt_surface_index;
1937         uint32_t new_upper, new_lower;
1938
1939         new_lower = new->address;
1940         new_upper = new_lower + new->size - 1;
1941
1942         /* sanity check */
1943         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1944             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1945              RADEON_SURF_ADDRESS_FIXED_MASK)
1946             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1947                 return -1;
1948
1949         /* make sure there is no overlap with existing surfaces */
1950         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1951                 if ((dev_priv->surfaces[i].refcount != 0) &&
1952                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1953                       (new_lower < dev_priv->surfaces[i].upper)) ||
1954                      ((new_lower < dev_priv->surfaces[i].lower) &&
1955                       (new_upper > dev_priv->surfaces[i].lower)))) {
1956                         return -1;
1957                 }
1958         }
1959
1960         /* find a virtual surface */
1961         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1962                 if (dev_priv->virt_surfaces[i].filp == 0)
1963                         break;
1964         if (i == 2 * RADEON_MAX_SURFACES) {
1965                 return -1;
1966         }
1967         virt_surface_index = i;
1968
1969         /* try to reuse an existing surface */
1970         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1971                 /* extend before */
1972                 if ((dev_priv->surfaces[i].refcount == 1) &&
1973                     (new->flags == dev_priv->surfaces[i].flags) &&
1974                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1975                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1976                         s->surface_index = i;
1977                         s->lower = new_lower;
1978                         s->upper = new_upper;
1979                         s->flags = new->flags;
1980                         s->filp = filp;
1981                         dev_priv->surfaces[i].refcount++;
1982                         dev_priv->surfaces[i].lower = s->lower;
1983                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1984                         return virt_surface_index;
1985                 }
1986
1987                 /* extend after */
1988                 if ((dev_priv->surfaces[i].refcount == 1) &&
1989                     (new->flags == dev_priv->surfaces[i].flags) &&
1990                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1991                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1992                         s->surface_index = i;
1993                         s->lower = new_lower;
1994                         s->upper = new_upper;
1995                         s->flags = new->flags;
1996                         s->filp = filp;
1997                         dev_priv->surfaces[i].refcount++;
1998                         dev_priv->surfaces[i].upper = s->upper;
1999                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2000                         return virt_surface_index;
2001                 }
2002         }
2003
2004         /* okay, we need a new one */
2005         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
2006                 if (dev_priv->surfaces[i].refcount == 0) {
2007                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
2008                         s->surface_index = i;
2009                         s->lower = new_lower;
2010                         s->upper = new_upper;
2011                         s->flags = new->flags;
2012                         s->filp = filp;
2013                         dev_priv->surfaces[i].refcount = 1;
2014                         dev_priv->surfaces[i].lower = s->lower;
2015                         dev_priv->surfaces[i].upper = s->upper;
2016                         dev_priv->surfaces[i].flags = s->flags;
2017                         radeon_apply_surface_regs(s->surface_index, dev_priv);
2018                         return virt_surface_index;
2019                 }
2020         }
2021
2022         /* we didn't find anything */
2023         return -1;
2024 }
2025
2026 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
2027                         int lower)
2028 {
2029         struct radeon_virt_surface *s;
2030         int i;
2031         /* find the virtual surface */
2032         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2033                 s = &(dev_priv->virt_surfaces[i]);
2034                 if (s->filp) {
2035                         if ((lower == s->lower) && (filp == s->filp)) {
2036                                 if (dev_priv->surfaces[s->surface_index].
2037                                     lower == s->lower)
2038                                         dev_priv->surfaces[s->surface_index].
2039                                             lower = s->upper;
2040
2041                                 if (dev_priv->surfaces[s->surface_index].
2042                                     upper == s->upper)
2043                                         dev_priv->surfaces[s->surface_index].
2044                                             upper = s->lower;
2045
2046                                 dev_priv->surfaces[s->surface_index].refcount--;
2047                                 if (dev_priv->surfaces[s->surface_index].
2048                                     refcount == 0)
2049                                         dev_priv->surfaces[s->surface_index].
2050                                             flags = 0;
2051                                 s->filp = NULL;
2052                                 radeon_apply_surface_regs(s->surface_index,
2053                                                           dev_priv);
2054                                 return 0;
2055                         }
2056                 }
2057         }
2058         return 1;
2059 }
2060
2061 static void radeon_surfaces_release(DRMFILE filp,
2062                                     drm_radeon_private_t * dev_priv)
2063 {
2064         int i;
2065         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
2066                 if (dev_priv->virt_surfaces[i].filp == filp)
2067                         free_surface(filp, dev_priv,
2068                                      dev_priv->virt_surfaces[i].lower);
2069         }
2070 }
2071
2072 /* ================================================================
2073  * IOCTL functions
2074  */
2075 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
2076 {
2077         DRM_DEVICE;
2078         drm_radeon_private_t *dev_priv = dev->dev_private;
2079         drm_radeon_surface_alloc_t alloc;
2080
2081         DRM_COPY_FROM_USER_IOCTL(alloc,
2082                                  (drm_radeon_surface_alloc_t __user *) data,
2083                                  sizeof(alloc));
2084
2085         if (alloc_surface(&alloc, dev_priv, filp) == -1)
2086                 return DRM_ERR(EINVAL);
2087         else
2088                 return 0;
2089 }
2090
2091 static int radeon_surface_free(DRM_IOCTL_ARGS)
2092 {
2093         DRM_DEVICE;
2094         drm_radeon_private_t *dev_priv = dev->dev_private;
2095         drm_radeon_surface_free_t memfree;
2096
2097         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
2098                                  sizeof(memfree));
2099
2100         if (free_surface(filp, dev_priv, memfree.address))
2101                 return DRM_ERR(EINVAL);
2102         else
2103                 return 0;
2104 }
2105
2106 static int radeon_cp_clear(DRM_IOCTL_ARGS)
2107 {
2108         DRM_DEVICE;
2109         drm_radeon_private_t *dev_priv = dev->dev_private;
2110         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2111         drm_radeon_clear_t clear;
2112         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2113         DRM_DEBUG("\n");
2114
2115         LOCK_TEST_WITH_RETURN(dev, filp);
2116
2117         DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
2118                                  sizeof(clear));
2119
2120         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2121
2122         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2123                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2124
2125         if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
2126                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2127                 return DRM_ERR(EFAULT);
2128
2129         radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2130
2131         COMMIT_RING();
2132         return 0;
2133 }
2134
2135 /* Not sure why this isn't set all the time:
2136  */
2137 static int radeon_do_init_pageflip(struct drm_device * dev)
2138 {
2139         drm_radeon_private_t *dev_priv = dev->dev_private;
2140         RING_LOCALS;
2141
2142         DRM_DEBUG("\n");
2143
2144         BEGIN_RING(6);
2145         RADEON_WAIT_UNTIL_3D_IDLE();
2146         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2147         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2148                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2149         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2150         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2151                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2152         ADVANCE_RING();
2153
2154         dev_priv->page_flipping = 1;
2155
2156         if (dev_priv->sarea_priv->pfCurrentPage != 1)
2157                 dev_priv->sarea_priv->pfCurrentPage = 0;
2158
2159         return 0;
2160 }
2161
2162 /* Swapping and flipping are different operations, need different ioctls.
2163  * They can & should be intermixed to support multiple 3d windows.
2164  */
2165 static int radeon_cp_flip(DRM_IOCTL_ARGS)
2166 {
2167         DRM_DEVICE;
2168         drm_radeon_private_t *dev_priv = dev->dev_private;
2169         DRM_DEBUG("\n");
2170
2171         LOCK_TEST_WITH_RETURN(dev, filp);
2172
2173         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2174
2175         if (!dev_priv->page_flipping)
2176                 radeon_do_init_pageflip(dev);
2177
2178         radeon_cp_dispatch_flip(dev);
2179
2180         COMMIT_RING();
2181         return 0;
2182 }
2183
2184 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2185 {
2186         DRM_DEVICE;
2187         drm_radeon_private_t *dev_priv = dev->dev_private;
2188         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2189         DRM_DEBUG("\n");
2190
2191         LOCK_TEST_WITH_RETURN(dev, filp);
2192
2193         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2194
2195         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2196                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2197
2198         radeon_cp_dispatch_swap(dev);
2199         dev_priv->sarea_priv->ctx_owner = 0;
2200
2201         COMMIT_RING();
2202         return 0;
2203 }
2204
2205 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2206 {
2207         DRM_DEVICE;
2208         drm_radeon_private_t *dev_priv = dev->dev_private;
2209         struct drm_file *filp_priv;
2210         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2211         struct drm_device_dma *dma = dev->dma;
2212         struct drm_buf *buf;
2213         drm_radeon_vertex_t vertex;
2214         drm_radeon_tcl_prim_t prim;
2215
2216         LOCK_TEST_WITH_RETURN(dev, filp);
2217
2218         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2219
2220         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
2221                                  sizeof(vertex));
2222
2223         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2224                   DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2225
2226         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2227                 DRM_ERROR("buffer index %d (of %d max)\n",
2228                           vertex.idx, dma->buf_count - 1);
2229                 return DRM_ERR(EINVAL);
2230         }
2231         if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2232                 DRM_ERROR("buffer prim %d\n", vertex.prim);
2233                 return DRM_ERR(EINVAL);
2234         }
2235
2236         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2237         VB_AGE_TEST_WITH_RETURN(dev_priv);
2238
2239         buf = dma->buflist[vertex.idx];
2240
2241         if (buf->filp != filp) {
2242                 DRM_ERROR("process %d using buffer owned by %p\n",
2243                           DRM_CURRENTPID, buf->filp);
2244                 return DRM_ERR(EINVAL);
2245         }
2246         if (buf->pending) {
2247                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2248                 return DRM_ERR(EINVAL);
2249         }
2250
2251         /* Build up a prim_t record:
2252          */
2253         if (vertex.count) {
2254                 buf->used = vertex.count;       /* not used? */
2255
2256                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2257                         if (radeon_emit_state(dev_priv, filp_priv,
2258                                               &sarea_priv->context_state,
2259                                               sarea_priv->tex_state,
2260                                               sarea_priv->dirty)) {
2261                                 DRM_ERROR("radeon_emit_state failed\n");
2262                                 return DRM_ERR(EINVAL);
2263                         }
2264
2265                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2266                                                RADEON_UPLOAD_TEX1IMAGES |
2267                                                RADEON_UPLOAD_TEX2IMAGES |
2268                                                RADEON_REQUIRE_QUIESCENCE);
2269                 }
2270
2271                 prim.start = 0;
2272                 prim.finish = vertex.count;     /* unused */
2273                 prim.prim = vertex.prim;
2274                 prim.numverts = vertex.count;
2275                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2276
2277                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2278         }
2279
2280         if (vertex.discard) {
2281                 radeon_cp_discard_buffer(dev, buf);
2282         }
2283
2284         COMMIT_RING();
2285         return 0;
2286 }
2287
2288 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2289 {
2290         DRM_DEVICE;
2291         drm_radeon_private_t *dev_priv = dev->dev_private;
2292         struct drm_file *filp_priv;
2293         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2294         struct drm_device_dma *dma = dev->dma;
2295         struct drm_buf *buf;
2296         drm_radeon_indices_t elts;
2297         drm_radeon_tcl_prim_t prim;
2298         int count;
2299
2300         LOCK_TEST_WITH_RETURN(dev, filp);
2301
2302         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2303
2304         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2305                                  sizeof(elts));
2306
2307         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2308                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2309
2310         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2311                 DRM_ERROR("buffer index %d (of %d max)\n",
2312                           elts.idx, dma->buf_count - 1);
2313                 return DRM_ERR(EINVAL);
2314         }
2315         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2316                 DRM_ERROR("buffer prim %d\n", elts.prim);
2317                 return DRM_ERR(EINVAL);
2318         }
2319
2320         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2321         VB_AGE_TEST_WITH_RETURN(dev_priv);
2322
2323         buf = dma->buflist[elts.idx];
2324
2325         if (buf->filp != filp) {
2326                 DRM_ERROR("process %d using buffer owned by %p\n",
2327                           DRM_CURRENTPID, buf->filp);
2328                 return DRM_ERR(EINVAL);
2329         }
2330         if (buf->pending) {
2331                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2332                 return DRM_ERR(EINVAL);
2333         }
2334
2335         count = (elts.end - elts.start) / sizeof(u16);
2336         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2337
2338         if (elts.start & 0x7) {
2339                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2340                 return DRM_ERR(EINVAL);
2341         }
2342         if (elts.start < buf->used) {
2343                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2344                 return DRM_ERR(EINVAL);
2345         }
2346
2347         buf->used = elts.end;
2348
2349         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2350                 if (radeon_emit_state(dev_priv, filp_priv,
2351                                       &sarea_priv->context_state,
2352                                       sarea_priv->tex_state,
2353                                       sarea_priv->dirty)) {
2354                         DRM_ERROR("radeon_emit_state failed\n");
2355                         return DRM_ERR(EINVAL);
2356                 }
2357
2358                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2359                                        RADEON_UPLOAD_TEX1IMAGES |
2360                                        RADEON_UPLOAD_TEX2IMAGES |
2361                                        RADEON_REQUIRE_QUIESCENCE);
2362         }
2363
2364         /* Build up a prim_t record:
2365          */
2366         prim.start = elts.start;
2367         prim.finish = elts.end;
2368         prim.prim = elts.prim;
2369         prim.offset = 0;        /* offset from start of dma buffers */
2370         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2371         prim.vc_format = dev_priv->sarea_priv->vc_format;
2372
2373         radeon_cp_dispatch_indices(dev, buf, &prim);
2374         if (elts.discard) {
2375                 radeon_cp_discard_buffer(dev, buf);
2376         }
2377
2378         COMMIT_RING();
2379         return 0;
2380 }
2381
2382 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2383 {
2384         DRM_DEVICE;
2385         drm_radeon_private_t *dev_priv = dev->dev_private;
2386         drm_radeon_texture_t tex;
2387         drm_radeon_tex_image_t image;
2388         int ret;
2389
2390         LOCK_TEST_WITH_RETURN(dev, filp);
2391
2392         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2393                                  sizeof(tex));
2394
2395         if (tex.image == NULL) {
2396                 DRM_ERROR("null texture image!\n");
2397                 return DRM_ERR(EINVAL);
2398         }
2399
2400         if (DRM_COPY_FROM_USER(&image,
2401                                (drm_radeon_tex_image_t __user *) tex.image,
2402                                sizeof(image)))
2403                 return DRM_ERR(EFAULT);
2404
2405         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2406         VB_AGE_TEST_WITH_RETURN(dev_priv);
2407
2408         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2409
2410         COMMIT_RING();
2411         return ret;
2412 }
2413
2414 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2415 {
2416         DRM_DEVICE;
2417         drm_radeon_private_t *dev_priv = dev->dev_private;
2418         drm_radeon_stipple_t stipple;
2419         u32 mask[32];
2420
2421         LOCK_TEST_WITH_RETURN(dev, filp);
2422
2423         DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2424                                  sizeof(stipple));
2425
2426         if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2427                 return DRM_ERR(EFAULT);
2428
2429         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2430
2431         radeon_cp_dispatch_stipple(dev, mask);
2432
2433         COMMIT_RING();
2434         return 0;
2435 }
2436
2437 static int radeon_cp_indirect(DRM_IOCTL_ARGS)
2438 {
2439         DRM_DEVICE;
2440         drm_radeon_private_t *dev_priv = dev->dev_private;
2441         struct drm_device_dma *dma = dev->dma;
2442         struct drm_buf *buf;
2443         drm_radeon_indirect_t indirect;
2444         RING_LOCALS;
2445
2446         LOCK_TEST_WITH_RETURN(dev, filp);
2447
2448         DRM_COPY_FROM_USER_IOCTL(indirect,
2449                                  (drm_radeon_indirect_t __user *) data,
2450                                  sizeof(indirect));
2451
2452         DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
2453                   indirect.idx, indirect.start, indirect.end, indirect.discard);
2454
2455         if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
2456                 DRM_ERROR("buffer index %d (of %d max)\n",
2457                           indirect.idx, dma->buf_count - 1);
2458                 return DRM_ERR(EINVAL);
2459         }
2460
2461         buf = dma->buflist[indirect.idx];
2462
2463         if (buf->filp != filp) {
2464                 DRM_ERROR("process %d using buffer owned by %p\n",
2465                           DRM_CURRENTPID, buf->filp);
2466                 return DRM_ERR(EINVAL);
2467         }
2468         if (buf->pending) {
2469                 DRM_ERROR("sending pending buffer %d\n", indirect.idx);
2470                 return DRM_ERR(EINVAL);
2471         }
2472
2473         if (indirect.start < buf->used) {
2474                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2475                           indirect.start, buf->used);
2476                 return DRM_ERR(EINVAL);
2477         }
2478
2479         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2480         VB_AGE_TEST_WITH_RETURN(dev_priv);
2481
2482         buf->used = indirect.end;
2483
2484         /* Wait for the 3D stream to idle before the indirect buffer
2485          * containing 2D acceleration commands is processed.
2486          */
2487         BEGIN_RING(2);
2488
2489         RADEON_WAIT_UNTIL_3D_IDLE();
2490
2491         ADVANCE_RING();
2492
2493         /* Dispatch the indirect buffer full of commands from the
2494          * X server.  This is insecure and is thus only available to
2495          * privileged clients.
2496          */
2497         radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
2498         if (indirect.discard) {
2499                 radeon_cp_discard_buffer(dev, buf);
2500         }
2501
2502         COMMIT_RING();
2503         return 0;
2504 }
2505
2506 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2507 {
2508         DRM_DEVICE;
2509         drm_radeon_private_t *dev_priv = dev->dev_private;
2510         struct drm_file *filp_priv;
2511         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2512         struct drm_device_dma *dma = dev->dma;
2513         struct drm_buf *buf;
2514         drm_radeon_vertex2_t vertex;
2515         int i;
2516         unsigned char laststate;
2517
2518         LOCK_TEST_WITH_RETURN(dev, filp);
2519
2520         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2521
2522         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2523                                  sizeof(vertex));
2524
2525         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2526                   DRM_CURRENTPID, vertex.idx, vertex.discard);
2527
2528         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2529                 DRM_ERROR("buffer index %d (of %d max)\n",
2530                           vertex.idx, dma->buf_count - 1);
2531                 return DRM_ERR(EINVAL);
2532         }
2533
2534         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2535         VB_AGE_TEST_WITH_RETURN(dev_priv);
2536
2537         buf = dma->buflist[vertex.idx];
2538
2539         if (buf->filp != filp) {
2540                 DRM_ERROR("process %d using buffer owned by %p\n",
2541                           DRM_CURRENTPID, buf->filp);
2542                 return DRM_ERR(EINVAL);
2543         }
2544
2545         if (buf->pending) {
2546                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2547                 return DRM_ERR(EINVAL);
2548         }
2549
2550         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2551                 return DRM_ERR(EINVAL);
2552
2553         for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2554                 drm_radeon_prim_t prim;
2555                 drm_radeon_tcl_prim_t tclprim;
2556
2557                 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2558                         return DRM_ERR(EFAULT);
2559
2560                 if (prim.stateidx != laststate) {
2561                         drm_radeon_state_t state;
2562
2563                         if (DRM_COPY_FROM_USER(&state,
2564                                                &vertex.state[prim.stateidx],
2565                                                sizeof(state)))
2566                                 return DRM_ERR(EFAULT);
2567
2568                         if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2569                                 DRM_ERROR("radeon_emit_state2 failed\n");
2570                                 return DRM_ERR(EINVAL);
2571                         }
2572
2573                         laststate = prim.stateidx;
2574                 }
2575
2576                 tclprim.start = prim.start;
2577                 tclprim.finish = prim.finish;
2578                 tclprim.prim = prim.prim;
2579                 tclprim.vc_format = prim.vc_format;
2580
2581                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2582                         tclprim.offset = prim.numverts * 64;
2583                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2584
2585                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2586                 } else {
2587                         tclprim.numverts = prim.numverts;
2588                         tclprim.offset = 0;     /* not used */
2589
2590                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2591                 }
2592
2593                 if (sarea_priv->nbox == 1)
2594                         sarea_priv->nbox = 0;
2595         }
2596
2597         if (vertex.discard) {
2598                 radeon_cp_discard_buffer(dev, buf);
2599         }
2600
2601         COMMIT_RING();
2602         return 0;
2603 }
2604
2605 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2606                                struct drm_file * filp_priv,
2607                                drm_radeon_cmd_header_t header,
2608                                drm_radeon_kcmd_buffer_t *cmdbuf)
2609 {
2610         int id = (int)header.packet.packet_id;
2611         int sz, reg;
2612         int *data = (int *)cmdbuf->buf;
2613         RING_LOCALS;
2614
2615         if (id >= RADEON_MAX_STATE_PACKETS)
2616                 return DRM_ERR(EINVAL);
2617
2618         sz = packet[id].len;
2619         reg = packet[id].start;
2620
2621         if (sz * sizeof(int) > cmdbuf->bufsz) {
2622                 DRM_ERROR("Packet size provided larger than data provided\n");
2623                 return DRM_ERR(EINVAL);
2624         }
2625
2626         if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2627                 DRM_ERROR("Packet verification failed\n");
2628                 return DRM_ERR(EINVAL);
2629         }
2630
2631         BEGIN_RING(sz + 1);
2632         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2633         OUT_RING_TABLE(data, sz);
2634         ADVANCE_RING();
2635
2636         cmdbuf->buf += sz * sizeof(int);
2637         cmdbuf->bufsz -= sz * sizeof(int);
2638         return 0;
2639 }
2640
2641 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2642                                           drm_radeon_cmd_header_t header,
2643                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2644 {
2645         int sz = header.scalars.count;
2646         int start = header.scalars.offset;
2647         int stride = header.scalars.stride;
2648         RING_LOCALS;
2649
2650         BEGIN_RING(3 + sz);
2651         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2652         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2653         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2654         OUT_RING_TABLE(cmdbuf->buf, sz);
2655         ADVANCE_RING();
2656         cmdbuf->buf += sz * sizeof(int);
2657         cmdbuf->bufsz -= sz * sizeof(int);
2658         return 0;
2659 }
2660
2661 /* God this is ugly
2662  */
2663 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2664                                            drm_radeon_cmd_header_t header,
2665                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2666 {
2667         int sz = header.scalars.count;
2668         int start = ((unsigned int)header.scalars.offset) + 0x100;
2669         int stride = header.scalars.stride;
2670         RING_LOCALS;
2671
2672         BEGIN_RING(3 + sz);
2673         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2674         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2675         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2676         OUT_RING_TABLE(cmdbuf->buf, sz);
2677         ADVANCE_RING();
2678         cmdbuf->buf += sz * sizeof(int);
2679         cmdbuf->bufsz -= sz * sizeof(int);
2680         return 0;
2681 }
2682
2683 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2684                                           drm_radeon_cmd_header_t header,
2685                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2686 {
2687         int sz = header.vectors.count;
2688         int start = header.vectors.offset;
2689         int stride = header.vectors.stride;
2690         RING_LOCALS;
2691
2692         BEGIN_RING(5 + sz);
2693         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2694         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2695         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2696         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2697         OUT_RING_TABLE(cmdbuf->buf, sz);
2698         ADVANCE_RING();
2699
2700         cmdbuf->buf += sz * sizeof(int);
2701         cmdbuf->bufsz -= sz * sizeof(int);
2702         return 0;
2703 }
2704
2705 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2706                                           drm_radeon_cmd_header_t header,
2707                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2708 {
2709         int sz = header.veclinear.count * 4;
2710         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2711         RING_LOCALS;
2712
2713         if (!sz)
2714                 return 0;
2715         if (sz * 4 > cmdbuf->bufsz)
2716                 return DRM_ERR(EINVAL);
2717
2718         BEGIN_RING(5 + sz);
2719         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2720         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2721         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2722         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2723         OUT_RING_TABLE(cmdbuf->buf, sz);
2724         ADVANCE_RING();
2725
2726         cmdbuf->buf += sz * sizeof(int);
2727         cmdbuf->bufsz -= sz * sizeof(int);
2728         return 0;
2729 }
2730
2731 static int radeon_emit_packet3(struct drm_device * dev,
2732                                struct drm_file * filp_priv,
2733                                drm_radeon_kcmd_buffer_t *cmdbuf)
2734 {
2735         drm_radeon_private_t *dev_priv = dev->dev_private;
2736         unsigned int cmdsz;
2737         int ret;
2738         RING_LOCALS;
2739
2740         DRM_DEBUG("\n");
2741
2742         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2743                                                   cmdbuf, &cmdsz))) {
2744                 DRM_ERROR("Packet verification failed\n");
2745                 return ret;
2746         }
2747
2748         BEGIN_RING(cmdsz);
2749         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2750         ADVANCE_RING();
2751
2752         cmdbuf->buf += cmdsz * 4;
2753         cmdbuf->bufsz -= cmdsz * 4;
2754         return 0;
2755 }
2756
2757 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
2758                                         struct drm_file *filp_priv,
2759                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2760                                         int orig_nbox)
2761 {
2762         drm_radeon_private_t *dev_priv = dev->dev_private;
2763         struct drm_clip_rect box;
2764         unsigned int cmdsz;
2765         int ret;
2766         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
2767         int i = 0;
2768         RING_LOCALS;
2769
2770         DRM_DEBUG("\n");
2771
2772         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2773                                                   cmdbuf, &cmdsz))) {
2774                 DRM_ERROR("Packet verification failed\n");
2775                 return ret;
2776         }
2777
2778         if (!orig_nbox)
2779                 goto out;
2780
2781         do {
2782                 if (i < cmdbuf->nbox) {
2783                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2784                                 return DRM_ERR(EFAULT);
2785                         /* FIXME The second and subsequent times round
2786                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2787                          * calling emit_clip_rect(). This fixes a
2788                          * lockup on fast machines when sending
2789                          * several cliprects with a cmdbuf, as when
2790                          * waving a 2D window over a 3D
2791                          * window. Something in the commands from user
2792                          * space seems to hang the card when they're
2793                          * sent several times in a row. That would be
2794                          * the correct place to fix it but this works
2795                          * around it until I can figure that out - Tim
2796                          * Smith */
2797                         if (i) {
2798                                 BEGIN_RING(2);
2799                                 RADEON_WAIT_UNTIL_3D_IDLE();
2800                                 ADVANCE_RING();
2801                         }
2802                         radeon_emit_clip_rect(dev_priv, &box);
2803                 }
2804
2805                 BEGIN_RING(cmdsz);
2806                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2807                 ADVANCE_RING();
2808
2809         } while (++i < cmdbuf->nbox);
2810         if (cmdbuf->nbox == 1)
2811                 cmdbuf->nbox = 0;
2812
2813       out:
2814         cmdbuf->buf += cmdsz * 4;
2815         cmdbuf->bufsz -= cmdsz * 4;
2816         return 0;
2817 }
2818
2819 static int radeon_emit_wait(struct drm_device * dev, int flags)
2820 {
2821         drm_radeon_private_t *dev_priv = dev->dev_private;
2822         RING_LOCALS;
2823
2824         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2825         switch (flags) {
2826         case RADEON_WAIT_2D:
2827                 BEGIN_RING(2);
2828                 RADEON_WAIT_UNTIL_2D_IDLE();
2829                 ADVANCE_RING();
2830                 break;
2831         case RADEON_WAIT_3D:
2832                 BEGIN_RING(2);
2833                 RADEON_WAIT_UNTIL_3D_IDLE();
2834                 ADVANCE_RING();
2835                 break;
2836         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2837                 BEGIN_RING(2);
2838                 RADEON_WAIT_UNTIL_IDLE();
2839                 ADVANCE_RING();
2840                 break;
2841         default:
2842                 return DRM_ERR(EINVAL);
2843         }
2844
2845         return 0;
2846 }
2847
2848 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2849 {
2850         DRM_DEVICE;
2851         drm_radeon_private_t *dev_priv = dev->dev_private;
2852         struct drm_file *filp_priv;
2853         struct drm_device_dma *dma = dev->dma;
2854         struct drm_buf *buf = NULL;
2855         int idx;
2856         drm_radeon_kcmd_buffer_t cmdbuf;
2857         drm_radeon_cmd_header_t header;
2858         int orig_nbox, orig_bufsz;
2859         char *kbuf = NULL;
2860
2861         LOCK_TEST_WITH_RETURN(dev, filp);
2862
2863         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2864
2865         DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2866                                  (drm_radeon_cmd_buffer_t __user *) data,
2867                                  sizeof(cmdbuf));
2868
2869         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2870         VB_AGE_TEST_WITH_RETURN(dev_priv);
2871
2872         if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2873                 return DRM_ERR(EINVAL);
2874         }
2875
2876         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2877          * races between checking values and using those values in other code,
2878          * and simply to avoid a lot of function calls to copy in data.
2879          */
2880         orig_bufsz = cmdbuf.bufsz;
2881         if (orig_bufsz != 0) {
2882                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2883                 if (kbuf == NULL)
2884                         return DRM_ERR(ENOMEM);
2885                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
2886                                        cmdbuf.bufsz)) {
2887                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2888                         return DRM_ERR(EFAULT);
2889                 }
2890                 cmdbuf.buf = kbuf;
2891         }
2892
2893         orig_nbox = cmdbuf.nbox;
2894
2895         if (dev_priv->microcode_version == UCODE_R300) {
2896                 int temp;
2897                 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2898
2899                 if (orig_bufsz != 0)
2900                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2901
2902                 return temp;
2903         }
2904
2905         /* microcode_version != r300 */
2906         while (cmdbuf.bufsz >= sizeof(header)) {
2907
2908                 header.i = *(int *)cmdbuf.buf;
2909                 cmdbuf.buf += sizeof(header);
2910                 cmdbuf.bufsz -= sizeof(header);
2911
2912                 switch (header.header.cmd_type) {
2913                 case RADEON_CMD_PACKET:
2914                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2915                         if (radeon_emit_packets
2916                             (dev_priv, filp_priv, header, &cmdbuf)) {
2917                                 DRM_ERROR("radeon_emit_packets failed\n");
2918                                 goto err;
2919                         }
2920                         break;
2921
2922                 case RADEON_CMD_SCALARS:
2923                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2924                         if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2925                                 DRM_ERROR("radeon_emit_scalars failed\n");
2926                                 goto err;
2927                         }
2928                         break;
2929
2930                 case RADEON_CMD_VECTORS:
2931                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2932                         if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2933                                 DRM_ERROR("radeon_emit_vectors failed\n");
2934                                 goto err;
2935                         }
2936                         break;
2937
2938                 case RADEON_CMD_DMA_DISCARD:
2939                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2940                         idx = header.dma.buf_idx;
2941                         if (idx < 0 || idx >= dma->buf_count) {
2942                                 DRM_ERROR("buffer index %d (of %d max)\n",
2943                                           idx, dma->buf_count - 1);
2944                                 goto err;
2945                         }
2946
2947                         buf = dma->buflist[idx];
2948                         if (buf->filp != filp || buf->pending) {
2949                                 DRM_ERROR("bad buffer %p %p %d\n",
2950                                           buf->filp, filp, buf->pending);
2951                                 goto err;
2952                         }
2953
2954                         radeon_cp_discard_buffer(dev, buf);
2955                         break;
2956
2957                 case RADEON_CMD_PACKET3:
2958                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2959                         if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2960                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2961                                 goto err;
2962                         }
2963                         break;
2964
2965                 case RADEON_CMD_PACKET3_CLIP:
2966                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2967                         if (radeon_emit_packet3_cliprect
2968                             (dev, filp_priv, &cmdbuf, orig_nbox)) {
2969                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2970                                 goto err;
2971                         }
2972                         break;
2973
2974                 case RADEON_CMD_SCALARS2:
2975                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2976                         if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2977                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2978                                 goto err;
2979                         }
2980                         break;
2981
2982                 case RADEON_CMD_WAIT:
2983                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2984                         if (radeon_emit_wait(dev, header.wait.flags)) {
2985                                 DRM_ERROR("radeon_emit_wait failed\n");
2986                                 goto err;
2987                         }
2988                         break;
2989                 case RADEON_CMD_VECLINEAR:
2990                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2991                         if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
2992                                 DRM_ERROR("radeon_emit_veclinear failed\n");
2993                                 goto err;
2994                         }
2995                         break;
2996
2997                 default:
2998                         DRM_ERROR("bad cmd_type %d at %p\n",
2999                                   header.header.cmd_type,
3000                                   cmdbuf.buf - sizeof(header));
3001                         goto err;
3002                 }
3003         }
3004
3005         if (orig_bufsz != 0)
3006                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3007
3008         DRM_DEBUG("DONE\n");
3009         COMMIT_RING();
3010         return 0;
3011
3012       err:
3013         if (orig_bufsz != 0)
3014                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
3015         return DRM_ERR(EINVAL);
3016 }
3017
3018 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
3019 {
3020         DRM_DEVICE;
3021         drm_radeon_private_t *dev_priv = dev->dev_private;
3022         drm_radeon_getparam_t param;
3023         int value;
3024
3025         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
3026                                  sizeof(param));
3027
3028         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
3029
3030         switch (param.param) {
3031         case RADEON_PARAM_GART_BUFFER_OFFSET:
3032                 value = dev_priv->gart_buffers_offset;
3033                 break;
3034         case RADEON_PARAM_LAST_FRAME:
3035                 dev_priv->stats.last_frame_reads++;
3036                 value = GET_SCRATCH(0);
3037                 break;
3038         case RADEON_PARAM_LAST_DISPATCH:
3039                 value = GET_SCRATCH(1);
3040                 break;
3041         case RADEON_PARAM_LAST_CLEAR:
3042                 dev_priv->stats.last_clear_reads++;
3043                 value = GET_SCRATCH(2);
3044                 break;
3045         case RADEON_PARAM_IRQ_NR:
3046                 value = dev->irq;
3047                 break;
3048         case RADEON_PARAM_GART_BASE:
3049                 value = dev_priv->gart_vm_start;
3050                 break;
3051         case RADEON_PARAM_REGISTER_HANDLE:
3052                 value = dev_priv->mmio->offset;
3053                 break;
3054         case RADEON_PARAM_STATUS_HANDLE:
3055                 value = dev_priv->ring_rptr_offset;
3056                 break;
3057 #if BITS_PER_LONG == 32
3058                 /*
3059                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
3060                  * pointer which can't fit into an int-sized variable.  According to
3061                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
3062                  * not supporting it shouldn't be a problem.  If the same functionality
3063                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
3064                  * so backwards-compatibility for the embedded platforms can be
3065                  * maintained.  --davidm 4-Feb-2004.
3066                  */
3067         case RADEON_PARAM_SAREA_HANDLE:
3068                 /* The lock is the first dword in the sarea. */
3069                 value = (long)dev->lock.hw_lock;
3070                 break;
3071 #endif
3072         case RADEON_PARAM_GART_TEX_HANDLE:
3073                 value = dev_priv->gart_textures_offset;
3074                 break;
3075         case RADEON_PARAM_SCRATCH_OFFSET:
3076                 if (!dev_priv->writeback_works)
3077                         return DRM_ERR(EINVAL);
3078                 value = RADEON_SCRATCH_REG_OFFSET;
3079                 break;
3080         case RADEON_PARAM_CARD_TYPE:
3081                 if (dev_priv->flags & RADEON_IS_PCIE)
3082                         value = RADEON_CARD_PCIE;
3083                 else if (dev_priv->flags & RADEON_IS_AGP)
3084                         value = RADEON_CARD_AGP;
3085                 else
3086                         value = RADEON_CARD_PCI;
3087                 break;
3088         case RADEON_PARAM_VBLANK_CRTC:
3089                 value = radeon_vblank_crtc_get(dev);
3090                 break;
3091         default:
3092                 DRM_DEBUG("Invalid parameter %d\n", param.param);
3093                 return DRM_ERR(EINVAL);
3094         }
3095
3096         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3097                 DRM_ERROR("copy_to_user\n");
3098                 return DRM_ERR(EFAULT);
3099         }
3100
3101         return 0;
3102 }
3103
3104 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3105 {
3106         DRM_DEVICE;
3107         drm_radeon_private_t *dev_priv = dev->dev_private;
3108         struct drm_file *filp_priv;
3109         drm_radeon_setparam_t sp;
3110         struct drm_radeon_driver_file_fields *radeon_priv;
3111
3112         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3113
3114         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3115                                  sizeof(sp));
3116
3117         switch (sp.param) {
3118         case RADEON_SETPARAM_FB_LOCATION:
3119                 radeon_priv = filp_priv->driver_priv;
3120                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3121                 break;
3122         case RADEON_SETPARAM_SWITCH_TILING:
3123                 if (sp.value == 0) {
3124                         DRM_DEBUG("color tiling disabled\n");
3125                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3126                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3127                         dev_priv->sarea_priv->tiling_enabled = 0;
3128                 } else if (sp.value == 1) {
3129                         DRM_DEBUG("color tiling enabled\n");
3130                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3131                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3132                         dev_priv->sarea_priv->tiling_enabled = 1;
3133                 }
3134                 break;
3135         case RADEON_SETPARAM_PCIGART_LOCATION:
3136                 dev_priv->pcigart_offset = sp.value;
3137                 dev_priv->pcigart_offset_set = 1;
3138                 break;
3139         case RADEON_SETPARAM_NEW_MEMMAP:
3140                 dev_priv->new_memmap = sp.value;
3141                 break;
3142         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
3143                 dev_priv->gart_info.table_size = sp.value;
3144                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
3145                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
3146                 break;
3147         case RADEON_SETPARAM_VBLANK_CRTC:
3148                 return radeon_vblank_crtc_set(dev, sp.value);
3149                 break;
3150         default:
3151                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3152                 return DRM_ERR(EINVAL);
3153         }
3154
3155         return 0;
3156 }
3157
3158 /* When a client dies:
3159  *    - Check for and clean up flipped page state
3160  *    - Free any alloced GART memory.
3161  *    - Free any alloced radeon surfaces.
3162  *
3163  * DRM infrastructure takes care of reclaiming dma buffers.
3164  */
3165 void radeon_driver_preclose(struct drm_device *dev, DRMFILE filp)
3166 {
3167         if (dev->dev_private) {
3168                 drm_radeon_private_t *dev_priv = dev->dev_private;
3169                 dev_priv->page_flipping = 0;
3170                 radeon_mem_release(filp, dev_priv->gart_heap);
3171                 radeon_mem_release(filp, dev_priv->fb_heap);
3172                 radeon_surfaces_release(filp, dev_priv);
3173         }
3174 }
3175
3176 void radeon_driver_lastclose(struct drm_device *dev)
3177 {
3178         if (dev->dev_private) {
3179                 drm_radeon_private_t *dev_priv = dev->dev_private;
3180
3181                 if (dev_priv->sarea_priv &&
3182                     dev_priv->sarea_priv->pfCurrentPage != 0)
3183                         radeon_cp_dispatch_flip(dev);
3184         }
3185
3186         radeon_do_release(dev);
3187 }
3188
3189 int radeon_driver_open(struct drm_device *dev, struct drm_file *filp_priv)
3190 {
3191         drm_radeon_private_t *dev_priv = dev->dev_private;
3192         struct drm_radeon_driver_file_fields *radeon_priv;
3193
3194         DRM_DEBUG("\n");
3195         radeon_priv =
3196             (struct drm_radeon_driver_file_fields *)
3197             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3198
3199         if (!radeon_priv)
3200                 return -ENOMEM;
3201
3202         filp_priv->driver_priv = radeon_priv;
3203
3204         if (dev_priv)
3205                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3206         else
3207                 radeon_priv->radeon_fb_delta = 0;
3208         return 0;
3209 }
3210
3211 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *filp_priv)
3212 {
3213         struct drm_radeon_driver_file_fields *radeon_priv =
3214             filp_priv->driver_priv;
3215
3216         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3217 }
3218
3219 drm_ioctl_desc_t radeon_ioctls[] = {
3220         [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3221         [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3222         [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3223         [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3224         [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
3225         [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
3226         [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
3227         [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
3228         [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
3229         [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
3230         [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
3231         [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
3232         [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
3233         [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
3234         [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3235         [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
3236         [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
3237         [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
3238         [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
3239         [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
3240         [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
3241         [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3242         [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
3243         [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
3244         [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
3245         [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
3246         [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
3247 };
3248
/* Number of slots in radeon_ioctls[], as computed by DRM_ARRAY_SIZE(). */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);