[PATCH] drm: radeon flush TCL VAP for vertex program enable/disable
[linux-2.6] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     drm_file_t * filp_priv,
43                                                     u32 *offset)
44 {
45         u32 off = *offset;
46         struct drm_radeon_driver_file_fields *radeon_priv;
47
48         /* Hrm ... the story of the offset ... So this function converts
49          * the various ideas of what userland clients might have for an
50          * offset in the card address space into an offset into the card
51          * address space :) So with a sane client, it should just keep
52          * the value intact and just do some boundary checking. However,
53          * not all clients are sane. Some older clients pass us 0 based
54          * offsets relative to the start of the framebuffer and some may
55          * assume the AGP aperture it appended to the framebuffer, so we
56          * try to detect those cases and fix them up.
57          *
58          * Note: It might be a good idea here to make sure the offset lands
59          * in some "allowed" area to protect things like the PCIE GART...
60          */
61
62         /* First, the best case, the offset already lands in either the
63          * framebuffer or the GART mapped space
64          */
65         if ((off >= dev_priv->fb_location &&
66              off < (dev_priv->fb_location + dev_priv->fb_size)) ||
67             (off >= dev_priv->gart_vm_start &&
68              off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
69                 return 0;
70
71         /* Ok, that didn't happen... now check if we have a zero based
72          * offset that fits in the framebuffer + gart space, apply the
73          * magic offset we get from SETPARAM or calculated from fb_location
74          */
75         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
76                 radeon_priv = filp_priv->driver_priv;
77                 off += radeon_priv->radeon_fb_delta;
78         }
79
80         /* Finally, assume we aimed at a GART offset if beyond the fb */
81         if (off > (dev_priv->fb_location + dev_priv->fb_size))
82                 off = off - (dev_priv->fb_location + dev_priv->fb_size) +
83                         dev_priv->gart_vm_start;
84
85         /* Now recheck and fail if out of bounds */
86         if ((off >= dev_priv->fb_location &&
87              off < (dev_priv->fb_location + dev_priv->fb_size)) ||
88             (off >= dev_priv->gart_vm_start &&
89              off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
90                 DRM_DEBUG("offset fixed up to 0x%x\n", off);
91                 *offset = off;
92                 return 0;
93         }
94         return DRM_ERR(EINVAL);
95 }
96
/* Validate (and fix up) the memory offsets embedded in one type-0 state
 * packet from the client.  @id selects which RADEON_EMIT_* / R200_EMIT_*
 * register range the packet writes and therefore which dwords of @data
 * contain card-space addresses that must pass
 * radeon_check_and_fixup_offset().  Packets with no embedded offsets are
 * accepted as-is; an unknown @id is rejected with -EINVAL.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		/* Depth buffer offset lives partway into the PP_MISC range. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		/* Colour buffer offset lives partway into the PP_CNTL range. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		/* R200 texture offset packets carry the offset in dword 0. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		/* R100 texture offset is embedded inside the TXFILTER range. */
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			/* Five cube-face offsets per unit, one per dword. */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			/* Same layout as the R200 case: five face offsets. */
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case R200_EMIT_VAP_CTL:{
			/* Flush the TCL pipeline before VAP_CNTL changes
			 * (vertex program enable/disable) take effect.
			 */
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
270
271 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
272                                                      dev_priv,
273                                                      drm_file_t *filp_priv,
274                                                      drm_radeon_kcmd_buffer_t *
275                                                      cmdbuf,
276                                                      unsigned int *cmdsz)
277 {
278         u32 *cmd = (u32 *) cmdbuf->buf;
279
280         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
281
282         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
283                 DRM_ERROR("Not a type 3 packet\n");
284                 return DRM_ERR(EINVAL);
285         }
286
287         if (4 * *cmdsz > cmdbuf->bufsz) {
288                 DRM_ERROR("Packet size larger than size of data provided\n");
289                 return DRM_ERR(EINVAL);
290         }
291
292         /* Check client state and fix it up if necessary */
293         if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
294                 u32 offset;
295
296                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
297                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
298                         offset = cmd[2] << 10;
299                         if (radeon_check_and_fixup_offset
300                             (dev_priv, filp_priv, &offset)) {
301                                 DRM_ERROR("Invalid first packet offset\n");
302                                 return DRM_ERR(EINVAL);
303                         }
304                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
305                 }
306
307                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
308                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
309                         offset = cmd[3] << 10;
310                         if (radeon_check_and_fixup_offset
311                             (dev_priv, filp_priv, &offset)) {
312                                 DRM_ERROR("Invalid second packet offset\n");
313                                 return DRM_ERR(EINVAL);
314                         }
315                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
316                 }
317         }
318
319         return 0;
320 }
321
322 /* ================================================================
323  * CP hardware state programming functions
324  */
325
326 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
327                                              drm_clip_rect_t * box)
328 {
329         RING_LOCALS;
330
331         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
332                   box->x1, box->y1, box->x2, box->y2);
333
334         BEGIN_RING(4);
335         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
336         OUT_RING((box->y1 << 16) | box->x1);
337         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
338         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
339         ADVANCE_RING();
340 }
341
342 /* Emit 1.1 state
343  */
344 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
345                              drm_file_t * filp_priv,
346                              drm_radeon_context_regs_t * ctx,
347                              drm_radeon_texture_regs_t * tex,
348                              unsigned int dirty)
349 {
350         RING_LOCALS;
351         DRM_DEBUG("dirty=0x%08x\n", dirty);
352
353         if (dirty & RADEON_UPLOAD_CONTEXT) {
354                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
355                                                   &ctx->rb3d_depthoffset)) {
356                         DRM_ERROR("Invalid depth buffer offset\n");
357                         return DRM_ERR(EINVAL);
358                 }
359
360                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
361                                                   &ctx->rb3d_coloroffset)) {
362                         DRM_ERROR("Invalid depth buffer offset\n");
363                         return DRM_ERR(EINVAL);
364                 }
365
366                 BEGIN_RING(14);
367                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
368                 OUT_RING(ctx->pp_misc);
369                 OUT_RING(ctx->pp_fog_color);
370                 OUT_RING(ctx->re_solid_color);
371                 OUT_RING(ctx->rb3d_blendcntl);
372                 OUT_RING(ctx->rb3d_depthoffset);
373                 OUT_RING(ctx->rb3d_depthpitch);
374                 OUT_RING(ctx->rb3d_zstencilcntl);
375                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
376                 OUT_RING(ctx->pp_cntl);
377                 OUT_RING(ctx->rb3d_cntl);
378                 OUT_RING(ctx->rb3d_coloroffset);
379                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
380                 OUT_RING(ctx->rb3d_colorpitch);
381                 ADVANCE_RING();
382         }
383
384         if (dirty & RADEON_UPLOAD_VERTFMT) {
385                 BEGIN_RING(2);
386                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
387                 OUT_RING(ctx->se_coord_fmt);
388                 ADVANCE_RING();
389         }
390
391         if (dirty & RADEON_UPLOAD_LINE) {
392                 BEGIN_RING(5);
393                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
394                 OUT_RING(ctx->re_line_pattern);
395                 OUT_RING(ctx->re_line_state);
396                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
397                 OUT_RING(ctx->se_line_width);
398                 ADVANCE_RING();
399         }
400
401         if (dirty & RADEON_UPLOAD_BUMPMAP) {
402                 BEGIN_RING(5);
403                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
404                 OUT_RING(ctx->pp_lum_matrix);
405                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
406                 OUT_RING(ctx->pp_rot_matrix_0);
407                 OUT_RING(ctx->pp_rot_matrix_1);
408                 ADVANCE_RING();
409         }
410
411         if (dirty & RADEON_UPLOAD_MASKS) {
412                 BEGIN_RING(4);
413                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
414                 OUT_RING(ctx->rb3d_stencilrefmask);
415                 OUT_RING(ctx->rb3d_ropcntl);
416                 OUT_RING(ctx->rb3d_planemask);
417                 ADVANCE_RING();
418         }
419
420         if (dirty & RADEON_UPLOAD_VIEWPORT) {
421                 BEGIN_RING(7);
422                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
423                 OUT_RING(ctx->se_vport_xscale);
424                 OUT_RING(ctx->se_vport_xoffset);
425                 OUT_RING(ctx->se_vport_yscale);
426                 OUT_RING(ctx->se_vport_yoffset);
427                 OUT_RING(ctx->se_vport_zscale);
428                 OUT_RING(ctx->se_vport_zoffset);
429                 ADVANCE_RING();
430         }
431
432         if (dirty & RADEON_UPLOAD_SETUP) {
433                 BEGIN_RING(4);
434                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
435                 OUT_RING(ctx->se_cntl);
436                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
437                 OUT_RING(ctx->se_cntl_status);
438                 ADVANCE_RING();
439         }
440
441         if (dirty & RADEON_UPLOAD_MISC) {
442                 BEGIN_RING(2);
443                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
444                 OUT_RING(ctx->re_misc);
445                 ADVANCE_RING();
446         }
447
448         if (dirty & RADEON_UPLOAD_TEX0) {
449                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
450                                                   &tex[0].pp_txoffset)) {
451                         DRM_ERROR("Invalid texture offset for unit 0\n");
452                         return DRM_ERR(EINVAL);
453                 }
454
455                 BEGIN_RING(9);
456                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
457                 OUT_RING(tex[0].pp_txfilter);
458                 OUT_RING(tex[0].pp_txformat);
459                 OUT_RING(tex[0].pp_txoffset);
460                 OUT_RING(tex[0].pp_txcblend);
461                 OUT_RING(tex[0].pp_txablend);
462                 OUT_RING(tex[0].pp_tfactor);
463                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
464                 OUT_RING(tex[0].pp_border_color);
465                 ADVANCE_RING();
466         }
467
468         if (dirty & RADEON_UPLOAD_TEX1) {
469                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
470                                                   &tex[1].pp_txoffset)) {
471                         DRM_ERROR("Invalid texture offset for unit 1\n");
472                         return DRM_ERR(EINVAL);
473                 }
474
475                 BEGIN_RING(9);
476                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
477                 OUT_RING(tex[1].pp_txfilter);
478                 OUT_RING(tex[1].pp_txformat);
479                 OUT_RING(tex[1].pp_txoffset);
480                 OUT_RING(tex[1].pp_txcblend);
481                 OUT_RING(tex[1].pp_txablend);
482                 OUT_RING(tex[1].pp_tfactor);
483                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
484                 OUT_RING(tex[1].pp_border_color);
485                 ADVANCE_RING();
486         }
487
488         if (dirty & RADEON_UPLOAD_TEX2) {
489                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
490                                                   &tex[2].pp_txoffset)) {
491                         DRM_ERROR("Invalid texture offset for unit 2\n");
492                         return DRM_ERR(EINVAL);
493                 }
494
495                 BEGIN_RING(9);
496                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
497                 OUT_RING(tex[2].pp_txfilter);
498                 OUT_RING(tex[2].pp_txformat);
499                 OUT_RING(tex[2].pp_txoffset);
500                 OUT_RING(tex[2].pp_txcblend);
501                 OUT_RING(tex[2].pp_txablend);
502                 OUT_RING(tex[2].pp_tfactor);
503                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
504                 OUT_RING(tex[2].pp_border_color);
505                 ADVANCE_RING();
506         }
507
508         return 0;
509 }
510
511 /* Emit 1.2 state
512  */
513 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
514                               drm_file_t * filp_priv,
515                               drm_radeon_state_t * state)
516 {
517         RING_LOCALS;
518
519         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
520                 BEGIN_RING(3);
521                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
522                 OUT_RING(state->context2.se_zbias_factor);
523                 OUT_RING(state->context2.se_zbias_constant);
524                 ADVANCE_RING();
525         }
526
527         return radeon_emit_state(dev_priv, filp_priv, &state->context,
528                                  state->tex, state->dirty);
529 }
530
531 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
532  * 1.3 cmdbuffers allow all previous state to be updated as well as
533  * the tcl scalar and vector areas.
534  */
/* Register-range table for the 1.3 packet command: one entry per
 * RADEON_MAX_STATE_PACKETS packet ID, giving the first register, the
 * number of consecutive registers written, and a name for diagnostics.
 * NOTE: entry order must match the RADEON_EMIT_* / R200_EMIT_* packet
 * IDs used by radeon_check_and_fixup_packets() — do not reorder.
 */
static struct {
	int start;		/* first register in the range */
	int len;		/* number of registers written */
	const char *name;	/* human-readable name for error messages */
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
		    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
		    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	  /* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
639
640 /* ================================================================
641  * Performance monitoring functions
642  */
643
644 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
645                              int x, int y, int w, int h, int r, int g, int b)
646 {
647         u32 color;
648         RING_LOCALS;
649
650         x += dev_priv->sarea_priv->boxes[0].x1;
651         y += dev_priv->sarea_priv->boxes[0].y1;
652
653         switch (dev_priv->color_fmt) {
654         case RADEON_COLOR_FORMAT_RGB565:
655                 color = (((r & 0xf8) << 8) |
656                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
657                 break;
658         case RADEON_COLOR_FORMAT_ARGB8888:
659         default:
660                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
661                 break;
662         }
663
664         BEGIN_RING(4);
665         RADEON_WAIT_UNTIL_3D_IDLE();
666         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
667         OUT_RING(0xffffffff);
668         ADVANCE_RING();
669
670         BEGIN_RING(6);
671
672         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
673         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
674                  RADEON_GMC_BRUSH_SOLID_COLOR |
675                  (dev_priv->color_fmt << 8) |
676                  RADEON_GMC_SRC_DATATYPE_COLOR |
677                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
678
679         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
680                 OUT_RING(dev_priv->front_pitch_offset);
681         } else {
682                 OUT_RING(dev_priv->back_pitch_offset);
683         }
684
685         OUT_RING(color);
686
687         OUT_RING((x << 16) | y);
688         OUT_RING((w << 16) | h);
689
690         ADVANCE_RING();
691 }
692
693 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
694 {
695         /* Collapse various things into a wait flag -- trying to
696          * guess if userspase slept -- better just to have them tell us.
697          */
698         if (dev_priv->stats.last_frame_reads > 1 ||
699             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
700                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
701         }
702
703         if (dev_priv->stats.freelist_loops) {
704                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
705         }
706
707         /* Purple box for page flipping
708          */
709         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
710                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
711
712         /* Red box if we have to wait for idle at any point
713          */
714         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
715                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
716
717         /* Blue box: lost context?
718          */
719
720         /* Yellow box for texture swaps
721          */
722         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
723                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
724
725         /* Green box if hardware never idles (as far as we can tell)
726          */
727         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
728                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
729
730         /* Draw bars indicating number of buffers allocated
731          * (not a great measure, easily confused)
732          */
733         if (dev_priv->stats.requested_bufs) {
734                 if (dev_priv->stats.requested_bufs > 100)
735                         dev_priv->stats.requested_bufs = 100;
736
737                 radeon_clear_box(dev_priv, 4, 16,
738                                  dev_priv->stats.requested_bufs, 4,
739                                  196, 128, 128);
740         }
741
742         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
743
744 }
745
746 /* ================================================================
747  * CP command dispatch functions
748  */
749
/* Emit CP commands to clear any combination of front, back, depth and
 * stencil buffers for every cliprect in the SAREA.  Colour buffers are
 * cleared with 2D solid fills; depth/stencil either via the hyper-z
 * fast-clear path or by rendering quads into just those buffers.
 */
static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	/* When page-flipped, the logical front and back buffers have
	 * swapped physical locations, so swap the request to match.
	 */
	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	/* Colour-buffer clears: 2D solid fills per cliprect. */
	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->
					  color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engeneering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		/* Depth pixels per scanline: 16-bit Z packs 2 per dword,
		 * everything else 1 per dword.
		 */
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
						       2) : (dev_priv->
							     depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		/* Program the fast-z clear value and flush the Z cache. */
		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		/* Clear the z-mask tiles covering each cliprect; the tile
		 * geometry differs per chip generation.
		 */
		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truely 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		/* R200 path: set up a minimal 3D pipeline state (TCL
		 * disabled, planemask 0 so colour is untouched) and draw
		 * one rect-list quad per cliprect.
		 */
		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		/* Planemask 0: don't write any colour bits. */
		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			/* Three (x, y, z, w) vertices per cliprect;
			 * w is 1.0f (0x3f800000).
			 */
			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		/* Pre-R200 path: same quad-rendering idea with the older
		 * register set and immediate-mode draw packet.
		 */
		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
1240
1241 static void radeon_cp_dispatch_swap(drm_device_t * dev)
1242 {
1243         drm_radeon_private_t *dev_priv = dev->dev_private;
1244         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1245         int nbox = sarea_priv->nbox;
1246         drm_clip_rect_t *pbox = sarea_priv->boxes;
1247         int i;
1248         RING_LOCALS;
1249         DRM_DEBUG("\n");
1250
1251         /* Do some trivial performance monitoring...
1252          */
1253         if (dev_priv->do_boxes)
1254                 radeon_cp_performance_boxes(dev_priv);
1255
1256         /* Wait for the 3D stream to idle before dispatching the bitblt.
1257          * This will prevent data corruption between the two streams.
1258          */
1259         BEGIN_RING(2);
1260
1261         RADEON_WAIT_UNTIL_3D_IDLE();
1262
1263         ADVANCE_RING();
1264
1265         for (i = 0; i < nbox; i++) {
1266                 int x = pbox[i].x1;
1267                 int y = pbox[i].y1;
1268                 int w = pbox[i].x2 - x;
1269                 int h = pbox[i].y2 - y;
1270
1271                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1272
1273                 BEGIN_RING(7);
1274
1275                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1276                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1277                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1278                          RADEON_GMC_BRUSH_NONE |
1279                          (dev_priv->color_fmt << 8) |
1280                          RADEON_GMC_SRC_DATATYPE_COLOR |
1281                          RADEON_ROP3_S |
1282                          RADEON_DP_SRC_SOURCE_MEMORY |
1283                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1284
1285                 /* Make this work even if front & back are flipped:
1286                  */
1287                 if (dev_priv->current_page == 0) {
1288                         OUT_RING(dev_priv->back_pitch_offset);
1289                         OUT_RING(dev_priv->front_pitch_offset);
1290                 } else {
1291                         OUT_RING(dev_priv->front_pitch_offset);
1292                         OUT_RING(dev_priv->back_pitch_offset);
1293                 }
1294
1295                 OUT_RING((x << 16) | y);
1296                 OUT_RING((x << 16) | y);
1297                 OUT_RING((w << 16) | h);
1298
1299                 ADVANCE_RING();
1300         }
1301
1302         /* Increment the frame counter.  The client-side 3D driver must
1303          * throttle the framerate by waiting for this value before
1304          * performing the swapbuffer ioctl.
1305          */
1306         dev_priv->sarea_priv->last_frame++;
1307
1308         BEGIN_RING(4);
1309
1310         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1311         RADEON_WAIT_UNTIL_2D_IDLE();
1312
1313         ADVANCE_RING();
1314 }
1315
/* Perform a page flip: repoint both CRTCs at the other buffer and
 * toggle dev_priv->current_page / the shared pfCurrentPage.
 */
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	/* The buffer we are flipping *to*: the one not currently shown. */
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	/* CRTC1: base offset plus the byte offset of the sarea frame
	 * origin (color_fmt - 2 is used here as bytes per pixel;
	 * masked to 8-byte alignment).
	 */
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	/* Toggle the page both locally and in the shared SAREA. */
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
1362
1363 static int bad_prim_vertex_nr(int primitive, int nr)
1364 {
1365         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1366         case RADEON_PRIM_TYPE_NONE:
1367         case RADEON_PRIM_TYPE_POINT:
1368                 return nr < 1;
1369         case RADEON_PRIM_TYPE_LINE:
1370                 return (nr & 1) || nr == 0;
1371         case RADEON_PRIM_TYPE_LINE_STRIP:
1372                 return nr < 2;
1373         case RADEON_PRIM_TYPE_TRI_LIST:
1374         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1375         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1376         case RADEON_PRIM_TYPE_RECT_LIST:
1377                 return nr % 3 || nr == 0;
1378         case RADEON_PRIM_TYPE_TRI_FAN:
1379         case RADEON_PRIM_TYPE_TRI_STRIP:
1380                 return nr < 3;
1381         default:
1382                 return 1;
1383         }
1384 }
1385
/* Description of one TCL primitive to dispatch from a DMA buffer. */
typedef struct {
	unsigned int start;	/* byte offset of first data in the buffer */
	unsigned int finish;	/* byte offset just past the last data */
	unsigned int prim;	/* hardware primitive type (RADEON_PRIM_TYPE_*) */
	unsigned int numverts;	/* number of vertices */
	unsigned int offset;	/* vertex array offset — used for indexed draws */
	unsigned int vc_format;	/* hardware vertex format word */
} drm_radeon_tcl_prim_t;
1394
/* Emit the rendering commands for one vertex-buffer primitive,
 * repeating the draw once per SAREA cliprect (and at least once when
 * there are no cliprects).
 */
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART address of the vertex data inside the DMA buffer. */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	/* Refuse vertex counts that are illegal for this primitive type. */
	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
1440
/* Retire a DMA buffer: stamp it with a new dispatch age, emit that age
 * on the ring, and mark the buffer pending so the freelist can reclaim
 * it once the CP has written the age back.
 */
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1457
/* Fire the byte range [start, end) of a command buffer at the CP as an
 * indirect buffer.  No-op when the range is empty.
 */
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		/* GART address of the command data. */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Round the byte length up to whole dwords. */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
1491
/* Dispatch an indexed primitive.  The element buffer reserves
 * RADEON_INDEX_PRIM_OFFSET bytes in front of the index data; the CP
 * packet header is built in place there, then the whole range is fired
 * once per SAREA cliprect as an indirect buffer.
 */
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART address of the vertex array the indices refer to. */
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	/* Indices are 16-bit values. */
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	/* Header must fit before the end of the data and the range must
	 * start 8-byte aligned.
	 */
	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	/* Build the CP draw packet in place in front of the indices. */
	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}
1547
1548 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1549
/* Upload a texture image from user space to the destination in
 * tex->offset by blitting through DMA buffers.  Images larger than one
 * buffer are uploaded in multiple passes; on EAGAIN the (updated)
 * image parameters are copied back so the client can retry the rest.
 * Returns 0 on success or a DRM_ERR code.
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	/* Validate/relocate the client-supplied destination offset. */
	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;	/* 4 bytes per texel */
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;	/* 2 bytes per texel */
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;	/* 1 byte per texel */
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	/* Source pitch in 64-byte units for the blitter. */
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			/* Too big for one buffer: upload as many whole
			 * scanlines as fit; the loop handles the rest.
			 */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		/* NOTE(review): this retry path is deliberately disabled
		 * ("0 &&"); on buffer starvation we fall through and
		 * return EAGAIN to the client instead of idling the CP.
		 */
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* Copy the updated image params back so user
			 * space can resume where we stopped.
			 */
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

/* Copy-from-user helper: fails the whole ioctl with EFAULT. */
#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return DRM_ERR(EFAULT); \
		} \
	} while(0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				/* Narrow rows: one row per 16-byte tile line. */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		/* Blit the staged data to the texture destination. */
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1784
/* Upload a 32-dword polygon stipple pattern into the RE stipple RAM. */
static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	/* Reset the auto-incrementing stipple write address to entry 0. */
	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	/* Stream all 32 pattern dwords through the data register. */
	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}
1804
/* Write one surface's state (flags and address bounds) to the hardware
 * surface registers.  The CP is idled first so in-flight rendering
 * cannot observe a half-updated surface.
 */
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;		/* registers not mapped yet */

	radeon_do_cp_idle(dev_priv);

	/* Surface register banks are 16 bytes apart per surface index. */
	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}
1820
1821 /* Allocates a virtual surface
1822  * doesn't always allocate a real surface, will stretch an existing
1823  * surface when possible.
1824  *
1825  * Note that refcount can be at most 2, since during a free refcount=3
1826  * might mean we have to allocate a new surface which might not always
1827  * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
1829  * freed, we suddenly need two surfaces to store A and C, which might
1830  * not always be available.
1831  */
1832 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1833                          drm_radeon_private_t *dev_priv, DRMFILE filp)
1834 {
1835         struct radeon_virt_surface *s;
1836         int i;
1837         int virt_surface_index;
1838         uint32_t new_upper, new_lower;
1839
1840         new_lower = new->address;
1841         new_upper = new_lower + new->size - 1;
1842
1843         /* sanity check */
1844         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1845             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1846              RADEON_SURF_ADDRESS_FIXED_MASK)
1847             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1848                 return -1;
1849
1850         /* make sure there is no overlap with existing surfaces */
1851         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1852                 if ((dev_priv->surfaces[i].refcount != 0) &&
1853                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1854                       (new_lower < dev_priv->surfaces[i].upper)) ||
1855                      ((new_lower < dev_priv->surfaces[i].lower) &&
1856                       (new_upper > dev_priv->surfaces[i].lower)))) {
1857                         return -1;
1858                 }
1859         }
1860
1861         /* find a virtual surface */
1862         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1863                 if (dev_priv->virt_surfaces[i].filp == 0)
1864                         break;
1865         if (i == 2 * RADEON_MAX_SURFACES) {
1866                 return -1;
1867         }
1868         virt_surface_index = i;
1869
1870         /* try to reuse an existing surface */
1871         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1872                 /* extend before */
1873                 if ((dev_priv->surfaces[i].refcount == 1) &&
1874                     (new->flags == dev_priv->surfaces[i].flags) &&
1875                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1876                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1877                         s->surface_index = i;
1878                         s->lower = new_lower;
1879                         s->upper = new_upper;
1880                         s->flags = new->flags;
1881                         s->filp = filp;
1882                         dev_priv->surfaces[i].refcount++;
1883                         dev_priv->surfaces[i].lower = s->lower;
1884                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1885                         return virt_surface_index;
1886                 }
1887
1888                 /* extend after */
1889                 if ((dev_priv->surfaces[i].refcount == 1) &&
1890                     (new->flags == dev_priv->surfaces[i].flags) &&
1891                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1892                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1893                         s->surface_index = i;
1894                         s->lower = new_lower;
1895                         s->upper = new_upper;
1896                         s->flags = new->flags;
1897                         s->filp = filp;
1898                         dev_priv->surfaces[i].refcount++;
1899                         dev_priv->surfaces[i].upper = s->upper;
1900                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1901                         return virt_surface_index;
1902                 }
1903         }
1904
1905         /* okay, we need a new one */
1906         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1907                 if (dev_priv->surfaces[i].refcount == 0) {
1908                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1909                         s->surface_index = i;
1910                         s->lower = new_lower;
1911                         s->upper = new_upper;
1912                         s->flags = new->flags;
1913                         s->filp = filp;
1914                         dev_priv->surfaces[i].refcount = 1;
1915                         dev_priv->surfaces[i].lower = s->lower;
1916                         dev_priv->surfaces[i].upper = s->upper;
1917                         dev_priv->surfaces[i].flags = s->flags;
1918                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1919                         return virt_surface_index;
1920                 }
1921         }
1922
1923         /* we didn't find anything */
1924         return -1;
1925 }
1926
1927 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
1928                         int lower)
1929 {
1930         struct radeon_virt_surface *s;
1931         int i;
1932         /* find the virtual surface */
1933         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1934                 s = &(dev_priv->virt_surfaces[i]);
1935                 if (s->filp) {
1936                         if ((lower == s->lower) && (filp == s->filp)) {
1937                                 if (dev_priv->surfaces[s->surface_index].
1938                                     lower == s->lower)
1939                                         dev_priv->surfaces[s->surface_index].
1940                                             lower = s->upper;
1941
1942                                 if (dev_priv->surfaces[s->surface_index].
1943                                     upper == s->upper)
1944                                         dev_priv->surfaces[s->surface_index].
1945                                             upper = s->lower;
1946
1947                                 dev_priv->surfaces[s->surface_index].refcount--;
1948                                 if (dev_priv->surfaces[s->surface_index].
1949                                     refcount == 0)
1950                                         dev_priv->surfaces[s->surface_index].
1951                                             flags = 0;
1952                                 s->filp = NULL;
1953                                 radeon_apply_surface_regs(s->surface_index,
1954                                                           dev_priv);
1955                                 return 0;
1956                         }
1957                 }
1958         }
1959         return 1;
1960 }
1961
1962 static void radeon_surfaces_release(DRMFILE filp,
1963                                     drm_radeon_private_t * dev_priv)
1964 {
1965         int i;
1966         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1967                 if (dev_priv->virt_surfaces[i].filp == filp)
1968                         free_surface(filp, dev_priv,
1969                                      dev_priv->virt_surfaces[i].lower);
1970         }
1971 }
1972
1973 /* ================================================================
1974  * IOCTL functions
1975  */
1976 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1977 {
1978         DRM_DEVICE;
1979         drm_radeon_private_t *dev_priv = dev->dev_private;
1980         drm_radeon_surface_alloc_t alloc;
1981
1982         DRM_COPY_FROM_USER_IOCTL(alloc,
1983                                  (drm_radeon_surface_alloc_t __user *) data,
1984                                  sizeof(alloc));
1985
1986         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1987                 return DRM_ERR(EINVAL);
1988         else
1989                 return 0;
1990 }
1991
1992 static int radeon_surface_free(DRM_IOCTL_ARGS)
1993 {
1994         DRM_DEVICE;
1995         drm_radeon_private_t *dev_priv = dev->dev_private;
1996         drm_radeon_surface_free_t memfree;
1997
1998         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
1999                                  sizeof(memfree));
2000
2001         if (free_surface(filp, dev_priv, memfree.address))
2002                 return DRM_ERR(EINVAL);
2003         else
2004                 return 0;
2005 }
2006
/* Ioctl: clear the color/depth buffers over the SAREA cliprects, using
 * depth clear rectangles supplied by the client.
 */
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the client-written box count to the SAREA limit. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
2035
2036 /* Not sure why this isn't set all the time:
2037  */
/* Enable page flipping: set the FLIP_CNTL bit in both CRTC offset
 * control registers (read-modify-write via the ring) and initialize
 * the current-page bookkeeping to page 0.
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	/* Start on page 0 and mirror that in the shared area. */
	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
2061
2062 /* Called whenever a client dies, from drm_release.
2063  * NOTE:  Lock isn't necessarily held when this is called!
2064  */
2065 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2066 {
2067         drm_radeon_private_t *dev_priv = dev->dev_private;
2068         DRM_DEBUG("\n");
2069
2070         if (dev_priv->current_page != 0)
2071                 radeon_cp_dispatch_flip(dev);
2072
2073         dev_priv->page_flipping = 0;
2074         return 0;
2075 }
2076
2077 /* Swapping and flipping are different operations, need different ioctls.
2078  * They can & should be intermixed to support multiple 3d windows.
2079  */
/* Ioctl: perform a page flip, lazily enabling flipping on first use. */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* First flip ever: switch the CRTCs into flip mode. */
	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
2098
/* DRM_IOCTL_RADEON_SWAP handler: blit the back buffer to the front
 * buffer for each SAREA cliprect.
 */
static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp client-supplied cliprect count to the SAREA limit. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	/* The swap invalidates any cached 3D context ownership. */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
2119
/* DRM_IOCTL_RADEON_VERTEX handler: dispatch a client vertex buffer as a
 * single TCL primitive, emitting any dirty SAREA state first.
 */
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	/* Validate client-supplied buffer index and primitive type. */
	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to this client and must not already be
	 * queued on the hardware.
	 */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		/* Emit any dirty state (other than cliprects) before the
		 * primitive so it is rendered with current settings.
		 */
		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			/* Texture images and quiescence are handled by the
			 * emit above; clear those dirty bits now.
			 */
			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	/* Client may ask for the buffer to be released after dispatch. */
	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2202
2203 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2204 {
2205         DRM_DEVICE;
2206         drm_radeon_private_t *dev_priv = dev->dev_private;
2207         drm_file_t *filp_priv;
2208         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2209         drm_device_dma_t *dma = dev->dma;
2210         drm_buf_t *buf;
2211         drm_radeon_indices_t elts;
2212         drm_radeon_tcl_prim_t prim;
2213         int count;
2214
2215         LOCK_TEST_WITH_RETURN(dev, filp);
2216
2217         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2218
2219         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2220                                  sizeof(elts));
2221
2222         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2223                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2224
2225         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2226                 DRM_ERROR("buffer index %d (of %d max)\n",
2227                           elts.idx, dma->buf_count - 1);
2228                 return DRM_ERR(EINVAL);
2229         }
2230         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2231                 DRM_ERROR("buffer prim %d\n", elts.prim);
2232                 return DRM_ERR(EINVAL);
2233         }
2234
2235         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2236         VB_AGE_TEST_WITH_RETURN(dev_priv);
2237
2238         buf = dma->buflist[elts.idx];
2239
2240         if (buf->filp != filp) {
2241                 DRM_ERROR("process %d using buffer owned by %p\n",
2242                           DRM_CURRENTPID, buf->filp);
2243                 return DRM_ERR(EINVAL);
2244         }
2245         if (buf->pending) {
2246                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2247                 return DRM_ERR(EINVAL);
2248         }
2249
2250         count = (elts.end - elts.start) / sizeof(u16);
2251         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2252
2253         if (elts.start & 0x7) {
2254                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2255                 return DRM_ERR(EINVAL);
2256         }
2257         if (elts.start < buf->used) {
2258                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2259                 return DRM_ERR(EINVAL);
2260         }
2261
2262         buf->used = elts.end;
2263
2264         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2265                 if (radeon_emit_state(dev_priv, filp_priv,
2266                                       &sarea_priv->context_state,
2267                                       sarea_priv->tex_state,
2268                                       sarea_priv->dirty)) {
2269                         DRM_ERROR("radeon_emit_state failed\n");
2270                         return DRM_ERR(EINVAL);
2271                 }
2272
2273                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2274                                        RADEON_UPLOAD_TEX1IMAGES |
2275                                        RADEON_UPLOAD_TEX2IMAGES |
2276                                        RADEON_REQUIRE_QUIESCENCE);
2277         }
2278
2279         /* Build up a prim_t record:
2280          */
2281         prim.start = elts.start;
2282         prim.finish = elts.end;
2283         prim.prim = elts.prim;
2284         prim.offset = 0;        /* offset from start of dma buffers */
2285         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2286         prim.vc_format = dev_priv->sarea_priv->vc_format;
2287
2288         radeon_cp_dispatch_indices(dev, buf, &prim);
2289         if (elts.discard) {
2290                 radeon_cp_discard_buffer(dev, buf);
2291         }
2292
2293         COMMIT_RING();
2294         return 0;
2295 }
2296
/* DRM_IOCTL_RADEON_TEXTURE handler: upload a texture image described by
 * a userspace drm_radeon_texture_t / drm_radeon_tex_image_t pair.
 */
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
				 sizeof(tex));

	/* tex.image is a nested user pointer; reject NULL explicitly. */
	if (tex.image == NULL) {
		DRM_ERROR("null texture image!\n");
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex.image,
			       sizeof(image)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Dispatch does the actual blit; propagate its status. */
	ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

	COMMIT_RING();
	return ret;
}
2328
/* DRM_IOCTL_RADEON_STIPPLE handler: load the 32x32 polygon stipple
 * pattern (32 x u32 words) from userspace into the hardware.
 */
static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));

	/* stipple.mask is a nested user pointer to the 32-word pattern. */
	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}
2351
/* DRM_IOCTL_RADEON_INDIRECT handler: submit a raw command buffer
 * (typically from the X server).  The buffer contents are not verified,
 * which is why this ioctl is restricted to privileged clients.
 */
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	/* Validate the client-supplied buffer index. */
	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* The buffer must belong to this client and must not already be
	 * queued on the hardware.
	 */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	/* The new range must not overlap data already dispatched. */
	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2420
/* DRM_IOCTL_RADEON_VERTEX2 handler: dispatch multiple primitives from
 * one DMA buffer, each with an optional state block.  State blocks and
 * primitive records are fetched from user arrays referenced by the
 * drm_radeon_vertex2_t request.
 */
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	/* Validate the client-supplied buffer index. */
	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to this client and must not already be
	 * queued on the hardware.
	 */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	/* Walk the primitive list; 0xff is a sentinel meaning "no state
	 * emitted yet" so the first prim always loads its state block.
	 */
	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		/* Only re-emit state when this prim references a different
		 * state block than the previous one.
		 */
		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		/* Indexed prims index into the vertex array; the rest are
		 * sequential vertex runs.
		 */
		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2519
/* Emit one verified register-write packet (PACKET0) from the command
 * buffer.  The packet id selects a (start register, length) pair from
 * the global `packet` table; the payload is checked and possibly fixed
 * up before being copied to the ring.  Advances cmdbuf past the
 * consumed payload on success.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* Don't read past the end of the in-kernel command buffer copy. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Verify/fix up register offsets the client is not allowed to
	 * set arbitrarily.
	 */
	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	/* Consume the payload from the command buffer. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2555
2556 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2557                                           drm_radeon_cmd_header_t header,
2558                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2559 {
2560         int sz = header.scalars.count;
2561         int start = header.scalars.offset;
2562         int stride = header.scalars.stride;
2563         RING_LOCALS;
2564
2565         BEGIN_RING(3 + sz);
2566         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2567         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2568         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2569         OUT_RING_TABLE(cmdbuf->buf, sz);
2570         ADVANCE_RING();
2571         cmdbuf->buf += sz * sizeof(int);
2572         cmdbuf->bufsz -= sz * sizeof(int);
2573         return 0;
2574 }
2575
2576 /* God this is ugly
2577  */
2578 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2579                                            drm_radeon_cmd_header_t header,
2580                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2581 {
2582         int sz = header.scalars.count;
2583         int start = ((unsigned int)header.scalars.offset) + 0x100;
2584         int stride = header.scalars.stride;
2585         RING_LOCALS;
2586
2587         BEGIN_RING(3 + sz);
2588         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2589         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2590         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2591         OUT_RING_TABLE(cmdbuf->buf, sz);
2592         ADVANCE_RING();
2593         cmdbuf->buf += sz * sizeof(int);
2594         cmdbuf->bufsz -= sz * sizeof(int);
2595         return 0;
2596 }
2597
2598 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2599                                           drm_radeon_cmd_header_t header,
2600                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2601 {
2602         int sz = header.vectors.count;
2603         int start = header.vectors.offset;
2604         int stride = header.vectors.stride;
2605         RING_LOCALS;
2606
2607         BEGIN_RING(5 + sz);
2608         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2609         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2610         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2611         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2612         OUT_RING_TABLE(cmdbuf->buf, sz);
2613         ADVANCE_RING();
2614
2615         cmdbuf->buf += sz * sizeof(int);
2616         cmdbuf->bufsz -= sz * sizeof(int);
2617         return 0;
2618 }
2619
/* Emit a linear vector-register load (stride 1) at the address encoded
 * in the header (addr_lo | addr_hi << 8).  The count field is in units
 * of 4 dwords.  Advances cmdbuf past the consumed payload.
 */
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	/* Nothing to do for an empty load; reject payloads larger than
	 * what remains in the in-kernel command buffer copy.
	 */
	if (!sz)
		return 0;
	if (sz * 4 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	BEGIN_RING(5 + sz);
	/* Flush the TCL state block before touching the vector RAM. */
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	/* Consume the payload from the command buffer. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2645
/* Verify and emit one PACKET3 command from the command buffer.  The
 * verifier also reports the packet size (in dwords) via cmdsz;
 * cmdbuf is advanced past the packet on success.
 */
static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	/* Copy the whole verified packet straight to the ring. */
	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	/* Consume the packet (cmdsz dwords) from the command buffer. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2671
/* Verify one PACKET3 command, then emit it once per cliprect supplied
 * with the command buffer (setting the scissor for each box first).
 * With no cliprects at all, the packet is consumed without being
 * emitted.  cmdbuf is advanced past the packet on return.
 */
static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		/* Replay the same packet for this cliprect. */
		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	/* A single box is consumed once it has been used. */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	/* Consume the packet (cmdsz dwords) from the command buffer. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2733
2734 static int radeon_emit_wait(drm_device_t * dev, int flags)
2735 {
2736         drm_radeon_private_t *dev_priv = dev->dev_private;
2737         RING_LOCALS;
2738
2739         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2740         switch (flags) {
2741         case RADEON_WAIT_2D:
2742                 BEGIN_RING(2);
2743                 RADEON_WAIT_UNTIL_2D_IDLE();
2744                 ADVANCE_RING();
2745                 break;
2746         case RADEON_WAIT_3D:
2747                 BEGIN_RING(2);
2748                 RADEON_WAIT_UNTIL_3D_IDLE();
2749                 ADVANCE_RING();
2750                 break;
2751         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2752                 BEGIN_RING(2);
2753                 RADEON_WAIT_UNTIL_IDLE();
2754                 ADVANCE_RING();
2755                 break;
2756         default:
2757                 return DRM_ERR(EINVAL);
2758         }
2759
2760         return 0;
2761 }
2762
/* DRM_IOCTL_RADEON_CMDBUF handler: copy a client command buffer into
 * the kernel, then parse and emit it command-by-command.  R300-class
 * chips are handed off to r300_do_cp_cmdbuf(); older chips are handled
 * by the switch loop below.  All error paths free the kernel copy.
 */
static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Cap the buffer size at 64KB and reject negative sizes. */
	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
				       cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	/* R300 command streams use a different format; hand off wholesale. */
	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	/* Parse commands one header at a time; each emit helper consumes
	 * its own payload and advances cmdbuf.buf/bufsz.
	 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			/* Release a DMA buffer back to the free pool after
			 * validating index and ownership.
			 */
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	/* Common error exit: free the kernel copy; note the ring is NOT
	 * committed, discarding anything staged by this call.
	 */
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
2932
2933 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2934 {
2935         DRM_DEVICE;
2936         drm_radeon_private_t *dev_priv = dev->dev_private;
2937         drm_radeon_getparam_t param;
2938         int value;
2939
2940         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2941                                  sizeof(param));
2942
2943         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2944
2945         switch (param.param) {
2946         case RADEON_PARAM_GART_BUFFER_OFFSET:
2947                 value = dev_priv->gart_buffers_offset;
2948                 break;
2949         case RADEON_PARAM_LAST_FRAME:
2950                 dev_priv->stats.last_frame_reads++;
2951                 value = GET_SCRATCH(0);
2952                 break;
2953         case RADEON_PARAM_LAST_DISPATCH:
2954                 value = GET_SCRATCH(1);
2955                 break;
2956         case RADEON_PARAM_LAST_CLEAR:
2957                 dev_priv->stats.last_clear_reads++;
2958                 value = GET_SCRATCH(2);
2959                 break;
2960         case RADEON_PARAM_IRQ_NR:
2961                 value = dev->irq;
2962                 break;
2963         case RADEON_PARAM_GART_BASE:
2964                 value = dev_priv->gart_vm_start;
2965                 break;
2966         case RADEON_PARAM_REGISTER_HANDLE:
2967                 value = dev_priv->mmio->offset;
2968                 break;
2969         case RADEON_PARAM_STATUS_HANDLE:
2970                 value = dev_priv->ring_rptr_offset;
2971                 break;
2972 #if BITS_PER_LONG == 32
2973                 /*
2974                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2975                  * pointer which can't fit into an int-sized variable.  According to
2976                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2977                  * not supporting it shouldn't be a problem.  If the same functionality
2978                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2979                  * so backwards-compatibility for the embedded platforms can be
2980                  * maintained.  --davidm 4-Feb-2004.
2981                  */
2982         case RADEON_PARAM_SAREA_HANDLE:
2983                 /* The lock is the first dword in the sarea. */
2984                 value = (long)dev->lock.hw_lock;
2985                 break;
2986 #endif
2987         case RADEON_PARAM_GART_TEX_HANDLE:
2988                 value = dev_priv->gart_textures_offset;
2989                 break;
2990         
2991         case RADEON_PARAM_CARD_TYPE:
2992                 if (dev_priv->flags & CHIP_IS_PCIE)
2993                         value = RADEON_CARD_PCIE;
2994                 else if (dev_priv->flags & CHIP_IS_AGP)
2995                         value = RADEON_CARD_AGP;
2996                 else
2997                         value = RADEON_CARD_PCI;
2998                 break;
2999         default:
3000                 return DRM_ERR(EINVAL);
3001         }
3002
3003         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3004                 DRM_ERROR("copy_to_user\n");
3005                 return DRM_ERR(EFAULT);
3006         }
3007
3008         return 0;
3009 }
3010
3011 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3012 {
3013         DRM_DEVICE;
3014         drm_radeon_private_t *dev_priv = dev->dev_private;
3015         drm_file_t *filp_priv;
3016         drm_radeon_setparam_t sp;
3017         struct drm_radeon_driver_file_fields *radeon_priv;
3018
3019         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3020
3021         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3022                                  sizeof(sp));
3023
3024         switch (sp.param) {
3025         case RADEON_SETPARAM_FB_LOCATION:
3026                 radeon_priv = filp_priv->driver_priv;
3027                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3028                 break;
3029         case RADEON_SETPARAM_SWITCH_TILING:
3030                 if (sp.value == 0) {
3031                         DRM_DEBUG("color tiling disabled\n");
3032                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3033                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3034                         dev_priv->sarea_priv->tiling_enabled = 0;
3035                 } else if (sp.value == 1) {
3036                         DRM_DEBUG("color tiling enabled\n");
3037                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3038                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3039                         dev_priv->sarea_priv->tiling_enabled = 1;
3040                 }
3041                 break;
3042         case RADEON_SETPARAM_PCIGART_LOCATION:
3043                 dev_priv->pcigart_offset = sp.value;
3044                 break;
3045         case RADEON_SETPARAM_NEW_MEMMAP:
3046                 dev_priv->new_memmap = sp.value;
3047                 break;
3048         default:
3049                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3050                 return DRM_ERR(EINVAL);
3051         }
3052
3053         return 0;
3054 }
3055
3056 /* When a client dies:
3057  *    - Check for and clean up flipped page state
3058  *    - Free any alloced GART memory.
3059  *    - Free any alloced radeon surfaces.
3060  *
3061  * DRM infrastructure takes care of reclaiming dma buffers.
3062  */
3063 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3064 {
3065         if (dev->dev_private) {
3066                 drm_radeon_private_t *dev_priv = dev->dev_private;
3067                 if (dev_priv->page_flipping) {
3068                         radeon_do_cleanup_pageflip(dev);
3069                 }
3070                 radeon_mem_release(filp, dev_priv->gart_heap);
3071                 radeon_mem_release(filp, dev_priv->fb_heap);
3072                 radeon_surfaces_release(filp, dev_priv);
3073         }
3074 }
3075
/* Called by the DRM core when the last file handle on the device is
 * closed; delegates all engine/CP teardown to radeon_do_release().
 */
void radeon_driver_lastclose(drm_device_t * dev)
{
	radeon_do_release(dev);
}
3080
3081 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3082 {
3083         drm_radeon_private_t *dev_priv = dev->dev_private;
3084         struct drm_radeon_driver_file_fields *radeon_priv;
3085
3086         DRM_DEBUG("\n");
3087         radeon_priv =
3088             (struct drm_radeon_driver_file_fields *)
3089             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3090
3091         if (!radeon_priv)
3092                 return -ENOMEM;
3093
3094         filp_priv->driver_priv = radeon_priv;
3095
3096         if (dev_priv)
3097                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3098         else
3099                 radeon_priv->radeon_fb_delta = 0;
3100         return 0;
3101 }
3102
3103 void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
3104 {
3105         struct drm_radeon_driver_file_fields *radeon_priv =
3106             filp_priv->driver_priv;
3107
3108         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3109 }
3110
/* Ioctl dispatch table, indexed by DRM_IOCTL_NR of each radeon ioctl.
 * The slot positions are part of the userspace ABI - do not reorder.
 * Entries marked DRM_MASTER|DRM_ROOT_ONLY are privileged setup/teardown
 * operations; plain DRM_AUTH entries are available to any authenticated
 * client.
 */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};

/* Number of entries in radeon_ioctls[]; exported for the DRM core's
 * bounds check on incoming ioctl numbers.
 */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);