Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[linux-2.6] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     drm_file_t * filp_priv,
43                                                     u32 *offset)
44 {
45         u64 off = *offset;
46         u32 fb_start = dev_priv->fb_location;
47         u32 fb_end = fb_start + dev_priv->fb_size - 1;
48         u32 gart_start = dev_priv->gart_vm_start;
49         u32 gart_end = gart_start + dev_priv->gart_size - 1;
50         struct drm_radeon_driver_file_fields *radeon_priv;
51
52         /* Hrm ... the story of the offset ... So this function converts
53          * the various ideas of what userland clients might have for an
54          * offset in the card address space into an offset into the card
55          * address space :) So with a sane client, it should just keep
56          * the value intact and just do some boundary checking. However,
57          * not all clients are sane. Some older clients pass us 0 based
58          * offsets relative to the start of the framebuffer and some may
59          * assume the AGP aperture it appended to the framebuffer, so we
60          * try to detect those cases and fix them up.
61          *
62          * Note: It might be a good idea here to make sure the offset lands
63          * in some "allowed" area to protect things like the PCIE GART...
64          */
65
66         /* First, the best case, the offset already lands in either the
67          * framebuffer or the GART mapped space
68          */
69         if ((off >= fb_start && off <= fb_end) ||
70             (off >= gart_start && off <= gart_end))
71                 return 0;
72
73         /* Ok, that didn't happen... now check if we have a zero based
74          * offset that fits in the framebuffer + gart space, apply the
75          * magic offset we get from SETPARAM or calculated from fb_location
76          */
77         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
78                 radeon_priv = filp_priv->driver_priv;
79                 off += radeon_priv->radeon_fb_delta;
80         }
81
82         /* Finally, assume we aimed at a GART offset if beyond the fb */
83         if (off > fb_end)
84                 off = off - fb_end - 1 + gart_start;
85
86         /* Now recheck and fail if out of bounds */
87         if ((off >= fb_start && off <= fb_end) ||
88             (off >= gart_start && off <= gart_end)) {
89                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
90                 *offset = off;
91                 return 0;
92         }
93         return DRM_ERR(EINVAL);
94 }
95
96 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
97                                                      dev_priv,
98                                                      drm_file_t * filp_priv,
99                                                      int id, u32 *data)
100 {
101         switch (id) {
102
103         case RADEON_EMIT_PP_MISC:
104                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
105                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
106                         DRM_ERROR("Invalid depth buffer offset\n");
107                         return DRM_ERR(EINVAL);
108                 }
109                 break;
110
111         case RADEON_EMIT_PP_CNTL:
112                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
113                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
114                         DRM_ERROR("Invalid colour buffer offset\n");
115                         return DRM_ERR(EINVAL);
116                 }
117                 break;
118
119         case R200_EMIT_PP_TXOFFSET_0:
120         case R200_EMIT_PP_TXOFFSET_1:
121         case R200_EMIT_PP_TXOFFSET_2:
122         case R200_EMIT_PP_TXOFFSET_3:
123         case R200_EMIT_PP_TXOFFSET_4:
124         case R200_EMIT_PP_TXOFFSET_5:
125                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
126                                                   &data[0])) {
127                         DRM_ERROR("Invalid R200 texture offset\n");
128                         return DRM_ERR(EINVAL);
129                 }
130                 break;
131
132         case RADEON_EMIT_PP_TXFILTER_0:
133         case RADEON_EMIT_PP_TXFILTER_1:
134         case RADEON_EMIT_PP_TXFILTER_2:
135                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
136                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
137                         DRM_ERROR("Invalid R100 texture offset\n");
138                         return DRM_ERR(EINVAL);
139                 }
140                 break;
141
142         case R200_EMIT_PP_CUBIC_OFFSETS_0:
143         case R200_EMIT_PP_CUBIC_OFFSETS_1:
144         case R200_EMIT_PP_CUBIC_OFFSETS_2:
145         case R200_EMIT_PP_CUBIC_OFFSETS_3:
146         case R200_EMIT_PP_CUBIC_OFFSETS_4:
147         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
148                         int i;
149                         for (i = 0; i < 5; i++) {
150                                 if (radeon_check_and_fixup_offset(dev_priv,
151                                                                   filp_priv,
152                                                                   &data[i])) {
153                                         DRM_ERROR
154                                             ("Invalid R200 cubic texture offset\n");
155                                         return DRM_ERR(EINVAL);
156                                 }
157                         }
158                         break;
159                 }
160
161         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
162         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
163         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
164                         int i;
165                         for (i = 0; i < 5; i++) {
166                                 if (radeon_check_and_fixup_offset(dev_priv,
167                                                                   filp_priv,
168                                                                   &data[i])) {
169                                         DRM_ERROR
170                                             ("Invalid R100 cubic texture offset\n");
171                                         return DRM_ERR(EINVAL);
172                                 }
173                         }
174                 }
175                 break;
176
177         case R200_EMIT_VAP_CTL:{
178                         RING_LOCALS;
179                         BEGIN_RING(2);
180                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
181                         ADVANCE_RING();
182                 }
183                 break;
184
185         case RADEON_EMIT_RB3D_COLORPITCH:
186         case RADEON_EMIT_RE_LINE_PATTERN:
187         case RADEON_EMIT_SE_LINE_WIDTH:
188         case RADEON_EMIT_PP_LUM_MATRIX:
189         case RADEON_EMIT_PP_ROT_MATRIX_0:
190         case RADEON_EMIT_RB3D_STENCILREFMASK:
191         case RADEON_EMIT_SE_VPORT_XSCALE:
192         case RADEON_EMIT_SE_CNTL:
193         case RADEON_EMIT_SE_CNTL_STATUS:
194         case RADEON_EMIT_RE_MISC:
195         case RADEON_EMIT_PP_BORDER_COLOR_0:
196         case RADEON_EMIT_PP_BORDER_COLOR_1:
197         case RADEON_EMIT_PP_BORDER_COLOR_2:
198         case RADEON_EMIT_SE_ZBIAS_FACTOR:
199         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
200         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
201         case R200_EMIT_PP_TXCBLEND_0:
202         case R200_EMIT_PP_TXCBLEND_1:
203         case R200_EMIT_PP_TXCBLEND_2:
204         case R200_EMIT_PP_TXCBLEND_3:
205         case R200_EMIT_PP_TXCBLEND_4:
206         case R200_EMIT_PP_TXCBLEND_5:
207         case R200_EMIT_PP_TXCBLEND_6:
208         case R200_EMIT_PP_TXCBLEND_7:
209         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
210         case R200_EMIT_TFACTOR_0:
211         case R200_EMIT_VTX_FMT_0:
212         case R200_EMIT_MATRIX_SELECT_0:
213         case R200_EMIT_TEX_PROC_CTL_2:
214         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
215         case R200_EMIT_PP_TXFILTER_0:
216         case R200_EMIT_PP_TXFILTER_1:
217         case R200_EMIT_PP_TXFILTER_2:
218         case R200_EMIT_PP_TXFILTER_3:
219         case R200_EMIT_PP_TXFILTER_4:
220         case R200_EMIT_PP_TXFILTER_5:
221         case R200_EMIT_VTE_CNTL:
222         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
223         case R200_EMIT_PP_TAM_DEBUG3:
224         case R200_EMIT_PP_CNTL_X:
225         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
226         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
227         case R200_EMIT_RE_SCISSOR_TL_0:
228         case R200_EMIT_RE_SCISSOR_TL_1:
229         case R200_EMIT_RE_SCISSOR_TL_2:
230         case R200_EMIT_SE_VAP_CNTL_STATUS:
231         case R200_EMIT_SE_VTX_STATE_CNTL:
232         case R200_EMIT_RE_POINTSIZE:
233         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
234         case R200_EMIT_PP_CUBIC_FACES_0:
235         case R200_EMIT_PP_CUBIC_FACES_1:
236         case R200_EMIT_PP_CUBIC_FACES_2:
237         case R200_EMIT_PP_CUBIC_FACES_3:
238         case R200_EMIT_PP_CUBIC_FACES_4:
239         case R200_EMIT_PP_CUBIC_FACES_5:
240         case RADEON_EMIT_PP_TEX_SIZE_0:
241         case RADEON_EMIT_PP_TEX_SIZE_1:
242         case RADEON_EMIT_PP_TEX_SIZE_2:
243         case R200_EMIT_RB3D_BLENDCOLOR:
244         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
245         case RADEON_EMIT_PP_CUBIC_FACES_0:
246         case RADEON_EMIT_PP_CUBIC_FACES_1:
247         case RADEON_EMIT_PP_CUBIC_FACES_2:
248         case R200_EMIT_PP_TRI_PERF_CNTL:
249         case R200_EMIT_PP_AFS_0:
250         case R200_EMIT_PP_AFS_1:
251         case R200_EMIT_ATF_TFACTOR:
252         case R200_EMIT_PP_TXCTLALL_0:
253         case R200_EMIT_PP_TXCTLALL_1:
254         case R200_EMIT_PP_TXCTLALL_2:
255         case R200_EMIT_PP_TXCTLALL_3:
256         case R200_EMIT_PP_TXCTLALL_4:
257         case R200_EMIT_PP_TXCTLALL_5:
258         case R200_EMIT_VAP_PVS_CNTL:
259                 /* These packets don't contain memory offsets */
260                 break;
261
262         default:
263                 DRM_ERROR("Unknown state packet ID %d\n", id);
264                 return DRM_ERR(EINVAL);
265         }
266
267         return 0;
268 }
269
270 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
271                                                      dev_priv,
272                                                      drm_file_t *filp_priv,
273                                                      drm_radeon_kcmd_buffer_t *
274                                                      cmdbuf,
275                                                      unsigned int *cmdsz)
276 {
277         u32 *cmd = (u32 *) cmdbuf->buf;
278
279         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
280
281         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
282                 DRM_ERROR("Not a type 3 packet\n");
283                 return DRM_ERR(EINVAL);
284         }
285
286         if (4 * *cmdsz > cmdbuf->bufsz) {
287                 DRM_ERROR("Packet size larger than size of data provided\n");
288                 return DRM_ERR(EINVAL);
289         }
290
291         /* Check client state and fix it up if necessary */
292         if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
293                 u32 offset;
294
295                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
296                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
297                         offset = cmd[2] << 10;
298                         if (radeon_check_and_fixup_offset
299                             (dev_priv, filp_priv, &offset)) {
300                                 DRM_ERROR("Invalid first packet offset\n");
301                                 return DRM_ERR(EINVAL);
302                         }
303                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
304                 }
305
306                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
307                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
308                         offset = cmd[3] << 10;
309                         if (radeon_check_and_fixup_offset
310                             (dev_priv, filp_priv, &offset)) {
311                                 DRM_ERROR("Invalid second packet offset\n");
312                                 return DRM_ERR(EINVAL);
313                         }
314                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
315                 }
316         }
317
318         return 0;
319 }
320
321 /* ================================================================
322  * CP hardware state programming functions
323  */
324
325 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
326                                              drm_clip_rect_t * box)
327 {
328         RING_LOCALS;
329
330         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
331                   box->x1, box->y1, box->x2, box->y2);
332
333         BEGIN_RING(4);
334         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
335         OUT_RING((box->y1 << 16) | box->x1);
336         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
337         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
338         ADVANCE_RING();
339 }
340
341 /* Emit 1.1 state
342  */
343 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
344                              drm_file_t * filp_priv,
345                              drm_radeon_context_regs_t * ctx,
346                              drm_radeon_texture_regs_t * tex,
347                              unsigned int dirty)
348 {
349         RING_LOCALS;
350         DRM_DEBUG("dirty=0x%08x\n", dirty);
351
352         if (dirty & RADEON_UPLOAD_CONTEXT) {
353                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
354                                                   &ctx->rb3d_depthoffset)) {
355                         DRM_ERROR("Invalid depth buffer offset\n");
356                         return DRM_ERR(EINVAL);
357                 }
358
359                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
360                                                   &ctx->rb3d_coloroffset)) {
361                         DRM_ERROR("Invalid depth buffer offset\n");
362                         return DRM_ERR(EINVAL);
363                 }
364
365                 BEGIN_RING(14);
366                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
367                 OUT_RING(ctx->pp_misc);
368                 OUT_RING(ctx->pp_fog_color);
369                 OUT_RING(ctx->re_solid_color);
370                 OUT_RING(ctx->rb3d_blendcntl);
371                 OUT_RING(ctx->rb3d_depthoffset);
372                 OUT_RING(ctx->rb3d_depthpitch);
373                 OUT_RING(ctx->rb3d_zstencilcntl);
374                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
375                 OUT_RING(ctx->pp_cntl);
376                 OUT_RING(ctx->rb3d_cntl);
377                 OUT_RING(ctx->rb3d_coloroffset);
378                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
379                 OUT_RING(ctx->rb3d_colorpitch);
380                 ADVANCE_RING();
381         }
382
383         if (dirty & RADEON_UPLOAD_VERTFMT) {
384                 BEGIN_RING(2);
385                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
386                 OUT_RING(ctx->se_coord_fmt);
387                 ADVANCE_RING();
388         }
389
390         if (dirty & RADEON_UPLOAD_LINE) {
391                 BEGIN_RING(5);
392                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
393                 OUT_RING(ctx->re_line_pattern);
394                 OUT_RING(ctx->re_line_state);
395                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
396                 OUT_RING(ctx->se_line_width);
397                 ADVANCE_RING();
398         }
399
400         if (dirty & RADEON_UPLOAD_BUMPMAP) {
401                 BEGIN_RING(5);
402                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
403                 OUT_RING(ctx->pp_lum_matrix);
404                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
405                 OUT_RING(ctx->pp_rot_matrix_0);
406                 OUT_RING(ctx->pp_rot_matrix_1);
407                 ADVANCE_RING();
408         }
409
410         if (dirty & RADEON_UPLOAD_MASKS) {
411                 BEGIN_RING(4);
412                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
413                 OUT_RING(ctx->rb3d_stencilrefmask);
414                 OUT_RING(ctx->rb3d_ropcntl);
415                 OUT_RING(ctx->rb3d_planemask);
416                 ADVANCE_RING();
417         }
418
419         if (dirty & RADEON_UPLOAD_VIEWPORT) {
420                 BEGIN_RING(7);
421                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
422                 OUT_RING(ctx->se_vport_xscale);
423                 OUT_RING(ctx->se_vport_xoffset);
424                 OUT_RING(ctx->se_vport_yscale);
425                 OUT_RING(ctx->se_vport_yoffset);
426                 OUT_RING(ctx->se_vport_zscale);
427                 OUT_RING(ctx->se_vport_zoffset);
428                 ADVANCE_RING();
429         }
430
431         if (dirty & RADEON_UPLOAD_SETUP) {
432                 BEGIN_RING(4);
433                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
434                 OUT_RING(ctx->se_cntl);
435                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
436                 OUT_RING(ctx->se_cntl_status);
437                 ADVANCE_RING();
438         }
439
440         if (dirty & RADEON_UPLOAD_MISC) {
441                 BEGIN_RING(2);
442                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
443                 OUT_RING(ctx->re_misc);
444                 ADVANCE_RING();
445         }
446
447         if (dirty & RADEON_UPLOAD_TEX0) {
448                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
449                                                   &tex[0].pp_txoffset)) {
450                         DRM_ERROR("Invalid texture offset for unit 0\n");
451                         return DRM_ERR(EINVAL);
452                 }
453
454                 BEGIN_RING(9);
455                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
456                 OUT_RING(tex[0].pp_txfilter);
457                 OUT_RING(tex[0].pp_txformat);
458                 OUT_RING(tex[0].pp_txoffset);
459                 OUT_RING(tex[0].pp_txcblend);
460                 OUT_RING(tex[0].pp_txablend);
461                 OUT_RING(tex[0].pp_tfactor);
462                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
463                 OUT_RING(tex[0].pp_border_color);
464                 ADVANCE_RING();
465         }
466
467         if (dirty & RADEON_UPLOAD_TEX1) {
468                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
469                                                   &tex[1].pp_txoffset)) {
470                         DRM_ERROR("Invalid texture offset for unit 1\n");
471                         return DRM_ERR(EINVAL);
472                 }
473
474                 BEGIN_RING(9);
475                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
476                 OUT_RING(tex[1].pp_txfilter);
477                 OUT_RING(tex[1].pp_txformat);
478                 OUT_RING(tex[1].pp_txoffset);
479                 OUT_RING(tex[1].pp_txcblend);
480                 OUT_RING(tex[1].pp_txablend);
481                 OUT_RING(tex[1].pp_tfactor);
482                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
483                 OUT_RING(tex[1].pp_border_color);
484                 ADVANCE_RING();
485         }
486
487         if (dirty & RADEON_UPLOAD_TEX2) {
488                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
489                                                   &tex[2].pp_txoffset)) {
490                         DRM_ERROR("Invalid texture offset for unit 2\n");
491                         return DRM_ERR(EINVAL);
492                 }
493
494                 BEGIN_RING(9);
495                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
496                 OUT_RING(tex[2].pp_txfilter);
497                 OUT_RING(tex[2].pp_txformat);
498                 OUT_RING(tex[2].pp_txoffset);
499                 OUT_RING(tex[2].pp_txcblend);
500                 OUT_RING(tex[2].pp_txablend);
501                 OUT_RING(tex[2].pp_tfactor);
502                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
503                 OUT_RING(tex[2].pp_border_color);
504                 ADVANCE_RING();
505         }
506
507         return 0;
508 }
509
510 /* Emit 1.2 state
511  */
512 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
513                               drm_file_t * filp_priv,
514                               drm_radeon_state_t * state)
515 {
516         RING_LOCALS;
517
518         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
519                 BEGIN_RING(3);
520                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
521                 OUT_RING(state->context2.se_zbias_factor);
522                 OUT_RING(state->context2.se_zbias_constant);
523                 ADVANCE_RING();
524         }
525
526         return radeon_emit_state(dev_priv, filp_priv, &state->context,
527                                  state->tex, state->dirty);
528 }
529
530 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
531  * 1.3 cmdbuffers allow all previous state to be updated as well as
532  * the tcl scalar and vector areas.
533  */
534 static struct {
535         int start;
536         int len;
537         const char *name;
538 } packet[RADEON_MAX_STATE_PACKETS] = {
539         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
540         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
541         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
542         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
543         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
544         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
545         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
546         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
547         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
548         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
549         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
550         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
551         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
552         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
553         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
554         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
555         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
556         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
557         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
558         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
559         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
560                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
561         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
562         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
563         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
564         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
565         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
566         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
567         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
568         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
569         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
570         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
571         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
572         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
573         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
574         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
575         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
576         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
577         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
578         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
579         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
580         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
581         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
582         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
583         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
584         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
585         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
586         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
587         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
588         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
589         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
590          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
591         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
592         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
593         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
594         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
595         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
596         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
597         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
598         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
599         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
600         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
601         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
602                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
603         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
604         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
605         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
606         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
607         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
608         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
609         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
610         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
611         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
612         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
613         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
614         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
615         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
616         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
617         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
618         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
619         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
620         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
621         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
622         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
623         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
624         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
625         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
626         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
627         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
628         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
629         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
630         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
631         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
632         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
633         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
634         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
635         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
636         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
637 };
638
639 /* ================================================================
640  * Performance monitoring functions
641  */
642
643 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
644                              int x, int y, int w, int h, int r, int g, int b)
645 {
646         u32 color;
647         RING_LOCALS;
648
649         x += dev_priv->sarea_priv->boxes[0].x1;
650         y += dev_priv->sarea_priv->boxes[0].y1;
651
652         switch (dev_priv->color_fmt) {
653         case RADEON_COLOR_FORMAT_RGB565:
654                 color = (((r & 0xf8) << 8) |
655                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
656                 break;
657         case RADEON_COLOR_FORMAT_ARGB8888:
658         default:
659                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
660                 break;
661         }
662
663         BEGIN_RING(4);
664         RADEON_WAIT_UNTIL_3D_IDLE();
665         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
666         OUT_RING(0xffffffff);
667         ADVANCE_RING();
668
669         BEGIN_RING(6);
670
671         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
672         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
673                  RADEON_GMC_BRUSH_SOLID_COLOR |
674                  (dev_priv->color_fmt << 8) |
675                  RADEON_GMC_SRC_DATATYPE_COLOR |
676                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
677
678         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
679                 OUT_RING(dev_priv->front_pitch_offset);
680         } else {
681                 OUT_RING(dev_priv->back_pitch_offset);
682         }
683
684         OUT_RING(color);
685
686         OUT_RING((x << 16) | y);
687         OUT_RING((w << 16) | h);
688
689         ADVANCE_RING();
690 }
691
692 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
693 {
694         /* Collapse various things into a wait flag -- trying to
695          * guess if userspase slept -- better just to have them tell us.
696          */
697         if (dev_priv->stats.last_frame_reads > 1 ||
698             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
699                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
700         }
701
702         if (dev_priv->stats.freelist_loops) {
703                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
704         }
705
706         /* Purple box for page flipping
707          */
708         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
709                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
710
711         /* Red box if we have to wait for idle at any point
712          */
713         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
714                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
715
716         /* Blue box: lost context?
717          */
718
719         /* Yellow box for texture swaps
720          */
721         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
722                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
723
724         /* Green box if hardware never idles (as far as we can tell)
725          */
726         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
727                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
728
729         /* Draw bars indicating number of buffers allocated
730          * (not a great measure, easily confused)
731          */
732         if (dev_priv->stats.requested_bufs) {
733                 if (dev_priv->stats.requested_bufs > 100)
734                         dev_priv->stats.requested_bufs = 100;
735
736                 radeon_clear_box(dev_priv, 4, 16,
737                                  dev_priv->stats.requested_bufs, 4,
738                                  196, 128, 128);
739         }
740
741         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
742
743 }
744
745 /* ================================================================
746  * CP command dispatch functions
747  */
748
/* Emit the ring commands that clear the colour, depth and/or stencil buffers
 * for every cliprect currently in the SAREA.
 *
 * dev         - DRM device; dev->dev_private holds the radeon state
 * clear       - clear request: flags, clear_color, color_mask, clear_depth
 *               and depth_mask are read here
 * depth_boxes - one entry per SAREA cliprect, indexed like sarea_priv->boxes;
 *               the CLEAR_X1/Y1/X2/Y2/DEPTH words are written to the ring
 *               as vertex data by the quad-based depth/stencil paths
 *
 * Three stages, in order:
 *   1. front/back colour clear through a 2D solid fill;
 *   2. depth/stencil clear, using exactly one of: the fast Z ("hyper z")
 *      path, the R200 quad path, or the generic quad path;
 *   3. bump sarea_priv->last_clear and emit it as the new clear age.
 */
749 static void radeon_cp_dispatch_clear(drm_device_t * dev,
750                                      drm_radeon_clear_t * clear,
751                                      drm_radeon_clear_rect_t * depth_boxes)
752 {
753         drm_radeon_private_t *dev_priv = dev->dev_private;
754         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
755         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
756         int nbox = sarea_priv->nbox;
757         drm_clip_rect_t *pbox = sarea_priv->boxes;
758         unsigned int flags = clear->flags;
759         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
760         int i;
761         RING_LOCALS;
762         DRM_DEBUG("flags = 0x%x\n", flags);
763
764         dev_priv->stats.clears++;
765
            /* With page flipping enabled and page 1 current, exchange the
             * RADEON_FRONT and RADEON_BACK request bits.
             */
766         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
767                 unsigned int tmp = flags;
768
769                 flags &= ~(RADEON_FRONT | RADEON_BACK);
770                 if (tmp & RADEON_FRONT)
771                         flags |= RADEON_BACK;
772                 if (tmp & RADEON_BACK)
773                         flags |= RADEON_FRONT;
774         }
775
            /* Stage 1: colour buffers.  One PAINT_MULTI solid fill is emitted
             * per cliprect and per requested buffer.
             */
776         if (flags & (RADEON_FRONT | RADEON_BACK)) {
777
778                 BEGIN_RING(4);
779
780                 /* Ensure the 3D stream is idle before doing a
781                  * 2D fill to clear the front or back buffer.
782                  */
783                 RADEON_WAIT_UNTIL_3D_IDLE();
784
785                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
786                 OUT_RING(clear->color_mask);
787
788                 ADVANCE_RING();
789
790                 /* Make sure we restore the 3D state next time.
791                  */
792                 dev_priv->sarea_priv->ctx_owner = 0;
793
794                 for (i = 0; i < nbox; i++) {
795                         int x = pbox[i].x1;
796                         int y = pbox[i].y1;
797                         int w = pbox[i].x2 - x;
798                         int h = pbox[i].y2 - y;
799
800                         DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
801                                   x, y, w, h, flags);
802
803                         if (flags & RADEON_FRONT) {
804                                 BEGIN_RING(6);
805
806                                 OUT_RING(CP_PACKET3
807                                          (RADEON_CNTL_PAINT_MULTI, 4));
808                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
809                                          RADEON_GMC_BRUSH_SOLID_COLOR |
810                                          (dev_priv->
811                                           color_fmt << 8) |
812                                          RADEON_GMC_SRC_DATATYPE_COLOR |
813                                          RADEON_ROP3_P |
814                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
815
816                                 OUT_RING(dev_priv->front_pitch_offset);
817                                 OUT_RING(clear->clear_color);
818
819                                 OUT_RING((x << 16) | y);
820                                 OUT_RING((w << 16) | h);
821
822                                 ADVANCE_RING();
823                         }
824
825                         if (flags & RADEON_BACK) {
826                                 BEGIN_RING(6);
827
828                                 OUT_RING(CP_PACKET3
829                                          (RADEON_CNTL_PAINT_MULTI, 4));
830                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
831                                          RADEON_GMC_BRUSH_SOLID_COLOR |
832                                          (dev_priv->
833                                           color_fmt << 8) |
834                                          RADEON_GMC_SRC_DATATYPE_COLOR |
835                                          RADEON_ROP3_P |
836                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
837
838                                 OUT_RING(dev_priv->back_pitch_offset);
839                                 OUT_RING(clear->clear_color);
840
841                                 OUT_RING((x << 16) | y);
842                                 OUT_RING((w << 16) | h);
843
844                                 ADVANCE_RING();
845                         }
846                 }
847         }
848
            /* Stage 2a: fast depth/stencil clear through the Z mask. */
849         /* hyper z clear */
850         /* no docs available, based on reverse engineering by Stephane Marchesin */
851         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
852             && (flags & RADEON_CLEAR_FASTZ)) {
853
                    /* NOTE(review): this declaration shadows the function-scope
                     * i; harmless here, but worth removing.
                     */
854                 int i;
                    /* Depth pixels per line: depth_pitch divided by 2 for the
                     * 16-bit integer Z format and by 4 otherwise.
                     */
855                 int depthpixperline =
856                     dev_priv->depth_fmt ==
857                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
858                                                        2) : (dev_priv->
859                                                              depth_pitch / 4);
860
861                 u32 clearmask;
862
                    /* Depth clear value with the low byte of depth_mask placed in
                     * bits 31:24.
                     */
863                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
864                     ((clear->depth_mask & 0xff) << 24);
865
866                 /* Make sure we restore the 3D state next time.
867                  * we haven't touched any "normal" state - still need this?
868                  */
869                 dev_priv->sarea_priv->ctx_owner = 0;
870
871                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
872                     && (flags & RADEON_USE_HIERZ)) {
873                         /* FIXME : reverse engineer that for Rx00 cards */
874                         /* FIXME : the mask supposedly contains low-res z values. So can't set
875                            just to the max (0xff? or actually 0x3fff?), need to take z clear
876                            value into account? */
877                         /* pattern seems to work for r100, though get slight
878                            rendering errors with glxgears. If hierz is not enabled for r100,
879                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
880                            other ones are ignored, and the same clear mask can be used. That's
881                            very different behaviour than R200 which needs different clear mask
882                            and different number of tiles to clear if hierz is enabled or not !?!
883                          */
884                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
885                 } else {
886                         /* clear mask : chooses the clearing pattern.
887                            rv250: could be used to clear only parts of macrotiles
888                            (but that would get really complicated...)?
889                            bit 0 and 1 (either or both of them ?!?!) are used to
890                            not clear tile (or maybe one of the bits indicates if the tile is
891                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
892                            Pattern is as follows:
893                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
894                            bits -------------------------------------------------
895                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
896                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
897                            covers 256 pixels ?!?
898                          */
899                         clearmask = 0x0;
900                 }
901
902                 BEGIN_RING(8);
903                 RADEON_WAIT_UNTIL_2D_IDLE();
904                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
905                              tempRB3D_DEPTHCLEARVALUE);
906                 /* what offset is this exactly ? */
907                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
908                 /* need ctlstat, otherwise get some strange black flickering */
909                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
910                              RADEON_RB3D_ZC_FLUSH_ALL);
911                 ADVANCE_RING();
912
                    /* One CLEAR_ZMASK packet per tile row of each cliprect.  The
                     * three branches differ only in tile geometry: the shifts,
                     * the offset multiplier and the tile-count bias.
                     */
913                 for (i = 0; i < nbox; i++) {
914                         int tileoffset, nrtilesx, nrtilesy, j;
915                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
916                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
917                             && !(dev_priv->microcode_version == UCODE_R200)) {
918                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
919                                    maybe r200 actually doesn't need to put the low-res z value into
920                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
921                                    Works for R100, both with hierz and without.
922                                    R100 seems to operate on 2x1 8x8 tiles, but...
923                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
924                                    problematic with resolutions which are not 64 pix aligned? */
925                                 tileoffset =
926                                     ((pbox[i].y1 >> 3) * depthpixperline +
927                                      pbox[i].x1) >> 6;
928                                 nrtilesx =
929                                     ((pbox[i].x2 & ~63) -
930                                      (pbox[i].x1 & ~63)) >> 4;
931                                 nrtilesy =
932                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                    /* <= : the row containing y2 is cleared as well */
933                                 for (j = 0; j <= nrtilesy; j++) {
934                                         BEGIN_RING(4);
935                                         OUT_RING(CP_PACKET3
936                                                  (RADEON_3D_CLEAR_ZMASK, 2));
937                                         /* first tile */
938                                         OUT_RING(tileoffset * 8);
939                                         /* the number of tiles to clear */
940                                         OUT_RING(nrtilesx + 4);
941                                         /* clear mask : chooses the clearing pattern. */
942                                         OUT_RING(clearmask);
943                                         ADVANCE_RING();
944                                         tileoffset += depthpixperline >> 6;
945                                 }
946                         } else if (dev_priv->microcode_version == UCODE_R200) {
947                                 /* works for rv250. */
948                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
949                                 tileoffset =
950                                     ((pbox[i].y1 >> 3) * depthpixperline +
951                                      pbox[i].x1) >> 5;
952                                 nrtilesx =
953                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
954                                 nrtilesy =
955                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
956                                 for (j = 0; j <= nrtilesy; j++) {
957                                         BEGIN_RING(4);
958                                         OUT_RING(CP_PACKET3
959                                                  (RADEON_3D_CLEAR_ZMASK, 2));
960                                         /* first tile */
961                                         /* judging by the first tile offset needed, could possibly
962                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
963                                            macro tiles, though would still need clear mask for
964                                            right/bottom if truly 4x4 granularity is desired ? */
965                                         OUT_RING(tileoffset * 16);
966                                         /* the number of tiles to clear */
967                                         OUT_RING(nrtilesx + 1);
968                                         /* clear mask : chooses the clearing pattern. */
969                                         OUT_RING(clearmask);
970                                         ADVANCE_RING();
971                                         tileoffset += depthpixperline >> 5;
972                                 }
973                         } else {        /* rv 100 */
974                                 /* rv100 might not need 64 pix alignment, who knows */
975                                 /* offsets are, hmm, weird */
976                                 tileoffset =
977                                     ((pbox[i].y1 >> 4) * depthpixperline +
978                                      pbox[i].x1) >> 6;
979                                 nrtilesx =
980                                     ((pbox[i].x2 & ~63) -
981                                      (pbox[i].x1 & ~63)) >> 4;
982                                 nrtilesy =
983                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
984                                 for (j = 0; j <= nrtilesy; j++) {
985                                         BEGIN_RING(4);
986                                         OUT_RING(CP_PACKET3
987                                                  (RADEON_3D_CLEAR_ZMASK, 2));
988                                         OUT_RING(tileoffset * 128);
989                                         /* the number of tiles to clear */
990                                         OUT_RING(nrtilesx + 4);
991                                         /* clear mask : chooses the clearing pattern. */
992                                         OUT_RING(clearmask);
993                                         ADVANCE_RING();
994                                         tileoffset += depthpixperline >> 6;
995                                 }
996                         }
997                 }
998
999                 /* TODO don't always clear all hi-level z tiles */
1000                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
1001                     && (dev_priv->microcode_version == UCODE_R200)
1002                     && (flags & RADEON_USE_HIERZ))
1003                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
1004                         /* FIXME : the mask supposedly contains low-res z values. So can't set
1005                            just to the max (0xff? or actually 0x3fff?), need to take z clear
1006                            value into account? */
1007                 {
1008                         BEGIN_RING(4);
1009                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
1010                         OUT_RING(0x0);  /* First tile */
1011                         OUT_RING(0x3cc0);
1012                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
1013                         ADVANCE_RING();
1014                 }
1015         }
1016
1017         /* We have to clear the depth and/or stencil buffers by
1018          * rendering a quad into just those buffers.  Thus, we have to
1019          * make sure the 3D engine is configured correctly.
1020          */
             /* Stage 2b: R200 quad path.  This "else if" chains onto the fast Z
              * test above, so the quad paths run only when the fast Z clear was
              * not taken.
              */
1021         else if ((dev_priv->microcode_version == UCODE_R200) &&
1022                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1023
1024                 int tempPP_CNTL;
1025                 int tempRE_CNTL;
1026                 int tempRB3D_CNTL;
1027                 int tempRB3D_ZSTENCILCNTL;
1028                 int tempRB3D_STENCILREFMASK;
1029                 int tempRB3D_PLANEMASK;
1030                 int tempSE_CNTL;
1031                 int tempSE_VTE_CNTL;
1032                 int tempSE_VTX_FMT_0;
1033                 int tempSE_VTX_FMT_1;
1034                 int tempSE_VAP_CNTL;
1035                 int tempRE_AUX_SCISSOR_CNTL;
1036
1037                 tempPP_CNTL = 0;
1038                 tempRE_CNTL = 0;
1039
1040                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1041
1042                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                     /* NOTE(review): dead store; both branches of the stencil test
                      * below assign this variable again.
                      */
1043                 tempRB3D_STENCILREFMASK = 0x0;
1044
1045                 tempSE_CNTL = depth_clear->se_cntl;
1046
1047                 /* Disable TCL */
1048
1049                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1050                                           (0x9 <<
1051                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1052
1053                 tempRB3D_PLANEMASK = 0x0;
1054
1055                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1056
1057                 tempSE_VTE_CNTL =
1058                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1059
1060                 /* Vertex format (X, Y, Z, W) */
1061                 tempSE_VTX_FMT_0 =
1062                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1063                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1064                 tempSE_VTX_FMT_1 = 0x0;
1065
1066                 /*
1067                  * Depth buffer specific enables
1068                  */
1069                 if (flags & RADEON_DEPTH) {
1070                         /* Enable depth buffer */
1071                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1072                 } else {
1073                         /* Disable depth buffer */
1074                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1075                 }
1076
1077                 /*
1078                  * Stencil buffer specific enables
1079                  */
1080                 if (flags & RADEON_STENCIL) {
1081                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1082                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1083                 } else {
1084                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1085                         tempRB3D_STENCILREFMASK = 0x00000000;
1086                 }
1087
1088                 if (flags & RADEON_USE_COMP_ZBUF) {
1089                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1090                             RADEON_Z_DECOMPRESSION_ENABLE;
1091                 }
1092                 if (flags & RADEON_USE_HIERZ) {
1093                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1094                 }
1095
                     /* 26 dwords: presumably 2 for the wait plus 2 for each of the
                      * 12 register writes -- confirm against the macro definitions.
                      */
1096                 BEGIN_RING(26);
1097                 RADEON_WAIT_UNTIL_2D_IDLE();
1098
1099                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1100                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1101                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1102                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1103                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1104                              tempRB3D_STENCILREFMASK);
1105                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1106                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1107                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1108                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1109                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1110                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1111                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1112                 ADVANCE_RING();
1113
1114                 /* Make sure we restore the 3D state next time.
1115                  */
1116                 dev_priv->sarea_priv->ctx_owner = 0;
1117
1118                 for (i = 0; i < nbox; i++) {
1119
1120                         /* Funny that this should be required --
1121                          *  sets top-left?
1122                          */
1123                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1124
                             /* Three (X, Y, Z, W) vertices of a rectangle list;
                              * 0x3f800000 is the IEEE-754 bit pattern of 1.0f,
                              * emitted as W.
                              */
1125                         BEGIN_RING(14);
1126                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1127                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1128                                   RADEON_PRIM_WALK_RING |
1129                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1130                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1131                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1132                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1133                         OUT_RING(0x3f800000);
1134                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1135                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1136                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1137                         OUT_RING(0x3f800000);
1138                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1139                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1140                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1141                         OUT_RING(0x3f800000);
1142                         ADVANCE_RING();
1143                 }
             /* Stage 2c: generic quad path for every other microcode version. */
1144         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1145
1146                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1147
1148                 rb3d_cntl = depth_clear->rb3d_cntl;
1149
1150                 if (flags & RADEON_DEPTH) {
1151                         rb3d_cntl |= RADEON_Z_ENABLE;
1152                 } else {
1153                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1154                 }
1155
1156                 if (flags & RADEON_STENCIL) {
1157                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1158                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1159                 } else {
1160                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1161                         rb3d_stencilrefmask = 0x00000000;
1162                 }
1163
1164                 if (flags & RADEON_USE_COMP_ZBUF) {
1165                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1166                             RADEON_Z_DECOMPRESSION_ENABLE;
1167                 }
1168                 if (flags & RADEON_USE_HIERZ) {
1169                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1170                 }
1171
                     /* 13 dwords: presumably 2 for the wait, 3 for the two-register
                      * PACKET0 starting at PP_CNTL (zero, then rb3d_cntl) and 2 for
                      * each of the 4 single register writes -- confirm against the
                      * macro definitions.
                      */
1172                 BEGIN_RING(13);
1173                 RADEON_WAIT_UNTIL_2D_IDLE();
1174
1175                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1176                 OUT_RING(0x00000000);
1177                 OUT_RING(rb3d_cntl);
1178
1179                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1180                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1181                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1182                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1183                 ADVANCE_RING();
1184
1185                 /* Make sure we restore the 3D state next time.
1186                  */
1187                 dev_priv->sarea_priv->ctx_owner = 0;
1188
1189                 for (i = 0; i < nbox; i++) {
1190
1191                         /* Funny that this should be required --
1192                          *  sets top-left?
1193                          */
1194                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1195
                             /* Three vertices of a rectangle list; each is X, Y,
                              * depth and a zero word (presumably the packed colour
                              * announced by RADEON_VTX_PKCOLOR_PRESENT).
                              */
1196                         BEGIN_RING(15);
1197
1198                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1199                         OUT_RING(RADEON_VTX_Z_PRESENT |
1200                                  RADEON_VTX_PKCOLOR_PRESENT);
1201                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1202                                   RADEON_PRIM_WALK_RING |
1203                                   RADEON_MAOS_ENABLE |
1204                                   RADEON_VTX_FMT_RADEON_MODE |
1205                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1206
1207                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1208                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1209                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1210                         OUT_RING(0x0);
1211
1212                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1213                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1214                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1215                         OUT_RING(0x0);
1216
1217                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1218                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1219                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1220                         OUT_RING(0x0);
1221
1222                         ADVANCE_RING();
1223                 }
1224         }
1225
             /* Stage 3: publish the new clear age. */
1226         /* Increment the clear counter.  The client-side 3D driver must
1227          * wait on this value before performing the clear ioctl.  We
1228          * need this because the card's so damned fast...
1229          */
1230         dev_priv->sarea_priv->last_clear++;
1231
1232         BEGIN_RING(4);
1233
1234         RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
1235         RADEON_WAIT_UNTIL_IDLE();
1236
1237         ADVANCE_RING();
1238 }
1239
1240 static void radeon_cp_dispatch_swap(drm_device_t * dev)
1241 {
1242         drm_radeon_private_t *dev_priv = dev->dev_private;
1243         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1244         int nbox = sarea_priv->nbox;
1245         drm_clip_rect_t *pbox = sarea_priv->boxes;
1246         int i;
1247         RING_LOCALS;
1248         DRM_DEBUG("\n");
1249
1250         /* Do some trivial performance monitoring...
1251          */
1252         if (dev_priv->do_boxes)
1253                 radeon_cp_performance_boxes(dev_priv);
1254
1255         /* Wait for the 3D stream to idle before dispatching the bitblt.
1256          * This will prevent data corruption between the two streams.
1257          */
1258         BEGIN_RING(2);
1259
1260         RADEON_WAIT_UNTIL_3D_IDLE();
1261
1262         ADVANCE_RING();
1263
1264         for (i = 0; i < nbox; i++) {
1265                 int x = pbox[i].x1;
1266                 int y = pbox[i].y1;
1267                 int w = pbox[i].x2 - x;
1268                 int h = pbox[i].y2 - y;
1269
1270                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1271
1272                 BEGIN_RING(9);
1273
1274                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
1275                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1276                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1277                          RADEON_GMC_BRUSH_NONE |
1278                          (dev_priv->color_fmt << 8) |
1279                          RADEON_GMC_SRC_DATATYPE_COLOR |
1280                          RADEON_ROP3_S |
1281                          RADEON_DP_SRC_SOURCE_MEMORY |
1282                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1283
1284                 /* Make this work even if front & back are flipped:
1285                  */
1286                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
1287                 if (dev_priv->current_page == 0) {
1288                         OUT_RING(dev_priv->back_pitch_offset);
1289                         OUT_RING(dev_priv->front_pitch_offset);
1290                 } else {
1291                         OUT_RING(dev_priv->front_pitch_offset);
1292                         OUT_RING(dev_priv->back_pitch_offset);
1293                 }
1294
1295                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
1296                 OUT_RING((x << 16) | y);
1297                 OUT_RING((x << 16) | y);
1298                 OUT_RING((w << 16) | h);
1299
1300                 ADVANCE_RING();
1301         }
1302
1303         /* Increment the frame counter.  The client-side 3D driver must
1304          * throttle the framerate by waiting for this value before
1305          * performing the swapbuffer ioctl.
1306          */
1307         dev_priv->sarea_priv->last_frame++;
1308
1309         BEGIN_RING(4);
1310
1311         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1312         RADEON_WAIT_UNTIL_2D_IDLE();
1313
1314         ADVANCE_RING();
1315 }
1316
1317 static void radeon_cp_dispatch_flip(drm_device_t * dev)
1318 {
1319         drm_radeon_private_t *dev_priv = dev->dev_private;
1320         drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
1321         int offset = (dev_priv->current_page == 1)
1322             ? dev_priv->front_offset : dev_priv->back_offset;
1323         RING_LOCALS;
1324         DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
1325                   __FUNCTION__,
1326                   dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);
1327
1328         /* Do some trivial performance monitoring...
1329          */
1330         if (dev_priv->do_boxes) {
1331                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1332                 radeon_cp_performance_boxes(dev_priv);
1333         }
1334
1335         /* Update the frame offsets for both CRTCs
1336          */
1337         BEGIN_RING(6);
1338
1339         RADEON_WAIT_UNTIL_3D_IDLE();
1340         OUT_RING_REG(RADEON_CRTC_OFFSET,
1341                      ((sarea->frame.y * dev_priv->front_pitch +
1342                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1343                      + offset);
1344         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1345                      + offset);
1346
1347         ADVANCE_RING();
1348
1349         /* Increment the frame counter.  The client-side 3D driver must
1350          * throttle the framerate by waiting for this value before
1351          * performing the swapbuffer ioctl.
1352          */
1353         dev_priv->sarea_priv->last_frame++;
1354         dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
1355             1 - dev_priv->current_page;
1356
1357         BEGIN_RING(2);
1358
1359         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1360
1361         ADVANCE_RING();
1362 }
1363
1364 static int bad_prim_vertex_nr(int primitive, int nr)
1365 {
1366         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1367         case RADEON_PRIM_TYPE_NONE:
1368         case RADEON_PRIM_TYPE_POINT:
1369                 return nr < 1;
1370         case RADEON_PRIM_TYPE_LINE:
1371                 return (nr & 1) || nr == 0;
1372         case RADEON_PRIM_TYPE_LINE_STRIP:
1373                 return nr < 2;
1374         case RADEON_PRIM_TYPE_TRI_LIST:
1375         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1376         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1377         case RADEON_PRIM_TYPE_RECT_LIST:
1378                 return nr % 3 || nr == 0;
1379         case RADEON_PRIM_TYPE_TRI_FAN:
1380         case RADEON_PRIM_TYPE_TRI_STRIP:
1381                 return nr < 3;
1382         default:
1383                 return 1;
1384         }
1385 }
1386
/*
 * Description of one primitive handed to the vertex/index dispatch
 * helpers in this file.
 */
typedef struct {
	unsigned int start;	/* byte offset of the data inside the DMA buffer */
	unsigned int finish;	/* byte offset just past the end of the data */
	unsigned int prim;	/* hardware primitive type bits for the draw command */
	unsigned int numverts;	/* number of vertices */
	unsigned int offset;	/* vertex data offset, added to the GART buffer base (indexed path) */
	unsigned int vc_format;	/* vertex format dword, emitted unchanged */
} drm_radeon_tcl_prim_t;
1395
1396 static void radeon_cp_dispatch_vertex(drm_device_t * dev,
1397                                       drm_buf_t * buf,
1398                                       drm_radeon_tcl_prim_t * prim)
1399 {
1400         drm_radeon_private_t *dev_priv = dev->dev_private;
1401         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1402         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1403         int numverts = (int)prim->numverts;
1404         int nbox = sarea_priv->nbox;
1405         int i = 0;
1406         RING_LOCALS;
1407
1408         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1409                   prim->prim,
1410                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1411
1412         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1413                 DRM_ERROR("bad prim %x numverts %d\n",
1414                           prim->prim, prim->numverts);
1415                 return;
1416         }
1417
1418         do {
1419                 /* Emit the next cliprect */
1420                 if (i < nbox) {
1421                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1422                 }
1423
1424                 /* Emit the vertex buffer rendering commands */
1425                 BEGIN_RING(5);
1426
1427                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1428                 OUT_RING(offset);
1429                 OUT_RING(numverts);
1430                 OUT_RING(prim->vc_format);
1431                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1432                          RADEON_COLOR_ORDER_RGBA |
1433                          RADEON_VTX_FMT_RADEON_MODE |
1434                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1435
1436                 ADVANCE_RING();
1437
1438                 i++;
1439         } while (i < nbox);
1440 }
1441
1442 static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
1443 {
1444         drm_radeon_private_t *dev_priv = dev->dev_private;
1445         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1446         RING_LOCALS;
1447
1448         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1449
1450         /* Emit the vertex buffer age */
1451         BEGIN_RING(2);
1452         RADEON_DISPATCH_AGE(buf_priv->age);
1453         ADVANCE_RING();
1454
1455         buf->pending = 1;
1456         buf->used = 0;
1457 }
1458
1459 static void radeon_cp_dispatch_indirect(drm_device_t * dev,
1460                                         drm_buf_t * buf, int start, int end)
1461 {
1462         drm_radeon_private_t *dev_priv = dev->dev_private;
1463         RING_LOCALS;
1464         DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1465
1466         if (start != end) {
1467                 int offset = (dev_priv->gart_buffers_offset
1468                               + buf->offset + start);
1469                 int dwords = (end - start + 3) / sizeof(u32);
1470
1471                 /* Indirect buffer data must be an even number of
1472                  * dwords, so if we've been given an odd number we must
1473                  * pad the data with a Type-2 CP packet.
1474                  */
1475                 if (dwords & 1) {
1476                         u32 *data = (u32 *)
1477                             ((char *)dev->agp_buffer_map->handle
1478                              + buf->offset + start);
1479                         data[dwords++] = RADEON_CP_PACKET2;
1480                 }
1481
1482                 /* Fire off the indirect buffer */
1483                 BEGIN_RING(3);
1484
1485                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1486                 OUT_RING(offset);
1487                 OUT_RING(dwords);
1488
1489                 ADVANCE_RING();
1490         }
1491 }
1492
1493 static void radeon_cp_dispatch_indices(drm_device_t * dev,
1494                                        drm_buf_t * elt_buf,
1495                                        drm_radeon_tcl_prim_t * prim)
1496 {
1497         drm_radeon_private_t *dev_priv = dev->dev_private;
1498         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1499         int offset = dev_priv->gart_buffers_offset + prim->offset;
1500         u32 *data;
1501         int dwords;
1502         int i = 0;
1503         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1504         int count = (prim->finish - start) / sizeof(u16);
1505         int nbox = sarea_priv->nbox;
1506
1507         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1508                   prim->prim,
1509                   prim->vc_format,
1510                   prim->start, prim->finish, prim->offset, prim->numverts);
1511
1512         if (bad_prim_vertex_nr(prim->prim, count)) {
1513                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1514                 return;
1515         }
1516
1517         if (start >= prim->finish || (prim->start & 0x7)) {
1518                 DRM_ERROR("buffer prim %d\n", prim->prim);
1519                 return;
1520         }
1521
1522         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1523
1524         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1525                         elt_buf->offset + prim->start);
1526
1527         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1528         data[1] = offset;
1529         data[2] = prim->numverts;
1530         data[3] = prim->vc_format;
1531         data[4] = (prim->prim |
1532                    RADEON_PRIM_WALK_IND |
1533                    RADEON_COLOR_ORDER_RGBA |
1534                    RADEON_VTX_FMT_RADEON_MODE |
1535                    (count << RADEON_NUM_VERTICES_SHIFT));
1536
1537         do {
1538                 if (i < nbox)
1539                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1540
1541                 radeon_cp_dispatch_indirect(dev, elt_buf,
1542                                             prim->start, prim->finish);
1543
1544                 i++;
1545         } while (i < nbox);
1546
1547 }
1548
1549 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1550
1551 static int radeon_cp_dispatch_texture(DRMFILE filp,
1552                                       drm_device_t * dev,
1553                                       drm_radeon_texture_t * tex,
1554                                       drm_radeon_tex_image_t * image)
1555 {
1556         drm_radeon_private_t *dev_priv = dev->dev_private;
1557         drm_file_t *filp_priv;
1558         drm_buf_t *buf;
1559         u32 format;
1560         u32 *buffer;
1561         const u8 __user *data;
1562         int size, dwords, tex_width, blit_width, spitch;
1563         u32 height;
1564         int i;
1565         u32 texpitch, microtile;
1566         u32 offset;
1567         RING_LOCALS;
1568
1569         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
1570
1571         if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
1572                 DRM_ERROR("Invalid destination offset\n");
1573                 return DRM_ERR(EINVAL);
1574         }
1575
1576         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1577
1578         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1579          * up with the texture data from the host data blit, otherwise
1580          * part of the texture image may be corrupted.
1581          */
1582         BEGIN_RING(4);
1583         RADEON_FLUSH_CACHE();
1584         RADEON_WAIT_UNTIL_IDLE();
1585         ADVANCE_RING();
1586
1587         /* The compiler won't optimize away a division by a variable,
1588          * even if the only legal values are powers of two.  Thus, we'll
1589          * use a shift instead.
1590          */
1591         switch (tex->format) {
1592         case RADEON_TXFORMAT_ARGB8888:
1593         case RADEON_TXFORMAT_RGBA8888:
1594                 format = RADEON_COLOR_FORMAT_ARGB8888;
1595                 tex_width = tex->width * 4;
1596                 blit_width = image->width * 4;
1597                 break;
1598         case RADEON_TXFORMAT_AI88:
1599         case RADEON_TXFORMAT_ARGB1555:
1600         case RADEON_TXFORMAT_RGB565:
1601         case RADEON_TXFORMAT_ARGB4444:
1602         case RADEON_TXFORMAT_VYUY422:
1603         case RADEON_TXFORMAT_YVYU422:
1604                 format = RADEON_COLOR_FORMAT_RGB565;
1605                 tex_width = tex->width * 2;
1606                 blit_width = image->width * 2;
1607                 break;
1608         case RADEON_TXFORMAT_I8:
1609         case RADEON_TXFORMAT_RGB332:
1610                 format = RADEON_COLOR_FORMAT_CI8;
1611                 tex_width = tex->width * 1;
1612                 blit_width = image->width * 1;
1613                 break;
1614         default:
1615                 DRM_ERROR("invalid texture format %d\n", tex->format);
1616                 return DRM_ERR(EINVAL);
1617         }
1618         spitch = blit_width >> 6;
1619         if (spitch == 0 && image->height > 1)
1620                 return DRM_ERR(EINVAL);
1621
1622         texpitch = tex->pitch;
1623         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1624                 microtile = 1;
1625                 if (tex_width < 64) {
1626                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1627                         /* we got tiled coordinates, untile them */
1628                         image->x *= 2;
1629                 }
1630         } else
1631                 microtile = 0;
1632
1633         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1634
1635         do {
1636                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1637                           tex->offset >> 10, tex->pitch, tex->format,
1638                           image->x, image->y, image->width, image->height);
1639
1640                 /* Make a copy of some parameters in case we have to
1641                  * update them for a multi-pass texture blit.
1642                  */
1643                 height = image->height;
1644                 data = (const u8 __user *)image->data;
1645
1646                 size = height * blit_width;
1647
1648                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1649                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1650                         size = height * blit_width;
1651                 } else if (size < 4 && size > 0) {
1652                         size = 4;
1653                 } else if (size == 0) {
1654                         return 0;
1655                 }
1656
1657                 buf = radeon_freelist_get(dev);
1658                 if (0 && !buf) {
1659                         radeon_do_cp_idle(dev_priv);
1660                         buf = radeon_freelist_get(dev);
1661                 }
1662                 if (!buf) {
1663                         DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1664                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1665                                 return DRM_ERR(EFAULT);
1666                         return DRM_ERR(EAGAIN);
1667                 }
1668
1669                 /* Dispatch the indirect buffer.
1670                  */
1671                 buffer =
1672                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1673                 dwords = size / 4;
1674
1675 #define RADEON_COPY_MT(_buf, _data, _width) \
1676         do { \
1677                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1678                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1679                         return DRM_ERR(EFAULT); \
1680                 } \
1681         } while(0)
1682
1683                 if (microtile) {
1684                         /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1685                            however, we cannot use blitter directly for texture width < 64 bytes,
1686                            since minimum tex pitch is 64 bytes and we need this to match
1687                            the texture width, otherwise the blitter will tile it wrong.
1688                            Thus, tiling manually in this case. Additionally, need to special
1689                            case tex height = 1, since our actual image will have height 2
1690                            and we need to ensure we don't read beyond the texture size
1691                            from user space. */
1692                         if (tex->height == 1) {
1693                                 if (tex_width >= 64 || tex_width <= 16) {
1694                                         RADEON_COPY_MT(buffer, data,
1695                                                 (int)(tex_width * sizeof(u32)));
1696                                 } else if (tex_width == 32) {
1697                                         RADEON_COPY_MT(buffer, data, 16);
1698                                         RADEON_COPY_MT(buffer + 8,
1699                                                        data + 16, 16);
1700                                 }
1701                         } else if (tex_width >= 64 || tex_width == 16) {
1702                                 RADEON_COPY_MT(buffer, data,
1703                                                (int)(dwords * sizeof(u32)));
1704                         } else if (tex_width < 16) {
1705                                 for (i = 0; i < tex->height; i++) {
1706                                         RADEON_COPY_MT(buffer, data, tex_width);
1707                                         buffer += 4;
1708                                         data += tex_width;
1709                                 }
1710                         } else if (tex_width == 32) {
1711                                 /* TODO: make sure this works when not fitting in one buffer
1712                                    (i.e. 32bytes x 2048...) */
1713                                 for (i = 0; i < tex->height; i += 2) {
1714                                         RADEON_COPY_MT(buffer, data, 16);
1715                                         data += 16;
1716                                         RADEON_COPY_MT(buffer + 8, data, 16);
1717                                         data += 16;
1718                                         RADEON_COPY_MT(buffer + 4, data, 16);
1719                                         data += 16;
1720                                         RADEON_COPY_MT(buffer + 12, data, 16);
1721                                         data += 16;
1722                                         buffer += 16;
1723                                 }
1724                         }
1725                 } else {
1726                         if (tex_width >= 32) {
1727                                 /* Texture image width is larger than the minimum, so we
1728                                  * can upload it directly.
1729                                  */
1730                                 RADEON_COPY_MT(buffer, data,
1731                                                (int)(dwords * sizeof(u32)));
1732                         } else {
1733                                 /* Texture image width is less than the minimum, so we
1734                                  * need to pad out each image scanline to the minimum
1735                                  * width.
1736                                  */
1737                                 for (i = 0; i < tex->height; i++) {
1738                                         RADEON_COPY_MT(buffer, data, tex_width);
1739                                         buffer += 8;
1740                                         data += tex_width;
1741                                 }
1742                         }
1743                 }
1744
1745 #undef RADEON_COPY_MT
1746                 buf->filp = filp;
1747                 buf->used = size;
1748                 offset = dev_priv->gart_buffers_offset + buf->offset;
1749                 BEGIN_RING(9);
1750                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1751                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1752                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1753                          RADEON_GMC_BRUSH_NONE |
1754                          (format << 8) |
1755                          RADEON_GMC_SRC_DATATYPE_COLOR |
1756                          RADEON_ROP3_S |
1757                          RADEON_DP_SRC_SOURCE_MEMORY |
1758                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1759                 OUT_RING((spitch << 22) | (offset >> 10));
1760                 OUT_RING((texpitch << 22) | (tex->offset >> 10));
1761                 OUT_RING(0);
1762                 OUT_RING((image->x << 16) | image->y);
1763                 OUT_RING((image->width << 16) | height);
1764                 RADEON_WAIT_UNTIL_2D_IDLE();
1765                 ADVANCE_RING();
1766
1767                 radeon_cp_discard_buffer(dev, buf);
1768
1769                 /* Update the input parameters for next time */
1770                 image->y += height;
1771                 image->height -= height;
1772                 image->data = (const u8 __user *)image->data + size;
1773         } while (image->height > 0);
1774
1775         /* Flush the pixel cache after the blit completes.  This ensures
1776          * the texture data is written out to memory before rendering
1777          * continues.
1778          */
1779         BEGIN_RING(4);
1780         RADEON_FLUSH_CACHE();
1781         RADEON_WAIT_UNTIL_2D_IDLE();
1782         ADVANCE_RING();
1783         return 0;
1784 }
1785
1786 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1787 {
1788         drm_radeon_private_t *dev_priv = dev->dev_private;
1789         int i;
1790         RING_LOCALS;
1791         DRM_DEBUG("\n");
1792
1793         BEGIN_RING(35);
1794
1795         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1796         OUT_RING(0x00000000);
1797
1798         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1799         for (i = 0; i < 32; i++) {
1800                 OUT_RING(stipple[i]);
1801         }
1802
1803         ADVANCE_RING();
1804 }
1805
1806 static void radeon_apply_surface_regs(int surf_index,
1807                                       drm_radeon_private_t *dev_priv)
1808 {
1809         if (!dev_priv->mmio)
1810                 return;
1811
1812         radeon_do_cp_idle(dev_priv);
1813
1814         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1815                      dev_priv->surfaces[surf_index].flags);
1816         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1817                      dev_priv->surfaces[surf_index].lower);
1818         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1819                      dev_priv->surfaces[surf_index].upper);
1820 }
1821
1822 /* Allocates a virtual surface
1823  * doesn't always allocate a real surface, will stretch an existing
1824  * surface when possible.
1825  *
1826  * Note that refcount can be at most 2, since during a free refcount=3
1827  * might mean we have to allocate a new surface which might not always
1828  * be available.
1829  * For example : we allocate three contigous surfaces ABC. If B is
1830  * freed, we suddenly need two surfaces to store A and C, which might
1831  * not always be available.
1832  */
1833 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1834                          drm_radeon_private_t *dev_priv, DRMFILE filp)
1835 {
1836         struct radeon_virt_surface *s;
1837         int i;
1838         int virt_surface_index;
1839         uint32_t new_upper, new_lower;
1840
1841         new_lower = new->address;
1842         new_upper = new_lower + new->size - 1;
1843
1844         /* sanity check */
1845         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1846             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1847              RADEON_SURF_ADDRESS_FIXED_MASK)
1848             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1849                 return -1;
1850
1851         /* make sure there is no overlap with existing surfaces */
1852         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1853                 if ((dev_priv->surfaces[i].refcount != 0) &&
1854                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1855                       (new_lower < dev_priv->surfaces[i].upper)) ||
1856                      ((new_lower < dev_priv->surfaces[i].lower) &&
1857                       (new_upper > dev_priv->surfaces[i].lower)))) {
1858                         return -1;
1859                 }
1860         }
1861
1862         /* find a virtual surface */
1863         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1864                 if (dev_priv->virt_surfaces[i].filp == 0)
1865                         break;
1866         if (i == 2 * RADEON_MAX_SURFACES) {
1867                 return -1;
1868         }
1869         virt_surface_index = i;
1870
1871         /* try to reuse an existing surface */
1872         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1873                 /* extend before */
1874                 if ((dev_priv->surfaces[i].refcount == 1) &&
1875                     (new->flags == dev_priv->surfaces[i].flags) &&
1876                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1877                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1878                         s->surface_index = i;
1879                         s->lower = new_lower;
1880                         s->upper = new_upper;
1881                         s->flags = new->flags;
1882                         s->filp = filp;
1883                         dev_priv->surfaces[i].refcount++;
1884                         dev_priv->surfaces[i].lower = s->lower;
1885                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1886                         return virt_surface_index;
1887                 }
1888
1889                 /* extend after */
1890                 if ((dev_priv->surfaces[i].refcount == 1) &&
1891                     (new->flags == dev_priv->surfaces[i].flags) &&
1892                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1893                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1894                         s->surface_index = i;
1895                         s->lower = new_lower;
1896                         s->upper = new_upper;
1897                         s->flags = new->flags;
1898                         s->filp = filp;
1899                         dev_priv->surfaces[i].refcount++;
1900                         dev_priv->surfaces[i].upper = s->upper;
1901                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1902                         return virt_surface_index;
1903                 }
1904         }
1905
1906         /* okay, we need a new one */
1907         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1908                 if (dev_priv->surfaces[i].refcount == 0) {
1909                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1910                         s->surface_index = i;
1911                         s->lower = new_lower;
1912                         s->upper = new_upper;
1913                         s->flags = new->flags;
1914                         s->filp = filp;
1915                         dev_priv->surfaces[i].refcount = 1;
1916                         dev_priv->surfaces[i].lower = s->lower;
1917                         dev_priv->surfaces[i].upper = s->upper;
1918                         dev_priv->surfaces[i].flags = s->flags;
1919                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1920                         return virt_surface_index;
1921                 }
1922         }
1923
1924         /* we didn't find anything */
1925         return -1;
1926 }
1927
1928 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
1929                         int lower)
1930 {
1931         struct radeon_virt_surface *s;
1932         int i;
1933         /* find the virtual surface */
1934         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1935                 s = &(dev_priv->virt_surfaces[i]);
1936                 if (s->filp) {
1937                         if ((lower == s->lower) && (filp == s->filp)) {
1938                                 if (dev_priv->surfaces[s->surface_index].
1939                                     lower == s->lower)
1940                                         dev_priv->surfaces[s->surface_index].
1941                                             lower = s->upper;
1942
1943                                 if (dev_priv->surfaces[s->surface_index].
1944                                     upper == s->upper)
1945                                         dev_priv->surfaces[s->surface_index].
1946                                             upper = s->lower;
1947
1948                                 dev_priv->surfaces[s->surface_index].refcount--;
1949                                 if (dev_priv->surfaces[s->surface_index].
1950                                     refcount == 0)
1951                                         dev_priv->surfaces[s->surface_index].
1952                                             flags = 0;
1953                                 s->filp = NULL;
1954                                 radeon_apply_surface_regs(s->surface_index,
1955                                                           dev_priv);
1956                                 return 0;
1957                         }
1958                 }
1959         }
1960         return 1;
1961 }
1962
1963 static void radeon_surfaces_release(DRMFILE filp,
1964                                     drm_radeon_private_t * dev_priv)
1965 {
1966         int i;
1967         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1968                 if (dev_priv->virt_surfaces[i].filp == filp)
1969                         free_surface(filp, dev_priv,
1970                                      dev_priv->virt_surfaces[i].lower);
1971         }
1972 }
1973
1974 /* ================================================================
1975  * IOCTL functions
1976  */
1977 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1978 {
1979         DRM_DEVICE;
1980         drm_radeon_private_t *dev_priv = dev->dev_private;
1981         drm_radeon_surface_alloc_t alloc;
1982
1983         DRM_COPY_FROM_USER_IOCTL(alloc,
1984                                  (drm_radeon_surface_alloc_t __user *) data,
1985                                  sizeof(alloc));
1986
1987         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1988                 return DRM_ERR(EINVAL);
1989         else
1990                 return 0;
1991 }
1992
1993 static int radeon_surface_free(DRM_IOCTL_ARGS)
1994 {
1995         DRM_DEVICE;
1996         drm_radeon_private_t *dev_priv = dev->dev_private;
1997         drm_radeon_surface_free_t memfree;
1998
1999         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
2000                                  sizeof(memfree));
2001
2002         if (free_surface(filp, dev_priv, memfree.address))
2003                 return DRM_ERR(EINVAL);
2004         else
2005                 return 0;
2006 }
2007
2008 static int radeon_cp_clear(DRM_IOCTL_ARGS)
2009 {
2010         DRM_DEVICE;
2011         drm_radeon_private_t *dev_priv = dev->dev_private;
2012         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2013         drm_radeon_clear_t clear;
2014         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2015         DRM_DEBUG("\n");
2016
2017         LOCK_TEST_WITH_RETURN(dev, filp);
2018
2019         DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
2020                                  sizeof(clear));
2021
2022         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2023
2024         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2025                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2026
2027         if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
2028                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2029                 return DRM_ERR(EFAULT);
2030
2031         radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2032
2033         COMMIT_RING();
2034         return 0;
2035 }
2036
2037 /* Not sure why this isn't set all the time:
2038  */
2039 static int radeon_do_init_pageflip(drm_device_t * dev)
2040 {
2041         drm_radeon_private_t *dev_priv = dev->dev_private;
2042         RING_LOCALS;
2043
2044         DRM_DEBUG("\n");
2045
2046         BEGIN_RING(6);
2047         RADEON_WAIT_UNTIL_3D_IDLE();
2048         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2049         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2050                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2051         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2052         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2053                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2054         ADVANCE_RING();
2055
2056         dev_priv->page_flipping = 1;
2057         dev_priv->current_page = 0;
2058         dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2059
2060         return 0;
2061 }
2062
2063 /* Called whenever a client dies, from drm_release.
2064  * NOTE:  Lock isn't necessarily held when this is called!
2065  */
2066 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2067 {
2068         drm_radeon_private_t *dev_priv = dev->dev_private;
2069         DRM_DEBUG("\n");
2070
2071         if (dev_priv->current_page != 0)
2072                 radeon_cp_dispatch_flip(dev);
2073
2074         dev_priv->page_flipping = 0;
2075         return 0;
2076 }
2077
2078 /* Swapping and flipping are different operations, need different ioctls.
2079  * They can & should be intermixed to support multiple 3d windows.
2080  */
2081 static int radeon_cp_flip(DRM_IOCTL_ARGS)
2082 {
2083         DRM_DEVICE;
2084         drm_radeon_private_t *dev_priv = dev->dev_private;
2085         DRM_DEBUG("\n");
2086
2087         LOCK_TEST_WITH_RETURN(dev, filp);
2088
2089         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2090
2091         if (!dev_priv->page_flipping)
2092                 radeon_do_init_pageflip(dev);
2093
2094         radeon_cp_dispatch_flip(dev);
2095
2096         COMMIT_RING();
2097         return 0;
2098 }
2099
2100 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2101 {
2102         DRM_DEVICE;
2103         drm_radeon_private_t *dev_priv = dev->dev_private;
2104         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2105         DRM_DEBUG("\n");
2106
2107         LOCK_TEST_WITH_RETURN(dev, filp);
2108
2109         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2110
2111         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2112                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2113
2114         radeon_cp_dispatch_swap(dev);
2115         dev_priv->sarea_priv->ctx_owner = 0;
2116
2117         COMMIT_RING();
2118         return 0;
2119 }
2120
2121 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2122 {
2123         DRM_DEVICE;
2124         drm_radeon_private_t *dev_priv = dev->dev_private;
2125         drm_file_t *filp_priv;
2126         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2127         drm_device_dma_t *dma = dev->dma;
2128         drm_buf_t *buf;
2129         drm_radeon_vertex_t vertex;
2130         drm_radeon_tcl_prim_t prim;
2131
2132         LOCK_TEST_WITH_RETURN(dev, filp);
2133
2134         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2135
2136         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
2137                                  sizeof(vertex));
2138
2139         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2140                   DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2141
2142         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2143                 DRM_ERROR("buffer index %d (of %d max)\n",
2144                           vertex.idx, dma->buf_count - 1);
2145                 return DRM_ERR(EINVAL);
2146         }
2147         if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2148                 DRM_ERROR("buffer prim %d\n", vertex.prim);
2149                 return DRM_ERR(EINVAL);
2150         }
2151
2152         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2153         VB_AGE_TEST_WITH_RETURN(dev_priv);
2154
2155         buf = dma->buflist[vertex.idx];
2156
2157         if (buf->filp != filp) {
2158                 DRM_ERROR("process %d using buffer owned by %p\n",
2159                           DRM_CURRENTPID, buf->filp);
2160                 return DRM_ERR(EINVAL);
2161         }
2162         if (buf->pending) {
2163                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2164                 return DRM_ERR(EINVAL);
2165         }
2166
2167         /* Build up a prim_t record:
2168          */
2169         if (vertex.count) {
2170                 buf->used = vertex.count;       /* not used? */
2171
2172                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2173                         if (radeon_emit_state(dev_priv, filp_priv,
2174                                               &sarea_priv->context_state,
2175                                               sarea_priv->tex_state,
2176                                               sarea_priv->dirty)) {
2177                                 DRM_ERROR("radeon_emit_state failed\n");
2178                                 return DRM_ERR(EINVAL);
2179                         }
2180
2181                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2182                                                RADEON_UPLOAD_TEX1IMAGES |
2183                                                RADEON_UPLOAD_TEX2IMAGES |
2184                                                RADEON_REQUIRE_QUIESCENCE);
2185                 }
2186
2187                 prim.start = 0;
2188                 prim.finish = vertex.count;     /* unused */
2189                 prim.prim = vertex.prim;
2190                 prim.numverts = vertex.count;
2191                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2192
2193                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2194         }
2195
2196         if (vertex.discard) {
2197                 radeon_cp_discard_buffer(dev, buf);
2198         }
2199
2200         COMMIT_RING();
2201         return 0;
2202 }
2203
2204 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2205 {
2206         DRM_DEVICE;
2207         drm_radeon_private_t *dev_priv = dev->dev_private;
2208         drm_file_t *filp_priv;
2209         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2210         drm_device_dma_t *dma = dev->dma;
2211         drm_buf_t *buf;
2212         drm_radeon_indices_t elts;
2213         drm_radeon_tcl_prim_t prim;
2214         int count;
2215
2216         LOCK_TEST_WITH_RETURN(dev, filp);
2217
2218         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2219
2220         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2221                                  sizeof(elts));
2222
2223         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2224                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2225
2226         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2227                 DRM_ERROR("buffer index %d (of %d max)\n",
2228                           elts.idx, dma->buf_count - 1);
2229                 return DRM_ERR(EINVAL);
2230         }
2231         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2232                 DRM_ERROR("buffer prim %d\n", elts.prim);
2233                 return DRM_ERR(EINVAL);
2234         }
2235
2236         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2237         VB_AGE_TEST_WITH_RETURN(dev_priv);
2238
2239         buf = dma->buflist[elts.idx];
2240
2241         if (buf->filp != filp) {
2242                 DRM_ERROR("process %d using buffer owned by %p\n",
2243                           DRM_CURRENTPID, buf->filp);
2244                 return DRM_ERR(EINVAL);
2245         }
2246         if (buf->pending) {
2247                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2248                 return DRM_ERR(EINVAL);
2249         }
2250
2251         count = (elts.end - elts.start) / sizeof(u16);
2252         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2253
2254         if (elts.start & 0x7) {
2255                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2256                 return DRM_ERR(EINVAL);
2257         }
2258         if (elts.start < buf->used) {
2259                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2260                 return DRM_ERR(EINVAL);
2261         }
2262
2263         buf->used = elts.end;
2264
2265         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2266                 if (radeon_emit_state(dev_priv, filp_priv,
2267                                       &sarea_priv->context_state,
2268                                       sarea_priv->tex_state,
2269                                       sarea_priv->dirty)) {
2270                         DRM_ERROR("radeon_emit_state failed\n");
2271                         return DRM_ERR(EINVAL);
2272                 }
2273
2274                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2275                                        RADEON_UPLOAD_TEX1IMAGES |
2276                                        RADEON_UPLOAD_TEX2IMAGES |
2277                                        RADEON_REQUIRE_QUIESCENCE);
2278         }
2279
2280         /* Build up a prim_t record:
2281          */
2282         prim.start = elts.start;
2283         prim.finish = elts.end;
2284         prim.prim = elts.prim;
2285         prim.offset = 0;        /* offset from start of dma buffers */
2286         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2287         prim.vc_format = dev_priv->sarea_priv->vc_format;
2288
2289         radeon_cp_dispatch_indices(dev, buf, &prim);
2290         if (elts.discard) {
2291                 radeon_cp_discard_buffer(dev, buf);
2292         }
2293
2294         COMMIT_RING();
2295         return 0;
2296 }
2297
2298 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2299 {
2300         DRM_DEVICE;
2301         drm_radeon_private_t *dev_priv = dev->dev_private;
2302         drm_radeon_texture_t tex;
2303         drm_radeon_tex_image_t image;
2304         int ret;
2305
2306         LOCK_TEST_WITH_RETURN(dev, filp);
2307
2308         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2309                                  sizeof(tex));
2310
2311         if (tex.image == NULL) {
2312                 DRM_ERROR("null texture image!\n");
2313                 return DRM_ERR(EINVAL);
2314         }
2315
2316         if (DRM_COPY_FROM_USER(&image,
2317                                (drm_radeon_tex_image_t __user *) tex.image,
2318                                sizeof(image)))
2319                 return DRM_ERR(EFAULT);
2320
2321         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2322         VB_AGE_TEST_WITH_RETURN(dev_priv);
2323
2324         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2325
2326         COMMIT_RING();
2327         return ret;
2328 }
2329
2330 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2331 {
2332         DRM_DEVICE;
2333         drm_radeon_private_t *dev_priv = dev->dev_private;
2334         drm_radeon_stipple_t stipple;
2335         u32 mask[32];
2336
2337         LOCK_TEST_WITH_RETURN(dev, filp);
2338
2339         DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2340                                  sizeof(stipple));
2341
2342         if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2343                 return DRM_ERR(EFAULT);
2344
2345         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2346
2347         radeon_cp_dispatch_stipple(dev, mask);
2348
2349         COMMIT_RING();
2350         return 0;
2351 }
2352
2353 static int radeon_cp_indirect(DRM_IOCTL_ARGS)
2354 {
2355         DRM_DEVICE;
2356         drm_radeon_private_t *dev_priv = dev->dev_private;
2357         drm_device_dma_t *dma = dev->dma;
2358         drm_buf_t *buf;
2359         drm_radeon_indirect_t indirect;
2360         RING_LOCALS;
2361
2362         LOCK_TEST_WITH_RETURN(dev, filp);
2363
2364         DRM_COPY_FROM_USER_IOCTL(indirect,
2365                                  (drm_radeon_indirect_t __user *) data,
2366                                  sizeof(indirect));
2367
2368         DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
2369                   indirect.idx, indirect.start, indirect.end, indirect.discard);
2370
2371         if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
2372                 DRM_ERROR("buffer index %d (of %d max)\n",
2373                           indirect.idx, dma->buf_count - 1);
2374                 return DRM_ERR(EINVAL);
2375         }
2376
2377         buf = dma->buflist[indirect.idx];
2378
2379         if (buf->filp != filp) {
2380                 DRM_ERROR("process %d using buffer owned by %p\n",
2381                           DRM_CURRENTPID, buf->filp);
2382                 return DRM_ERR(EINVAL);
2383         }
2384         if (buf->pending) {
2385                 DRM_ERROR("sending pending buffer %d\n", indirect.idx);
2386                 return DRM_ERR(EINVAL);
2387         }
2388
2389         if (indirect.start < buf->used) {
2390                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2391                           indirect.start, buf->used);
2392                 return DRM_ERR(EINVAL);
2393         }
2394
2395         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2396         VB_AGE_TEST_WITH_RETURN(dev_priv);
2397
2398         buf->used = indirect.end;
2399
2400         /* Wait for the 3D stream to idle before the indirect buffer
2401          * containing 2D acceleration commands is processed.
2402          */
2403         BEGIN_RING(2);
2404
2405         RADEON_WAIT_UNTIL_3D_IDLE();
2406
2407         ADVANCE_RING();
2408
2409         /* Dispatch the indirect buffer full of commands from the
2410          * X server.  This is insecure and is thus only available to
2411          * privileged clients.
2412          */
2413         radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
2414         if (indirect.discard) {
2415                 radeon_cp_discard_buffer(dev, buf);
2416         }
2417
2418         COMMIT_RING();
2419         return 0;
2420 }
2421
2422 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2423 {
2424         DRM_DEVICE;
2425         drm_radeon_private_t *dev_priv = dev->dev_private;
2426         drm_file_t *filp_priv;
2427         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2428         drm_device_dma_t *dma = dev->dma;
2429         drm_buf_t *buf;
2430         drm_radeon_vertex2_t vertex;
2431         int i;
2432         unsigned char laststate;
2433
2434         LOCK_TEST_WITH_RETURN(dev, filp);
2435
2436         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2437
2438         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2439                                  sizeof(vertex));
2440
2441         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2442                   DRM_CURRENTPID, vertex.idx, vertex.discard);
2443
2444         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2445                 DRM_ERROR("buffer index %d (of %d max)\n",
2446                           vertex.idx, dma->buf_count - 1);
2447                 return DRM_ERR(EINVAL);
2448         }
2449
2450         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2451         VB_AGE_TEST_WITH_RETURN(dev_priv);
2452
2453         buf = dma->buflist[vertex.idx];
2454
2455         if (buf->filp != filp) {
2456                 DRM_ERROR("process %d using buffer owned by %p\n",
2457                           DRM_CURRENTPID, buf->filp);
2458                 return DRM_ERR(EINVAL);
2459         }
2460
2461         if (buf->pending) {
2462                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2463                 return DRM_ERR(EINVAL);
2464         }
2465
2466         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2467                 return DRM_ERR(EINVAL);
2468
2469         for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2470                 drm_radeon_prim_t prim;
2471                 drm_radeon_tcl_prim_t tclprim;
2472
2473                 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2474                         return DRM_ERR(EFAULT);
2475
2476                 if (prim.stateidx != laststate) {
2477                         drm_radeon_state_t state;
2478
2479                         if (DRM_COPY_FROM_USER(&state,
2480                                                &vertex.state[prim.stateidx],
2481                                                sizeof(state)))
2482                                 return DRM_ERR(EFAULT);
2483
2484                         if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2485                                 DRM_ERROR("radeon_emit_state2 failed\n");
2486                                 return DRM_ERR(EINVAL);
2487                         }
2488
2489                         laststate = prim.stateidx;
2490                 }
2491
2492                 tclprim.start = prim.start;
2493                 tclprim.finish = prim.finish;
2494                 tclprim.prim = prim.prim;
2495                 tclprim.vc_format = prim.vc_format;
2496
2497                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2498                         tclprim.offset = prim.numverts * 64;
2499                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2500
2501                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2502                 } else {
2503                         tclprim.numverts = prim.numverts;
2504                         tclprim.offset = 0;     /* not used */
2505
2506                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2507                 }
2508
2509                 if (sarea_priv->nbox == 1)
2510                         sarea_priv->nbox = 0;
2511         }
2512
2513         if (vertex.discard) {
2514                 radeon_cp_discard_buffer(dev, buf);
2515         }
2516
2517         COMMIT_RING();
2518         return 0;
2519 }
2520
2521 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2522                                drm_file_t * filp_priv,
2523                                drm_radeon_cmd_header_t header,
2524                                drm_radeon_kcmd_buffer_t *cmdbuf)
2525 {
2526         int id = (int)header.packet.packet_id;
2527         int sz, reg;
2528         int *data = (int *)cmdbuf->buf;
2529         RING_LOCALS;
2530
2531         if (id >= RADEON_MAX_STATE_PACKETS)
2532                 return DRM_ERR(EINVAL);
2533
2534         sz = packet[id].len;
2535         reg = packet[id].start;
2536
2537         if (sz * sizeof(int) > cmdbuf->bufsz) {
2538                 DRM_ERROR("Packet size provided larger than data provided\n");
2539                 return DRM_ERR(EINVAL);
2540         }
2541
2542         if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2543                 DRM_ERROR("Packet verification failed\n");
2544                 return DRM_ERR(EINVAL);
2545         }
2546
2547         BEGIN_RING(sz + 1);
2548         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2549         OUT_RING_TABLE(data, sz);
2550         ADVANCE_RING();
2551
2552         cmdbuf->buf += sz * sizeof(int);
2553         cmdbuf->bufsz -= sz * sizeof(int);
2554         return 0;
2555 }
2556
2557 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2558                                           drm_radeon_cmd_header_t header,
2559                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2560 {
2561         int sz = header.scalars.count;
2562         int start = header.scalars.offset;
2563         int stride = header.scalars.stride;
2564         RING_LOCALS;
2565
2566         BEGIN_RING(3 + sz);
2567         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2568         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2569         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2570         OUT_RING_TABLE(cmdbuf->buf, sz);
2571         ADVANCE_RING();
2572         cmdbuf->buf += sz * sizeof(int);
2573         cmdbuf->bufsz -= sz * sizeof(int);
2574         return 0;
2575 }
2576
2577 /* God this is ugly
2578  */
2579 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2580                                            drm_radeon_cmd_header_t header,
2581                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2582 {
2583         int sz = header.scalars.count;
2584         int start = ((unsigned int)header.scalars.offset) + 0x100;
2585         int stride = header.scalars.stride;
2586         RING_LOCALS;
2587
2588         BEGIN_RING(3 + sz);
2589         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2590         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2591         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2592         OUT_RING_TABLE(cmdbuf->buf, sz);
2593         ADVANCE_RING();
2594         cmdbuf->buf += sz * sizeof(int);
2595         cmdbuf->bufsz -= sz * sizeof(int);
2596         return 0;
2597 }
2598
2599 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2600                                           drm_radeon_cmd_header_t header,
2601                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2602 {
2603         int sz = header.vectors.count;
2604         int start = header.vectors.offset;
2605         int stride = header.vectors.stride;
2606         RING_LOCALS;
2607
2608         BEGIN_RING(5 + sz);
2609         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2610         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2611         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2612         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2613         OUT_RING_TABLE(cmdbuf->buf, sz);
2614         ADVANCE_RING();
2615
2616         cmdbuf->buf += sz * sizeof(int);
2617         cmdbuf->bufsz -= sz * sizeof(int);
2618         return 0;
2619 }
2620
2621 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
2622                                           drm_radeon_cmd_header_t header,
2623                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2624 {
2625         int sz = header.veclinear.count * 4;
2626         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
2627         RING_LOCALS;
2628
2629         if (!sz)
2630                 return 0;
2631         if (sz * 4 > cmdbuf->bufsz)
2632                 return DRM_ERR(EINVAL);
2633
2634         BEGIN_RING(5 + sz);
2635         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
2636         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2637         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2638         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2639         OUT_RING_TABLE(cmdbuf->buf, sz);
2640         ADVANCE_RING();
2641
2642         cmdbuf->buf += sz * sizeof(int);
2643         cmdbuf->bufsz -= sz * sizeof(int);
2644         return 0;
2645 }
2646
2647 static int radeon_emit_packet3(drm_device_t * dev,
2648                                drm_file_t * filp_priv,
2649                                drm_radeon_kcmd_buffer_t *cmdbuf)
2650 {
2651         drm_radeon_private_t *dev_priv = dev->dev_private;
2652         unsigned int cmdsz;
2653         int ret;
2654         RING_LOCALS;
2655
2656         DRM_DEBUG("\n");
2657
2658         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2659                                                   cmdbuf, &cmdsz))) {
2660                 DRM_ERROR("Packet verification failed\n");
2661                 return ret;
2662         }
2663
2664         BEGIN_RING(cmdsz);
2665         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2666         ADVANCE_RING();
2667
2668         cmdbuf->buf += cmdsz * 4;
2669         cmdbuf->bufsz -= cmdsz * 4;
2670         return 0;
2671 }
2672
2673 static int radeon_emit_packet3_cliprect(drm_device_t *dev,
2674                                         drm_file_t *filp_priv,
2675                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2676                                         int orig_nbox)
2677 {
2678         drm_radeon_private_t *dev_priv = dev->dev_private;
2679         drm_clip_rect_t box;
2680         unsigned int cmdsz;
2681         int ret;
2682         drm_clip_rect_t __user *boxes = cmdbuf->boxes;
2683         int i = 0;
2684         RING_LOCALS;
2685
2686         DRM_DEBUG("\n");
2687
2688         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2689                                                   cmdbuf, &cmdsz))) {
2690                 DRM_ERROR("Packet verification failed\n");
2691                 return ret;
2692         }
2693
2694         if (!orig_nbox)
2695                 goto out;
2696
2697         do {
2698                 if (i < cmdbuf->nbox) {
2699                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2700                                 return DRM_ERR(EFAULT);
2701                         /* FIXME The second and subsequent times round
2702                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2703                          * calling emit_clip_rect(). This fixes a
2704                          * lockup on fast machines when sending
2705                          * several cliprects with a cmdbuf, as when
2706                          * waving a 2D window over a 3D
2707                          * window. Something in the commands from user
2708                          * space seems to hang the card when they're
2709                          * sent several times in a row. That would be
2710                          * the correct place to fix it but this works
2711                          * around it until I can figure that out - Tim
2712                          * Smith */
2713                         if (i) {
2714                                 BEGIN_RING(2);
2715                                 RADEON_WAIT_UNTIL_3D_IDLE();
2716                                 ADVANCE_RING();
2717                         }
2718                         radeon_emit_clip_rect(dev_priv, &box);
2719                 }
2720
2721                 BEGIN_RING(cmdsz);
2722                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2723                 ADVANCE_RING();
2724
2725         } while (++i < cmdbuf->nbox);
2726         if (cmdbuf->nbox == 1)
2727                 cmdbuf->nbox = 0;
2728
2729       out:
2730         cmdbuf->buf += cmdsz * 4;
2731         cmdbuf->bufsz -= cmdsz * 4;
2732         return 0;
2733 }
2734
2735 static int radeon_emit_wait(drm_device_t * dev, int flags)
2736 {
2737         drm_radeon_private_t *dev_priv = dev->dev_private;
2738         RING_LOCALS;
2739
2740         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2741         switch (flags) {
2742         case RADEON_WAIT_2D:
2743                 BEGIN_RING(2);
2744                 RADEON_WAIT_UNTIL_2D_IDLE();
2745                 ADVANCE_RING();
2746                 break;
2747         case RADEON_WAIT_3D:
2748                 BEGIN_RING(2);
2749                 RADEON_WAIT_UNTIL_3D_IDLE();
2750                 ADVANCE_RING();
2751                 break;
2752         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2753                 BEGIN_RING(2);
2754                 RADEON_WAIT_UNTIL_IDLE();
2755                 ADVANCE_RING();
2756                 break;
2757         default:
2758                 return DRM_ERR(EINVAL);
2759         }
2760
2761         return 0;
2762 }
2763
2764 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2765 {
2766         DRM_DEVICE;
2767         drm_radeon_private_t *dev_priv = dev->dev_private;
2768         drm_file_t *filp_priv;
2769         drm_device_dma_t *dma = dev->dma;
2770         drm_buf_t *buf = NULL;
2771         int idx;
2772         drm_radeon_kcmd_buffer_t cmdbuf;
2773         drm_radeon_cmd_header_t header;
2774         int orig_nbox, orig_bufsz;
2775         char *kbuf = NULL;
2776
2777         LOCK_TEST_WITH_RETURN(dev, filp);
2778
2779         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2780
2781         DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2782                                  (drm_radeon_cmd_buffer_t __user *) data,
2783                                  sizeof(cmdbuf));
2784
2785         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2786         VB_AGE_TEST_WITH_RETURN(dev_priv);
2787
2788         if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2789                 return DRM_ERR(EINVAL);
2790         }
2791
2792         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2793          * races between checking values and using those values in other code,
2794          * and simply to avoid a lot of function calls to copy in data.
2795          */
2796         orig_bufsz = cmdbuf.bufsz;
2797         if (orig_bufsz != 0) {
2798                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2799                 if (kbuf == NULL)
2800                         return DRM_ERR(ENOMEM);
2801                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
2802                                        cmdbuf.bufsz)) {
2803                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2804                         return DRM_ERR(EFAULT);
2805                 }
2806                 cmdbuf.buf = kbuf;
2807         }
2808
2809         orig_nbox = cmdbuf.nbox;
2810
2811         if (dev_priv->microcode_version == UCODE_R300) {
2812                 int temp;
2813                 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2814
2815                 if (orig_bufsz != 0)
2816                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2817
2818                 return temp;
2819         }
2820
2821         /* microcode_version != r300 */
2822         while (cmdbuf.bufsz >= sizeof(header)) {
2823
2824                 header.i = *(int *)cmdbuf.buf;
2825                 cmdbuf.buf += sizeof(header);
2826                 cmdbuf.bufsz -= sizeof(header);
2827
2828                 switch (header.header.cmd_type) {
2829                 case RADEON_CMD_PACKET:
2830                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2831                         if (radeon_emit_packets
2832                             (dev_priv, filp_priv, header, &cmdbuf)) {
2833                                 DRM_ERROR("radeon_emit_packets failed\n");
2834                                 goto err;
2835                         }
2836                         break;
2837
2838                 case RADEON_CMD_SCALARS:
2839                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2840                         if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2841                                 DRM_ERROR("radeon_emit_scalars failed\n");
2842                                 goto err;
2843                         }
2844                         break;
2845
2846                 case RADEON_CMD_VECTORS:
2847                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2848                         if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2849                                 DRM_ERROR("radeon_emit_vectors failed\n");
2850                                 goto err;
2851                         }
2852                         break;
2853
2854                 case RADEON_CMD_DMA_DISCARD:
2855                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2856                         idx = header.dma.buf_idx;
2857                         if (idx < 0 || idx >= dma->buf_count) {
2858                                 DRM_ERROR("buffer index %d (of %d max)\n",
2859                                           idx, dma->buf_count - 1);
2860                                 goto err;
2861                         }
2862
2863                         buf = dma->buflist[idx];
2864                         if (buf->filp != filp || buf->pending) {
2865                                 DRM_ERROR("bad buffer %p %p %d\n",
2866                                           buf->filp, filp, buf->pending);
2867                                 goto err;
2868                         }
2869
2870                         radeon_cp_discard_buffer(dev, buf);
2871                         break;
2872
2873                 case RADEON_CMD_PACKET3:
2874                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2875                         if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2876                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2877                                 goto err;
2878                         }
2879                         break;
2880
2881                 case RADEON_CMD_PACKET3_CLIP:
2882                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2883                         if (radeon_emit_packet3_cliprect
2884                             (dev, filp_priv, &cmdbuf, orig_nbox)) {
2885                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2886                                 goto err;
2887                         }
2888                         break;
2889
2890                 case RADEON_CMD_SCALARS2:
2891                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2892                         if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2893                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2894                                 goto err;
2895                         }
2896                         break;
2897
2898                 case RADEON_CMD_WAIT:
2899                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2900                         if (radeon_emit_wait(dev, header.wait.flags)) {
2901                                 DRM_ERROR("radeon_emit_wait failed\n");
2902                                 goto err;
2903                         }
2904                         break;
2905                 case RADEON_CMD_VECLINEAR:
2906                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
2907                         if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
2908                                 DRM_ERROR("radeon_emit_veclinear failed\n");
2909                                 goto err;
2910                         }
2911                         break;
2912
2913                 default:
2914                         DRM_ERROR("bad cmd_type %d at %p\n",
2915                                   header.header.cmd_type,
2916                                   cmdbuf.buf - sizeof(header));
2917                         goto err;
2918                 }
2919         }
2920
2921         if (orig_bufsz != 0)
2922                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2923
2924         DRM_DEBUG("DONE\n");
2925         COMMIT_RING();
2926         return 0;
2927
2928       err:
2929         if (orig_bufsz != 0)
2930                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2931         return DRM_ERR(EINVAL);
2932 }
2933
2934 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2935 {
2936         DRM_DEVICE;
2937         drm_radeon_private_t *dev_priv = dev->dev_private;
2938         drm_radeon_getparam_t param;
2939         int value;
2940
2941         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2942                                  sizeof(param));
2943
2944         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2945
2946         switch (param.param) {
2947         case RADEON_PARAM_GART_BUFFER_OFFSET:
2948                 value = dev_priv->gart_buffers_offset;
2949                 break;
2950         case RADEON_PARAM_LAST_FRAME:
2951                 dev_priv->stats.last_frame_reads++;
2952                 value = GET_SCRATCH(0);
2953                 break;
2954         case RADEON_PARAM_LAST_DISPATCH:
2955                 value = GET_SCRATCH(1);
2956                 break;
2957         case RADEON_PARAM_LAST_CLEAR:
2958                 dev_priv->stats.last_clear_reads++;
2959                 value = GET_SCRATCH(2);
2960                 break;
2961         case RADEON_PARAM_IRQ_NR:
2962                 value = dev->irq;
2963                 break;
2964         case RADEON_PARAM_GART_BASE:
2965                 value = dev_priv->gart_vm_start;
2966                 break;
2967         case RADEON_PARAM_REGISTER_HANDLE:
2968                 value = dev_priv->mmio->offset;
2969                 break;
2970         case RADEON_PARAM_STATUS_HANDLE:
2971                 value = dev_priv->ring_rptr_offset;
2972                 break;
2973 #if BITS_PER_LONG == 32
2974                 /*
2975                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2976                  * pointer which can't fit into an int-sized variable.  According to
2977                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2978                  * not supporting it shouldn't be a problem.  If the same functionality
2979                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2980                  * so backwards-compatibility for the embedded platforms can be
2981                  * maintained.  --davidm 4-Feb-2004.
2982                  */
2983         case RADEON_PARAM_SAREA_HANDLE:
2984                 /* The lock is the first dword in the sarea. */
2985                 value = (long)dev->lock.hw_lock;
2986                 break;
2987 #endif
2988         case RADEON_PARAM_GART_TEX_HANDLE:
2989                 value = dev_priv->gart_textures_offset;
2990                 break;
2991         case RADEON_PARAM_SCRATCH_OFFSET:
2992                 if (!dev_priv->writeback_works)
2993                         return DRM_ERR(EINVAL);
2994                 value = RADEON_SCRATCH_REG_OFFSET;
2995                 break;
2996         case RADEON_PARAM_CARD_TYPE:
2997                 if (dev_priv->flags & RADEON_IS_PCIE)
2998                         value = RADEON_CARD_PCIE;
2999                 else if (dev_priv->flags & RADEON_IS_AGP)
3000                         value = RADEON_CARD_AGP;
3001                 else
3002                         value = RADEON_CARD_PCI;
3003                 break;
3004         default:
3005                 DRM_DEBUG("Invalid parameter %d\n", param.param);
3006                 return DRM_ERR(EINVAL);
3007         }
3008
3009         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3010                 DRM_ERROR("copy_to_user\n");
3011                 return DRM_ERR(EFAULT);
3012         }
3013
3014         return 0;
3015 }
3016
3017 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3018 {
3019         DRM_DEVICE;
3020         drm_radeon_private_t *dev_priv = dev->dev_private;
3021         drm_file_t *filp_priv;
3022         drm_radeon_setparam_t sp;
3023         struct drm_radeon_driver_file_fields *radeon_priv;
3024
3025         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3026
3027         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3028                                  sizeof(sp));
3029
3030         switch (sp.param) {
3031         case RADEON_SETPARAM_FB_LOCATION:
3032                 radeon_priv = filp_priv->driver_priv;
3033                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3034                 break;
3035         case RADEON_SETPARAM_SWITCH_TILING:
3036                 if (sp.value == 0) {
3037                         DRM_DEBUG("color tiling disabled\n");
3038                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3039                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3040                         dev_priv->sarea_priv->tiling_enabled = 0;
3041                 } else if (sp.value == 1) {
3042                         DRM_DEBUG("color tiling enabled\n");
3043                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3044                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3045                         dev_priv->sarea_priv->tiling_enabled = 1;
3046                 }
3047                 break;
3048         case RADEON_SETPARAM_PCIGART_LOCATION:
3049                 dev_priv->pcigart_offset = sp.value;
3050                 break;
3051         case RADEON_SETPARAM_NEW_MEMMAP:
3052                 dev_priv->new_memmap = sp.value;
3053                 break;
3054         default:
3055                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3056                 return DRM_ERR(EINVAL);
3057         }
3058
3059         return 0;
3060 }
3061
3062 /* When a client dies:
3063  *    - Check for and clean up flipped page state
3064  *    - Free any alloced GART memory.
3065  *    - Free any alloced radeon surfaces.
3066  *
3067  * DRM infrastructure takes care of reclaiming dma buffers.
3068  */
3069 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3070 {
3071         if (dev->dev_private) {
3072                 drm_radeon_private_t *dev_priv = dev->dev_private;
3073                 if (dev_priv->page_flipping) {
3074                         radeon_do_cleanup_pageflip(dev);
3075                 }
3076                 radeon_mem_release(filp, dev_priv->gart_heap);
3077                 radeon_mem_release(filp, dev_priv->fb_heap);
3078                 radeon_surfaces_release(filp, dev_priv);
3079         }
3080 }
3081
3082 void radeon_driver_lastclose(drm_device_t * dev)
3083 {
3084         radeon_do_release(dev);
3085 }
3086
3087 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3088 {
3089         drm_radeon_private_t *dev_priv = dev->dev_private;
3090         struct drm_radeon_driver_file_fields *radeon_priv;
3091
3092         DRM_DEBUG("\n");
3093         radeon_priv =
3094             (struct drm_radeon_driver_file_fields *)
3095             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3096
3097         if (!radeon_priv)
3098                 return -ENOMEM;
3099
3100         filp_priv->driver_priv = radeon_priv;
3101
3102         if (dev_priv)
3103                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3104         else
3105                 radeon_priv->radeon_fb_delta = 0;
3106         return 0;
3107 }
3108
3109 void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
3110 {
3111         struct drm_radeon_driver_file_fields *radeon_priv =
3112             filp_priv->driver_priv;
3113
3114         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3115 }
3116
3117 drm_ioctl_desc_t radeon_ioctls[] = {
3118         [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3119         [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3120         [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3121         [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3122         [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
3123         [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
3124         [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
3125         [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
3126         [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
3127         [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
3128         [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
3129         [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
3130         [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
3131         [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
3132         [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3133         [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
3134         [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
3135         [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
3136         [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
3137         [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
3138         [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
3139         [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3140         [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
3141         [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
3142         [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
3143         [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
3144         [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
3145 };
3146
3147 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);