[ACPI] merge 3549 4320 4485 4588 4980 5483 5651 acpica asus fops pnpacpi branches...
[linux-2.6] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     drm_file_t * filp_priv,
43                                                     u32 *offset)
44 {
45         u32 off = *offset;
46         struct drm_radeon_driver_file_fields *radeon_priv;
47
48         if (off >= dev_priv->fb_location &&
49             off < (dev_priv->gart_vm_start + dev_priv->gart_size))
50                 return 0;
51
52         radeon_priv = filp_priv->driver_priv;
53         off += radeon_priv->radeon_fb_delta;
54
55         DRM_DEBUG("offset fixed up to 0x%x\n", off);
56
57         if (off < dev_priv->fb_location ||
58             off >= (dev_priv->gart_vm_start + dev_priv->gart_size))
59                 return DRM_ERR(EINVAL);
60
61         *offset = off;
62
63         return 0;
64 }
65
66 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
67                                                      dev_priv,
68                                                      drm_file_t * filp_priv,
69                                                      int id, u32 *data)
70 {
71         switch (id) {
72
73         case RADEON_EMIT_PP_MISC:
74                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
75                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
76                         DRM_ERROR("Invalid depth buffer offset\n");
77                         return DRM_ERR(EINVAL);
78                 }
79                 break;
80
81         case RADEON_EMIT_PP_CNTL:
82                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
83                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
84                         DRM_ERROR("Invalid colour buffer offset\n");
85                         return DRM_ERR(EINVAL);
86                 }
87                 break;
88
89         case R200_EMIT_PP_TXOFFSET_0:
90         case R200_EMIT_PP_TXOFFSET_1:
91         case R200_EMIT_PP_TXOFFSET_2:
92         case R200_EMIT_PP_TXOFFSET_3:
93         case R200_EMIT_PP_TXOFFSET_4:
94         case R200_EMIT_PP_TXOFFSET_5:
95                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
96                                                   &data[0])) {
97                         DRM_ERROR("Invalid R200 texture offset\n");
98                         return DRM_ERR(EINVAL);
99                 }
100                 break;
101
102         case RADEON_EMIT_PP_TXFILTER_0:
103         case RADEON_EMIT_PP_TXFILTER_1:
104         case RADEON_EMIT_PP_TXFILTER_2:
105                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
106                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
107                         DRM_ERROR("Invalid R100 texture offset\n");
108                         return DRM_ERR(EINVAL);
109                 }
110                 break;
111
112         case R200_EMIT_PP_CUBIC_OFFSETS_0:
113         case R200_EMIT_PP_CUBIC_OFFSETS_1:
114         case R200_EMIT_PP_CUBIC_OFFSETS_2:
115         case R200_EMIT_PP_CUBIC_OFFSETS_3:
116         case R200_EMIT_PP_CUBIC_OFFSETS_4:
117         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
118                         int i;
119                         for (i = 0; i < 5; i++) {
120                                 if (radeon_check_and_fixup_offset(dev_priv,
121                                                                   filp_priv,
122                                                                   &data[i])) {
123                                         DRM_ERROR
124                                             ("Invalid R200 cubic texture offset\n");
125                                         return DRM_ERR(EINVAL);
126                                 }
127                         }
128                         break;
129                 }
130
131         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
132         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
133         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
134                         int i;
135                         for (i = 0; i < 5; i++) {
136                                 if (radeon_check_and_fixup_offset(dev_priv,
137                                                                   filp_priv,
138                                                                   &data[i])) {
139                                         DRM_ERROR
140                                             ("Invalid R100 cubic texture offset\n");
141                                         return DRM_ERR(EINVAL);
142                                 }
143                         }
144                 }
145                 break;
146
147         case RADEON_EMIT_RB3D_COLORPITCH:
148         case RADEON_EMIT_RE_LINE_PATTERN:
149         case RADEON_EMIT_SE_LINE_WIDTH:
150         case RADEON_EMIT_PP_LUM_MATRIX:
151         case RADEON_EMIT_PP_ROT_MATRIX_0:
152         case RADEON_EMIT_RB3D_STENCILREFMASK:
153         case RADEON_EMIT_SE_VPORT_XSCALE:
154         case RADEON_EMIT_SE_CNTL:
155         case RADEON_EMIT_SE_CNTL_STATUS:
156         case RADEON_EMIT_RE_MISC:
157         case RADEON_EMIT_PP_BORDER_COLOR_0:
158         case RADEON_EMIT_PP_BORDER_COLOR_1:
159         case RADEON_EMIT_PP_BORDER_COLOR_2:
160         case RADEON_EMIT_SE_ZBIAS_FACTOR:
161         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
162         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
163         case R200_EMIT_PP_TXCBLEND_0:
164         case R200_EMIT_PP_TXCBLEND_1:
165         case R200_EMIT_PP_TXCBLEND_2:
166         case R200_EMIT_PP_TXCBLEND_3:
167         case R200_EMIT_PP_TXCBLEND_4:
168         case R200_EMIT_PP_TXCBLEND_5:
169         case R200_EMIT_PP_TXCBLEND_6:
170         case R200_EMIT_PP_TXCBLEND_7:
171         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
172         case R200_EMIT_TFACTOR_0:
173         case R200_EMIT_VTX_FMT_0:
174         case R200_EMIT_VAP_CTL:
175         case R200_EMIT_MATRIX_SELECT_0:
176         case R200_EMIT_TEX_PROC_CTL_2:
177         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
178         case R200_EMIT_PP_TXFILTER_0:
179         case R200_EMIT_PP_TXFILTER_1:
180         case R200_EMIT_PP_TXFILTER_2:
181         case R200_EMIT_PP_TXFILTER_3:
182         case R200_EMIT_PP_TXFILTER_4:
183         case R200_EMIT_PP_TXFILTER_5:
184         case R200_EMIT_VTE_CNTL:
185         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
186         case R200_EMIT_PP_TAM_DEBUG3:
187         case R200_EMIT_PP_CNTL_X:
188         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
189         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
190         case R200_EMIT_RE_SCISSOR_TL_0:
191         case R200_EMIT_RE_SCISSOR_TL_1:
192         case R200_EMIT_RE_SCISSOR_TL_2:
193         case R200_EMIT_SE_VAP_CNTL_STATUS:
194         case R200_EMIT_SE_VTX_STATE_CNTL:
195         case R200_EMIT_RE_POINTSIZE:
196         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
197         case R200_EMIT_PP_CUBIC_FACES_0:
198         case R200_EMIT_PP_CUBIC_FACES_1:
199         case R200_EMIT_PP_CUBIC_FACES_2:
200         case R200_EMIT_PP_CUBIC_FACES_3:
201         case R200_EMIT_PP_CUBIC_FACES_4:
202         case R200_EMIT_PP_CUBIC_FACES_5:
203         case RADEON_EMIT_PP_TEX_SIZE_0:
204         case RADEON_EMIT_PP_TEX_SIZE_1:
205         case RADEON_EMIT_PP_TEX_SIZE_2:
206         case R200_EMIT_RB3D_BLENDCOLOR:
207         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
208         case RADEON_EMIT_PP_CUBIC_FACES_0:
209         case RADEON_EMIT_PP_CUBIC_FACES_1:
210         case RADEON_EMIT_PP_CUBIC_FACES_2:
211         case R200_EMIT_PP_TRI_PERF_CNTL:
212         case R200_EMIT_PP_AFS_0:
213         case R200_EMIT_PP_AFS_1:
214         case R200_EMIT_ATF_TFACTOR:
215         case R200_EMIT_PP_TXCTLALL_0:
216         case R200_EMIT_PP_TXCTLALL_1:
217         case R200_EMIT_PP_TXCTLALL_2:
218         case R200_EMIT_PP_TXCTLALL_3:
219         case R200_EMIT_PP_TXCTLALL_4:
220         case R200_EMIT_PP_TXCTLALL_5:
221                 /* These packets don't contain memory offsets */
222                 break;
223
224         default:
225                 DRM_ERROR("Unknown state packet ID %d\n", id);
226                 return DRM_ERR(EINVAL);
227         }
228
229         return 0;
230 }
231
232 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
233                                                      dev_priv,
234                                                      drm_file_t *filp_priv,
235                                                      drm_radeon_kcmd_buffer_t *
236                                                      cmdbuf,
237                                                      unsigned int *cmdsz)
238 {
239         u32 *cmd = (u32 *) cmdbuf->buf;
240
241         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
242
243         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
244                 DRM_ERROR("Not a type 3 packet\n");
245                 return DRM_ERR(EINVAL);
246         }
247
248         if (4 * *cmdsz > cmdbuf->bufsz) {
249                 DRM_ERROR("Packet size larger than size of data provided\n");
250                 return DRM_ERR(EINVAL);
251         }
252
253         /* Check client state and fix it up if necessary */
254         if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
255                 u32 offset;
256
257                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
258                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
259                         offset = cmd[2] << 10;
260                         if (radeon_check_and_fixup_offset
261                             (dev_priv, filp_priv, &offset)) {
262                                 DRM_ERROR("Invalid first packet offset\n");
263                                 return DRM_ERR(EINVAL);
264                         }
265                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
266                 }
267
268                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
269                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
270                         offset = cmd[3] << 10;
271                         if (radeon_check_and_fixup_offset
272                             (dev_priv, filp_priv, &offset)) {
273                                 DRM_ERROR("Invalid second packet offset\n");
274                                 return DRM_ERR(EINVAL);
275                         }
276                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
277                 }
278         }
279
280         return 0;
281 }
282
283 /* ================================================================
284  * CP hardware state programming functions
285  */
286
287 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
288                                              drm_clip_rect_t * box)
289 {
290         RING_LOCALS;
291
292         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
293                   box->x1, box->y1, box->x2, box->y2);
294
295         BEGIN_RING(4);
296         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
297         OUT_RING((box->y1 << 16) | box->x1);
298         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
299         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
300         ADVANCE_RING();
301 }
302
303 /* Emit 1.1 state
304  */
305 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
306                              drm_file_t * filp_priv,
307                              drm_radeon_context_regs_t * ctx,
308                              drm_radeon_texture_regs_t * tex,
309                              unsigned int dirty)
310 {
311         RING_LOCALS;
312         DRM_DEBUG("dirty=0x%08x\n", dirty);
313
314         if (dirty & RADEON_UPLOAD_CONTEXT) {
315                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
316                                                   &ctx->rb3d_depthoffset)) {
317                         DRM_ERROR("Invalid depth buffer offset\n");
318                         return DRM_ERR(EINVAL);
319                 }
320
321                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
322                                                   &ctx->rb3d_coloroffset)) {
323                         DRM_ERROR("Invalid depth buffer offset\n");
324                         return DRM_ERR(EINVAL);
325                 }
326
327                 BEGIN_RING(14);
328                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
329                 OUT_RING(ctx->pp_misc);
330                 OUT_RING(ctx->pp_fog_color);
331                 OUT_RING(ctx->re_solid_color);
332                 OUT_RING(ctx->rb3d_blendcntl);
333                 OUT_RING(ctx->rb3d_depthoffset);
334                 OUT_RING(ctx->rb3d_depthpitch);
335                 OUT_RING(ctx->rb3d_zstencilcntl);
336                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
337                 OUT_RING(ctx->pp_cntl);
338                 OUT_RING(ctx->rb3d_cntl);
339                 OUT_RING(ctx->rb3d_coloroffset);
340                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
341                 OUT_RING(ctx->rb3d_colorpitch);
342                 ADVANCE_RING();
343         }
344
345         if (dirty & RADEON_UPLOAD_VERTFMT) {
346                 BEGIN_RING(2);
347                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
348                 OUT_RING(ctx->se_coord_fmt);
349                 ADVANCE_RING();
350         }
351
352         if (dirty & RADEON_UPLOAD_LINE) {
353                 BEGIN_RING(5);
354                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
355                 OUT_RING(ctx->re_line_pattern);
356                 OUT_RING(ctx->re_line_state);
357                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
358                 OUT_RING(ctx->se_line_width);
359                 ADVANCE_RING();
360         }
361
362         if (dirty & RADEON_UPLOAD_BUMPMAP) {
363                 BEGIN_RING(5);
364                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
365                 OUT_RING(ctx->pp_lum_matrix);
366                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
367                 OUT_RING(ctx->pp_rot_matrix_0);
368                 OUT_RING(ctx->pp_rot_matrix_1);
369                 ADVANCE_RING();
370         }
371
372         if (dirty & RADEON_UPLOAD_MASKS) {
373                 BEGIN_RING(4);
374                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
375                 OUT_RING(ctx->rb3d_stencilrefmask);
376                 OUT_RING(ctx->rb3d_ropcntl);
377                 OUT_RING(ctx->rb3d_planemask);
378                 ADVANCE_RING();
379         }
380
381         if (dirty & RADEON_UPLOAD_VIEWPORT) {
382                 BEGIN_RING(7);
383                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
384                 OUT_RING(ctx->se_vport_xscale);
385                 OUT_RING(ctx->se_vport_xoffset);
386                 OUT_RING(ctx->se_vport_yscale);
387                 OUT_RING(ctx->se_vport_yoffset);
388                 OUT_RING(ctx->se_vport_zscale);
389                 OUT_RING(ctx->se_vport_zoffset);
390                 ADVANCE_RING();
391         }
392
393         if (dirty & RADEON_UPLOAD_SETUP) {
394                 BEGIN_RING(4);
395                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
396                 OUT_RING(ctx->se_cntl);
397                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
398                 OUT_RING(ctx->se_cntl_status);
399                 ADVANCE_RING();
400         }
401
402         if (dirty & RADEON_UPLOAD_MISC) {
403                 BEGIN_RING(2);
404                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
405                 OUT_RING(ctx->re_misc);
406                 ADVANCE_RING();
407         }
408
409         if (dirty & RADEON_UPLOAD_TEX0) {
410                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
411                                                   &tex[0].pp_txoffset)) {
412                         DRM_ERROR("Invalid texture offset for unit 0\n");
413                         return DRM_ERR(EINVAL);
414                 }
415
416                 BEGIN_RING(9);
417                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
418                 OUT_RING(tex[0].pp_txfilter);
419                 OUT_RING(tex[0].pp_txformat);
420                 OUT_RING(tex[0].pp_txoffset);
421                 OUT_RING(tex[0].pp_txcblend);
422                 OUT_RING(tex[0].pp_txablend);
423                 OUT_RING(tex[0].pp_tfactor);
424                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
425                 OUT_RING(tex[0].pp_border_color);
426                 ADVANCE_RING();
427         }
428
429         if (dirty & RADEON_UPLOAD_TEX1) {
430                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
431                                                   &tex[1].pp_txoffset)) {
432                         DRM_ERROR("Invalid texture offset for unit 1\n");
433                         return DRM_ERR(EINVAL);
434                 }
435
436                 BEGIN_RING(9);
437                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
438                 OUT_RING(tex[1].pp_txfilter);
439                 OUT_RING(tex[1].pp_txformat);
440                 OUT_RING(tex[1].pp_txoffset);
441                 OUT_RING(tex[1].pp_txcblend);
442                 OUT_RING(tex[1].pp_txablend);
443                 OUT_RING(tex[1].pp_tfactor);
444                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
445                 OUT_RING(tex[1].pp_border_color);
446                 ADVANCE_RING();
447         }
448
449         if (dirty & RADEON_UPLOAD_TEX2) {
450                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
451                                                   &tex[2].pp_txoffset)) {
452                         DRM_ERROR("Invalid texture offset for unit 2\n");
453                         return DRM_ERR(EINVAL);
454                 }
455
456                 BEGIN_RING(9);
457                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
458                 OUT_RING(tex[2].pp_txfilter);
459                 OUT_RING(tex[2].pp_txformat);
460                 OUT_RING(tex[2].pp_txoffset);
461                 OUT_RING(tex[2].pp_txcblend);
462                 OUT_RING(tex[2].pp_txablend);
463                 OUT_RING(tex[2].pp_tfactor);
464                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
465                 OUT_RING(tex[2].pp_border_color);
466                 ADVANCE_RING();
467         }
468
469         return 0;
470 }
471
472 /* Emit 1.2 state
473  */
474 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
475                               drm_file_t * filp_priv,
476                               drm_radeon_state_t * state)
477 {
478         RING_LOCALS;
479
480         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
481                 BEGIN_RING(3);
482                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
483                 OUT_RING(state->context2.se_zbias_factor);
484                 OUT_RING(state->context2.se_zbias_constant);
485                 ADVANCE_RING();
486         }
487
488         return radeon_emit_state(dev_priv, filp_priv, &state->context,
489                                  state->tex, state->dirty);
490 }
491
492 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
493  * 1.3 cmdbuffers allow all previous state to be updated as well as
494  * the tcl scalar and vector areas.
 *
 * The table below has RADEON_MAX_STATE_PACKETS slots and is filled
 * positionally; the numeric trailing comments on a few entries give
 * that entry's index.
 * NOTE(review): the entry order presumably mirrors the *_EMIT_* packet
 * IDs handled in radeon_check_and_fixup_packets() -- confirm against
 * radeon_drm.h before reordering or inserting entries.
 *
 * The last seven entries (R200_ATF_TFACTOR, R200_PP_TXCTLALL_0..5) reuse
 * the start register of earlier entries with a longer len of 8 and a
 * different name.
495  */
496 static struct {
497         int start;      /* register the packet starts at (e.g. RADEON_PP_MISC) */
498         int len;        /* presumably the number of consecutive dwords -- TODO confirm against the table's users */
499         const char *name;       /* printable name of the packet */
500 } packet[RADEON_MAX_STATE_PACKETS] = {
501         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
502         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
503         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
504         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
505         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
506         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
507         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
508         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
509         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
510         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
511         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
512         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
513         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
514         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
515         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
516         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
517         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
518         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
519         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
520         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
521         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
522                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
523         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
524         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
525         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
526         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
527         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
528         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
529         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
530         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
531         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
532         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
533         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
534         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
535         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
536         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
537         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
538         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
539         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
540         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
541         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
542         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
543         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
544         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
545         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
546         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
547         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
548         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
549         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
550         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
551         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
552          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
553         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
554         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
555         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
556         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
557         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
558         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
559         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
560         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
561         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
562         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
563         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
564                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
565         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
566         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
567         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
568         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
569         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
570         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
571         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
572         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
573         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
574         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
575         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
576         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
577         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
578         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
579         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
580         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
581         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
582         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
583         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
584         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
585         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
586         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
587         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
588         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
589         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
590         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
591         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
592         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
593         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
594         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
595         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
596         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
597         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
598 };
599
600 /* ================================================================
601  * Performance monitoring functions
602  */
603
604 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
605                              int x, int y, int w, int h, int r, int g, int b)
606 {
607         u32 color;
608         RING_LOCALS;
609
610         x += dev_priv->sarea_priv->boxes[0].x1;
611         y += dev_priv->sarea_priv->boxes[0].y1;
612
613         switch (dev_priv->color_fmt) {
614         case RADEON_COLOR_FORMAT_RGB565:
615                 color = (((r & 0xf8) << 8) |
616                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
617                 break;
618         case RADEON_COLOR_FORMAT_ARGB8888:
619         default:
620                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
621                 break;
622         }
623
624         BEGIN_RING(4);
625         RADEON_WAIT_UNTIL_3D_IDLE();
626         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
627         OUT_RING(0xffffffff);
628         ADVANCE_RING();
629
630         BEGIN_RING(6);
631
632         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
633         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
634                  RADEON_GMC_BRUSH_SOLID_COLOR |
635                  (dev_priv->color_fmt << 8) |
636                  RADEON_GMC_SRC_DATATYPE_COLOR |
637                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
638
639         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
640                 OUT_RING(dev_priv->front_pitch_offset);
641         } else {
642                 OUT_RING(dev_priv->back_pitch_offset);
643         }
644
645         OUT_RING(color);
646
647         OUT_RING((x << 16) | y);
648         OUT_RING((w << 16) | h);
649
650         ADVANCE_RING();
651 }
652
653 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
654 {
655         /* Collapse various things into a wait flag -- trying to
656          * guess if userspase slept -- better just to have them tell us.
657          */
658         if (dev_priv->stats.last_frame_reads > 1 ||
659             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
660                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
661         }
662
663         if (dev_priv->stats.freelist_loops) {
664                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
665         }
666
667         /* Purple box for page flipping
668          */
669         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
670                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
671
672         /* Red box if we have to wait for idle at any point
673          */
674         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
675                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
676
677         /* Blue box: lost context?
678          */
679
680         /* Yellow box for texture swaps
681          */
682         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
683                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
684
685         /* Green box if hardware never idles (as far as we can tell)
686          */
687         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
688                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
689
690         /* Draw bars indicating number of buffers allocated
691          * (not a great measure, easily confused)
692          */
693         if (dev_priv->stats.requested_bufs) {
694                 if (dev_priv->stats.requested_bufs > 100)
695                         dev_priv->stats.requested_bufs = 100;
696
697                 radeon_clear_box(dev_priv, 4, 16,
698                                  dev_priv->stats.requested_bufs, 4,
699                                  196, 128, 128);
700         }
701
702         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
703
704 }
705
706 /* ================================================================
707  * CP command dispatch functions
708  */
709
/*
 * Emit the CP ring commands that clear the buffers selected by clear->flags
 * for every cliprect currently listed in the SAREA (sarea_priv->nbox /
 * sarea_priv->boxes).
 *
 *   dev         - DRM device; dev->dev_private supplies the radeon state.
 *   clear       - clear request: flags, color_mask, clear_color, clear_depth
 *                 and depth_mask are read here.
 *   depth_boxes - array indexed in parallel with the SAREA cliprects; the
 *                 ui[CLEAR_X1/Y1/X2/Y2/DEPTH] words are emitted verbatim as
 *                 vertex data by the quad-based depth/stencil clears.
 *
 * Three independent stages run in order:
 *   1. RADEON_FRONT / RADEON_BACK: one solid-fill PAINT_MULTI packet per
 *      cliprect and per selected colour buffer.
 *   2. RADEON_DEPTH / RADEON_STENCIL: exactly one of
 *        - the "hyper z" path when RADEON_CLEAR_FASTZ is also set,
 *        - a rect-list draw using the R200 packets when the microcode is
 *          UCODE_R200,
 *        - a rect-list draw using the RADEON_3D_DRAW_IMMD packet otherwise.
 *   3. sarea_priv->last_clear is incremented and emitted with
 *      RADEON_CLEAR_AGE, followed by RADEON_WAIT_UNTIL_IDLE.
 *
 * Every stage that emits state also resets sarea_priv->ctx_owner to 0.
 */
710 static void radeon_cp_dispatch_clear(drm_device_t * dev,
711                                      drm_radeon_clear_t * clear,
712                                      drm_radeon_clear_rect_t * depth_boxes)
713 {
714         drm_radeon_private_t *dev_priv = dev->dev_private;
715         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
716         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
717         int nbox = sarea_priv->nbox;
718         drm_clip_rect_t *pbox = sarea_priv->boxes;
719         unsigned int flags = clear->flags;
720         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
721         int i;
722         RING_LOCALS;
723         DRM_DEBUG("flags = 0x%x\n", flags);
724
725         dev_priv->stats.clears++;
726
            /* With page flipping on and current_page == 1, exchange the
             * RADEON_FRONT and RADEON_BACK bits so the fills below target
             * the swapped buffers.
             */
727         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
728                 unsigned int tmp = flags;
729
730                 flags &= ~(RADEON_FRONT | RADEON_BACK);
731                 if (tmp & RADEON_FRONT)
732                         flags |= RADEON_BACK;
733                 if (tmp & RADEON_BACK)
734                         flags |= RADEON_FRONT;
735         }
736
737         if (flags & (RADEON_FRONT | RADEON_BACK)) {
738
739                 BEGIN_RING(4);
740
741                 /* Ensure the 3D stream is idle before doing a
742                  * 2D fill to clear the front or back buffer.
743                  */
744                 RADEON_WAIT_UNTIL_3D_IDLE();
745
746                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
747                 OUT_RING(clear->color_mask);
748
749                 ADVANCE_RING();
750
751                 /* Make sure we restore the 3D state next time.
752                  */
753                 dev_priv->sarea_priv->ctx_owner = 0;
754
755                 for (i = 0; i < nbox; i++) {
756                         int x = pbox[i].x1;
757                         int y = pbox[i].y1;
758                         int w = pbox[i].x2 - x;
759                         int h = pbox[i].y2 - y;
760
761                         DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
762                                   x, y, w, h, flags);
763
764                         if (flags & RADEON_FRONT) {
765                                 BEGIN_RING(6);
766
767                                 OUT_RING(CP_PACKET3
768                                          (RADEON_CNTL_PAINT_MULTI, 4));
769                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
770                                          RADEON_GMC_BRUSH_SOLID_COLOR |
771                                          (dev_priv->
772                                           color_fmt << 8) |
773                                          RADEON_GMC_SRC_DATATYPE_COLOR |
774                                          RADEON_ROP3_P |
775                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
776
777                                 OUT_RING(dev_priv->front_pitch_offset);
778                                 OUT_RING(clear->clear_color);
779
780                                 OUT_RING((x << 16) | y);
781                                 OUT_RING((w << 16) | h);
782
783                                 ADVANCE_RING();
784                         }
785
786                         if (flags & RADEON_BACK) {
787                                 BEGIN_RING(6);
788
789                                 OUT_RING(CP_PACKET3
790                                          (RADEON_CNTL_PAINT_MULTI, 4));
791                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
792                                          RADEON_GMC_BRUSH_SOLID_COLOR |
793                                          (dev_priv->
794                                           color_fmt << 8) |
795                                          RADEON_GMC_SRC_DATATYPE_COLOR |
796                                          RADEON_ROP3_P |
797                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
798
799                                 OUT_RING(dev_priv->back_pitch_offset);
800                                 OUT_RING(clear->clear_color);
801
802                                 OUT_RING((x << 16) | y);
803                                 OUT_RING((w << 16) | h);
804
805                                 ADVANCE_RING();
806                         }
807                 }
808         }
809
810         /* hyper z clear */
811         /* no docs available, based on reverse engineering by Stephane Marchesin */
812         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
813             && (flags & RADEON_CLEAR_FASTZ)) {
814
815                 int i;  /* NOTE(review): shadows the function-scope i declared above */
816                 int depthpixperline =
817                     dev_priv->depth_fmt ==
818                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
819                                                        2) : (dev_priv->
820                                                              depth_pitch / 4);
821
822                 u32 clearmask;
823
824                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
825                     ((clear->depth_mask & 0xff) << 24);
826
827                 /* Make sure we restore the 3D state next time.
828                  * we haven't touched any "normal" state - still need this?
829                  */
830                 dev_priv->sarea_priv->ctx_owner = 0;
831
832                 if ((dev_priv->flags & CHIP_HAS_HIERZ)
833                     && (flags & RADEON_USE_HIERZ)) {
834                         /* FIXME : reverse engineer that for Rx00 cards */
835                         /* FIXME : the mask supposedly contains low-res z values. So can't set
836                            just to the max (0xff? or actually 0x3fff?), need to take z clear
837                            value into account? */
838                         /* pattern seems to work for r100, though get slight
839                            rendering errors with glxgears. If hierz is not enabled for r100,
840                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
841                            other ones are ignored, and the same clear mask can be used. That's
842                            very different behaviour than R200 which needs different clear mask
843                            and different number of tiles to clear if hierz is enabled or not !?!
844                          */
845                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
846                 } else {
847                         /* clear mask : chooses the clearing pattern.
848                            rv250: could be used to clear only parts of macrotiles
849                            (but that would get really complicated...)?
850                            bit 0 and 1 (either or both of them ?!?!) are used to
851                            not clear tile (or maybe one of the bits indicates if the tile is
852                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
853                            Pattern is as follows:
854                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
855                            bits -------------------------------------------------
856                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
857                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
858                            covers 256 pixels ?!?
859                          */
860                         clearmask = 0x0;
861                 }
862
863                 BEGIN_RING(8);
864                 RADEON_WAIT_UNTIL_2D_IDLE();
865                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
866                              tempRB3D_DEPTHCLEARVALUE);
867                 /* what offset is this exactly ? */
868                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
869                 /* need ctlstat, otherwise get some strange black flickering */
870                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
871                              RADEON_RB3D_ZC_FLUSH_ALL);
872                 ADVANCE_RING();
873
874                 for (i = 0; i < nbox; i++) {
875                         int tileoffset, nrtilesx, nrtilesy, j;
876                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
877                         if ((dev_priv->flags & CHIP_HAS_HIERZ)
878                             && !(dev_priv->microcode_version == UCODE_R200)) {
879                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
880                                    maybe r200 actually doesn't need to put the low-res z value into
881                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
882                                    Works for R100, both with hierz and without.
883                                    R100 seems to operate on 2x1 8x8 tiles, but...
884                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
885                                    problematic with resolutions which are not 64 pix aligned? */
886                                 tileoffset =
887                                     ((pbox[i].y1 >> 3) * depthpixperline +
888                                      pbox[i].x1) >> 6;
889                                 nrtilesx =
890                                     ((pbox[i].x2 & ~63) -
891                                      (pbox[i].x1 & ~63)) >> 4;
892                                 nrtilesy =
893                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
894                                 for (j = 0; j <= nrtilesy; j++) {
895                                         BEGIN_RING(4);
896                                         OUT_RING(CP_PACKET3
897                                                  (RADEON_3D_CLEAR_ZMASK, 2));
898                                         /* first tile */
899                                         OUT_RING(tileoffset * 8);
900                                         /* the number of tiles to clear */
901                                         OUT_RING(nrtilesx + 4);
902                                         /* clear mask : chooses the clearing pattern. */
903                                         OUT_RING(clearmask);
904                                         ADVANCE_RING();
905                                         tileoffset += depthpixperline >> 6;
906                                 }
907                         } else if (dev_priv->microcode_version == UCODE_R200) {
908                                 /* works for rv250. */
909                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
910                                 tileoffset =
911                                     ((pbox[i].y1 >> 3) * depthpixperline +
912                                      pbox[i].x1) >> 5;
913                                 nrtilesx =
914                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
915                                 nrtilesy =
916                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
917                                 for (j = 0; j <= nrtilesy; j++) {
918                                         BEGIN_RING(4);
919                                         OUT_RING(CP_PACKET3
920                                                  (RADEON_3D_CLEAR_ZMASK, 2));
921                                         /* first tile */
922                                         /* judging by the first tile offset needed, could possibly
923                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
924                                            macro tiles, though would still need clear mask for
925                                            right/bottom if truly 4x4 granularity is desired ? */
926                                         OUT_RING(tileoffset * 16);
927                                         /* the number of tiles to clear */
928                                         OUT_RING(nrtilesx + 1);
929                                         /* clear mask : chooses the clearing pattern. */
930                                         OUT_RING(clearmask);
931                                         ADVANCE_RING();
932                                         tileoffset += depthpixperline >> 5;
933                                 }
934                         } else {        /* rv 100 */
935                                 /* rv100 might not need 64 pix alignment, who knows */
936                                 /* offsets are, hmm, weird */
937                                 tileoffset =
938                                     ((pbox[i].y1 >> 4) * depthpixperline +
939                                      pbox[i].x1) >> 6;
940                                 nrtilesx =
941                                     ((pbox[i].x2 & ~63) -
942                                      (pbox[i].x1 & ~63)) >> 4;
943                                 nrtilesy =
944                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
945                                 for (j = 0; j <= nrtilesy; j++) {
946                                         BEGIN_RING(4);
947                                         OUT_RING(CP_PACKET3
948                                                  (RADEON_3D_CLEAR_ZMASK, 2));
949                                         OUT_RING(tileoffset * 128);
950                                         /* the number of tiles to clear */
951                                         OUT_RING(nrtilesx + 4);
952                                         /* clear mask : chooses the clearing pattern. */
953                                         OUT_RING(clearmask);
954                                         ADVANCE_RING();
955                                         tileoffset += depthpixperline >> 6;
956                                 }
957                         }
958                 }
959
960                 /* TODO don't always clear all hi-level z tiles */
961                 if ((dev_priv->flags & CHIP_HAS_HIERZ)
962                     && (dev_priv->microcode_version == UCODE_R200)
963                     && (flags & RADEON_USE_HIERZ))
964                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
965                         /* FIXME : the mask supposedly contains low-res z values. So can't set
966                            just to the max (0xff? or actually 0x3fff?), need to take z clear
967                            value into account? */
968                 {
969                         BEGIN_RING(4);
970                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
971                         OUT_RING(0x0);  /* First tile */
972                         OUT_RING(0x3cc0);
973                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
974                         ADVANCE_RING();
975                 }
976         }
977
978         /* We have to clear the depth and/or stencil buffers by
979          * rendering a quad into just those buffers.  Thus, we have to
980          * make sure the 3D engine is configured correctly.
981          */
982         else if ((dev_priv->microcode_version == UCODE_R200) &&
983                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
984
985                 int tempPP_CNTL;
986                 int tempRE_CNTL;
987                 int tempRB3D_CNTL;
988                 int tempRB3D_ZSTENCILCNTL;
989                 int tempRB3D_STENCILREFMASK;
990                 int tempRB3D_PLANEMASK;
991                 int tempSE_CNTL;
992                 int tempSE_VTE_CNTL;
993                 int tempSE_VTX_FMT_0;
994                 int tempSE_VTX_FMT_1;
995                 int tempSE_VAP_CNTL;
996                 int tempRE_AUX_SCISSOR_CNTL;
997
998                 tempPP_CNTL = 0;
999                 tempRE_CNTL = 0;
1000
1001                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1002
1003                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1004                 tempRB3D_STENCILREFMASK = 0x0;
1005
1006                 tempSE_CNTL = depth_clear->se_cntl;
1007
1008                 /* Disable TCL */
1009
1010                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1011                                           (0x9 <<
1012                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1013
1014                 tempRB3D_PLANEMASK = 0x0;
1015
1016                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1017
1018                 tempSE_VTE_CNTL =
1019                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1020
1021                 /* Vertex format (X, Y, Z, W) */
1022                 tempSE_VTX_FMT_0 =
1023                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1024                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1025                 tempSE_VTX_FMT_1 = 0x0;
1026
1027                 /*
1028                  * Depth buffer specific enables
1029                  */
1030                 if (flags & RADEON_DEPTH) {
1031                         /* Enable depth buffer */
1032                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1033                 } else {
1034                         /* Disable depth buffer */
1035                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1036                 }
1037
1038                 /*
1039                  * Stencil buffer specific enables
1040                  */
1041                 if (flags & RADEON_STENCIL) {
1042                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1043                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1044                 } else {
1045                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1046                         tempRB3D_STENCILREFMASK = 0x00000000;
1047                 }
1048
1049                 if (flags & RADEON_USE_COMP_ZBUF) {
1050                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1051                             RADEON_Z_DECOMPRESSION_ENABLE;
1052                 }
1053                 if (flags & RADEON_USE_HIERZ) {
1054                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1055                 }
1056
1057                 BEGIN_RING(26);
1058                 RADEON_WAIT_UNTIL_2D_IDLE();
1059
1060                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1061                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1062                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1063                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1064                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1065                              tempRB3D_STENCILREFMASK);
1066                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1067                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1068                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1069                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1070                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1071                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1072                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1073                 ADVANCE_RING();
1074
1075                 /* Make sure we restore the 3D state next time.
1076                  */
1077                 dev_priv->sarea_priv->ctx_owner = 0;
1078
1079                 for (i = 0; i < nbox; i++) {
1080
1081                         /* Funny that this should be required --
1082                          *  sets top-left?
1083                          */
1084                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1085
1086                         BEGIN_RING(14);
1087                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1088                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1089                                   RADEON_PRIM_WALK_RING |
1090                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1091                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1092                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1093                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1094                         OUT_RING(0x3f800000);
1095                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1096                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1097                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1098                         OUT_RING(0x3f800000);
1099                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1100                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1101                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1102                         OUT_RING(0x3f800000);
1103                         ADVANCE_RING();
1104                 }
1105         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
                 /* Microcode is not UCODE_R200: same quad-based depth/stencil
                  * clear, emitted with the RADEON_3D_DRAW_IMMD packet.
                  */
1106
1107                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1108
1109                 rb3d_cntl = depth_clear->rb3d_cntl;
1110
1111                 if (flags & RADEON_DEPTH) {
1112                         rb3d_cntl |= RADEON_Z_ENABLE;
1113                 } else {
1114                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1115                 }
1116
1117                 if (flags & RADEON_STENCIL) {
1118                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1119                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1120                 } else {
1121                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1122                         rb3d_stencilrefmask = 0x00000000;
1123                 }
1124
1125                 if (flags & RADEON_USE_COMP_ZBUF) {
1126                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1127                             RADEON_Z_DECOMPRESSION_ENABLE;
1128                 }
1129                 if (flags & RADEON_USE_HIERZ) {
1130                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1131                 }
1132
1133                 BEGIN_RING(13);
1134                 RADEON_WAIT_UNTIL_2D_IDLE();
1135
1136                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1137                 OUT_RING(0x00000000);
1138                 OUT_RING(rb3d_cntl);
1139
1140                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1141                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1142                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1143                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1144                 ADVANCE_RING();
1145
1146                 /* Make sure we restore the 3D state next time.
1147                  */
1148                 dev_priv->sarea_priv->ctx_owner = 0;
1149
1150                 for (i = 0; i < nbox; i++) {
1151
1152                         /* Funny that this should be required --
1153                          *  sets top-left?
1154                          */
1155                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1156
1157                         BEGIN_RING(15);
1158
1159                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1160                         OUT_RING(RADEON_VTX_Z_PRESENT |
1161                                  RADEON_VTX_PKCOLOR_PRESENT);
1162                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1163                                   RADEON_PRIM_WALK_RING |
1164                                   RADEON_MAOS_ENABLE |
1165                                   RADEON_VTX_FMT_RADEON_MODE |
1166                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1167
1168                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1169                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1170                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1171                         OUT_RING(0x0);
1172
1173                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1174                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1175                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1176                         OUT_RING(0x0);
1177
1178                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1179                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1180                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1181                         OUT_RING(0x0);
1182
1183                         ADVANCE_RING();
1184                 }
1185         }
1186
1187         /* Increment the clear counter.  The client-side 3D driver must
1188          * wait on this value before performing the clear ioctl.  We
1189          * need this because the card's so damned fast...
1190          */
1191         dev_priv->sarea_priv->last_clear++;
1192
1193         BEGIN_RING(4);
1194
1195         RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
1196         RADEON_WAIT_UNTIL_IDLE();
1197
1198         ADVANCE_RING();
1199 }
1200
1201 static void radeon_cp_dispatch_swap(drm_device_t * dev)
1202 {
1203         drm_radeon_private_t *dev_priv = dev->dev_private;
1204         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1205         int nbox = sarea_priv->nbox;
1206         drm_clip_rect_t *pbox = sarea_priv->boxes;
1207         int i;
1208         RING_LOCALS;
1209         DRM_DEBUG("\n");
1210
1211         /* Do some trivial performance monitoring...
1212          */
1213         if (dev_priv->do_boxes)
1214                 radeon_cp_performance_boxes(dev_priv);
1215
1216         /* Wait for the 3D stream to idle before dispatching the bitblt.
1217          * This will prevent data corruption between the two streams.
1218          */
1219         BEGIN_RING(2);
1220
1221         RADEON_WAIT_UNTIL_3D_IDLE();
1222
1223         ADVANCE_RING();
1224
1225         for (i = 0; i < nbox; i++) {
1226                 int x = pbox[i].x1;
1227                 int y = pbox[i].y1;
1228                 int w = pbox[i].x2 - x;
1229                 int h = pbox[i].y2 - y;
1230
1231                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1232
1233                 BEGIN_RING(7);
1234
1235                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1236                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1237                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1238                          RADEON_GMC_BRUSH_NONE |
1239                          (dev_priv->color_fmt << 8) |
1240                          RADEON_GMC_SRC_DATATYPE_COLOR |
1241                          RADEON_ROP3_S |
1242                          RADEON_DP_SRC_SOURCE_MEMORY |
1243                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1244
1245                 /* Make this work even if front & back are flipped:
1246                  */
1247                 if (dev_priv->current_page == 0) {
1248                         OUT_RING(dev_priv->back_pitch_offset);
1249                         OUT_RING(dev_priv->front_pitch_offset);
1250                 } else {
1251                         OUT_RING(dev_priv->front_pitch_offset);
1252                         OUT_RING(dev_priv->back_pitch_offset);
1253                 }
1254
1255                 OUT_RING((x << 16) | y);
1256                 OUT_RING((x << 16) | y);
1257                 OUT_RING((w << 16) | h);
1258
1259                 ADVANCE_RING();
1260         }
1261
1262         /* Increment the frame counter.  The client-side 3D driver must
1263          * throttle the framerate by waiting for this value before
1264          * performing the swapbuffer ioctl.
1265          */
1266         dev_priv->sarea_priv->last_frame++;
1267
1268         BEGIN_RING(4);
1269
1270         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1271         RADEON_WAIT_UNTIL_2D_IDLE();
1272
1273         ADVANCE_RING();
1274 }
1275
1276 static void radeon_cp_dispatch_flip(drm_device_t * dev)
1277 {
1278         drm_radeon_private_t *dev_priv = dev->dev_private;
1279         drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
1280         int offset = (dev_priv->current_page == 1)
1281             ? dev_priv->front_offset : dev_priv->back_offset;
1282         RING_LOCALS;
1283         DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
1284                   __FUNCTION__,
1285                   dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);
1286
1287         /* Do some trivial performance monitoring...
1288          */
1289         if (dev_priv->do_boxes) {
1290                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1291                 radeon_cp_performance_boxes(dev_priv);
1292         }
1293
1294         /* Update the frame offsets for both CRTCs
1295          */
1296         BEGIN_RING(6);
1297
1298         RADEON_WAIT_UNTIL_3D_IDLE();
1299         OUT_RING_REG(RADEON_CRTC_OFFSET,
1300                      ((sarea->frame.y * dev_priv->front_pitch +
1301                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1302                      + offset);
1303         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1304                      + offset);
1305
1306         ADVANCE_RING();
1307
1308         /* Increment the frame counter.  The client-side 3D driver must
1309          * throttle the framerate by waiting for this value before
1310          * performing the swapbuffer ioctl.
1311          */
1312         dev_priv->sarea_priv->last_frame++;
1313         dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
1314             1 - dev_priv->current_page;
1315
1316         BEGIN_RING(2);
1317
1318         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1319
1320         ADVANCE_RING();
1321 }
1322
1323 static int bad_prim_vertex_nr(int primitive, int nr)
1324 {
1325         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1326         case RADEON_PRIM_TYPE_NONE:
1327         case RADEON_PRIM_TYPE_POINT:
1328                 return nr < 1;
1329         case RADEON_PRIM_TYPE_LINE:
1330                 return (nr & 1) || nr == 0;
1331         case RADEON_PRIM_TYPE_LINE_STRIP:
1332                 return nr < 2;
1333         case RADEON_PRIM_TYPE_TRI_LIST:
1334         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1335         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1336         case RADEON_PRIM_TYPE_RECT_LIST:
1337                 return nr % 3 || nr == 0;
1338         case RADEON_PRIM_TYPE_TRI_FAN:
1339         case RADEON_PRIM_TYPE_TRI_STRIP:
1340                 return nr < 3;
1341         default:
1342                 return 1;
1343         }
1344 }
1345
/* Description of one primitive to be drawn from a vertex buffer, as consumed
 * by radeon_cp_dispatch_vertex().
 */
1346 typedef struct {
1347         unsigned int start;     /* added to the buffer's GART offset to form the vertex data address */
1348         unsigned int finish;    /* only printed in the debug message by the vertex dispatch; presumably the end offset -- confirm with callers */
1349         unsigned int prim;      /* primitive word; type bits are selected with RADEON_PRIM_TYPE_MASK */
1350         unsigned int numverts;  /* vertex count, validated by bad_prim_vertex_nr() before use */
1351         unsigned int offset;    /* not read by the vertex dispatch path; NOTE(review): meaning set by other users -- confirm */
1352         unsigned int vc_format; /* vertex format word, emitted unchanged in the render packet */
1353 } drm_radeon_tcl_prim_t;
1354
1355 static void radeon_cp_dispatch_vertex(drm_device_t * dev,
1356                                       drm_buf_t * buf,
1357                                       drm_radeon_tcl_prim_t * prim)
1358 {
1359         drm_radeon_private_t *dev_priv = dev->dev_private;
1360         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1361         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1362         int numverts = (int)prim->numverts;
1363         int nbox = sarea_priv->nbox;
1364         int i = 0;
1365         RING_LOCALS;
1366
1367         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1368                   prim->prim,
1369                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1370
1371         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1372                 DRM_ERROR("bad prim %x numverts %d\n",
1373                           prim->prim, prim->numverts);
1374                 return;
1375         }
1376
1377         do {
1378                 /* Emit the next cliprect */
1379                 if (i < nbox) {
1380                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1381                 }
1382
1383                 /* Emit the vertex buffer rendering commands */
1384                 BEGIN_RING(5);
1385
1386                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1387                 OUT_RING(offset);
1388                 OUT_RING(numverts);
1389                 OUT_RING(prim->vc_format);
1390                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1391                          RADEON_COLOR_ORDER_RGBA |
1392                          RADEON_VTX_FMT_RADEON_MODE |
1393                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1394
1395                 ADVANCE_RING();
1396
1397                 i++;
1398         } while (i < nbox);
1399 }
1400
1401 static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
1402 {
1403         drm_radeon_private_t *dev_priv = dev->dev_private;
1404         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1405         RING_LOCALS;
1406
1407         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1408
1409         /* Emit the vertex buffer age */
1410         BEGIN_RING(2);
1411         RADEON_DISPATCH_AGE(buf_priv->age);
1412         ADVANCE_RING();
1413
1414         buf->pending = 1;
1415         buf->used = 0;
1416 }
1417
1418 static void radeon_cp_dispatch_indirect(drm_device_t * dev,
1419                                         drm_buf_t * buf, int start, int end)
1420 {
1421         drm_radeon_private_t *dev_priv = dev->dev_private;
1422         RING_LOCALS;
1423         DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1424
1425         if (start != end) {
1426                 int offset = (dev_priv->gart_buffers_offset
1427                               + buf->offset + start);
1428                 int dwords = (end - start + 3) / sizeof(u32);
1429
1430                 /* Indirect buffer data must be an even number of
1431                  * dwords, so if we've been given an odd number we must
1432                  * pad the data with a Type-2 CP packet.
1433                  */
1434                 if (dwords & 1) {
1435                         u32 *data = (u32 *)
1436                             ((char *)dev->agp_buffer_map->handle
1437                              + buf->offset + start);
1438                         data[dwords++] = RADEON_CP_PACKET2;
1439                 }
1440
1441                 /* Fire off the indirect buffer */
1442                 BEGIN_RING(3);
1443
1444                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1445                 OUT_RING(offset);
1446                 OUT_RING(dwords);
1447
1448                 ADVANCE_RING();
1449         }
1450 }
1451
1452 static void radeon_cp_dispatch_indices(drm_device_t * dev,
1453                                        drm_buf_t * elt_buf,
1454                                        drm_radeon_tcl_prim_t * prim)
1455 {
1456         drm_radeon_private_t *dev_priv = dev->dev_private;
1457         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1458         int offset = dev_priv->gart_buffers_offset + prim->offset;
1459         u32 *data;
1460         int dwords;
1461         int i = 0;
1462         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1463         int count = (prim->finish - start) / sizeof(u16);
1464         int nbox = sarea_priv->nbox;
1465
1466         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1467                   prim->prim,
1468                   prim->vc_format,
1469                   prim->start, prim->finish, prim->offset, prim->numverts);
1470
1471         if (bad_prim_vertex_nr(prim->prim, count)) {
1472                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1473                 return;
1474         }
1475
1476         if (start >= prim->finish || (prim->start & 0x7)) {
1477                 DRM_ERROR("buffer prim %d\n", prim->prim);
1478                 return;
1479         }
1480
1481         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1482
1483         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1484                         elt_buf->offset + prim->start);
1485
1486         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1487         data[1] = offset;
1488         data[2] = prim->numverts;
1489         data[3] = prim->vc_format;
1490         data[4] = (prim->prim |
1491                    RADEON_PRIM_WALK_IND |
1492                    RADEON_COLOR_ORDER_RGBA |
1493                    RADEON_VTX_FMT_RADEON_MODE |
1494                    (count << RADEON_NUM_VERTICES_SHIFT));
1495
1496         do {
1497                 if (i < nbox)
1498                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1499
1500                 radeon_cp_dispatch_indirect(dev, elt_buf,
1501                                             prim->start, prim->finish);
1502
1503                 i++;
1504         } while (i < nbox);
1505
1506 }
1507
/* Largest number of bytes radeon_cp_dispatch_texture() uploads in a single
 * pass; bigger images are split into several passes.
 */
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

/* Upload a texture image from user space by blitting it from DMA buffers.
 *
 * filp:  file handle of the calling client; becomes the owner of every DMA
 *        buffer used for the upload.
 * dev:   DRM device.
 * tex:   destination description (offset, pitch, format, width, height).
 * image: source rectangle and user data pointer.  It is updated in place
 *        after every pass (y, height and data advance) so that an
 *        interrupted upload can be resumed.
 *
 * Each pass copies up to RADEON_MAX_TEXTURE_SIZE bytes from user memory
 * into a freshly obtained DMA buffer, emits a RADEON_CNTL_BITBLT_MULTI
 * packet that blits the buffer to the texture, and discards the buffer.
 *
 * Returns 0 on success (also when the image is empty), DRM_ERR(EINVAL) for
 * a rejected destination offset, unknown format or unusable source pitch,
 * DRM_ERR(EFAULT) when a user copy fails, and DRM_ERR(EAGAIN) when no DMA
 * buffer is available; in the EAGAIN case the current state of *image is
 * first copied back to tex->image.  DRM_GET_PRIV_WITH_RETURN may also
 * return early from inside the macro.
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
                                      drm_device_t * dev,
                                      drm_radeon_texture_t * tex,
                                      drm_radeon_tex_image_t * image)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_buf_t *buf;
        u32 format;
        u32 *buffer;
        const u8 __user *data;
        int size, dwords, tex_width, blit_width, spitch;
        u32 height;
        int i;
        u32 texpitch, microtile;
        u32 offset;
        RING_LOCALS;

        DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

        if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
                DRM_ERROR("Invalid destination offset\n");
                return DRM_ERR(EINVAL);
        }

        dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

        /* Flush the pixel cache.  This ensures no pixel data gets mixed
         * up with the texture data from the host data blit, otherwise
         * part of the texture image may be corrupted.
         */
        BEGIN_RING(4);
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_IDLE();
        ADVANCE_RING();

        /* The compiler won't optimize away a division by a variable,
         * even if the only legal values are powers of two.  Thus, we'll
         * use a shift instead.
         */
        /* Pick the blitter colour format by texel size and convert the
         * texture and image widths from texels to bytes.
         */
        switch (tex->format) {
        case RADEON_TXFORMAT_ARGB8888:
        case RADEON_TXFORMAT_RGBA8888:
                format = RADEON_COLOR_FORMAT_ARGB8888;
                tex_width = tex->width * 4;
                blit_width = image->width * 4;
                break;
        case RADEON_TXFORMAT_AI88:
        case RADEON_TXFORMAT_ARGB1555:
        case RADEON_TXFORMAT_RGB565:
        case RADEON_TXFORMAT_ARGB4444:
        case RADEON_TXFORMAT_VYUY422:
        case RADEON_TXFORMAT_YVYU422:
                format = RADEON_COLOR_FORMAT_RGB565;
                tex_width = tex->width * 2;
                blit_width = image->width * 2;
                break;
        case RADEON_TXFORMAT_I8:
        case RADEON_TXFORMAT_RGB332:
                format = RADEON_COLOR_FORMAT_CI8;
                tex_width = tex->width * 1;
                blit_width = image->width * 1;
                break;
        default:
                DRM_ERROR("invalid texture format %d\n", tex->format);
                return DRM_ERR(EINVAL);
        }
        /* Source pitch: blit width in units of 64 bytes.  A zero pitch is
         * only accepted for images of at most one row.
         */
        spitch = blit_width >> 6;
        if (spitch == 0 && image->height > 1)
                return DRM_ERR(EINVAL);

        /* tex->pitch may carry the micro-tile flag; it is tested at the
         * position it will occupy once shifted into the pitch/offset dword.
         */
        texpitch = tex->pitch;
        if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
                microtile = 1;
                if (tex_width < 64) {
                        texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
                        /* we got tiled coordinates, untile them */
                        image->x *= 2;
                }
        } else
                microtile = 0;

        DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

        /* One iteration per upload pass; image->height counts the rows
         * still to be uploaded.
         */
        do {
                DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
                          tex->offset >> 10, tex->pitch, tex->format,
                          image->x, image->y, image->width, image->height);

                /* Make a copy of some parameters in case we have to
                 * update them for a multi-pass texture blit.
                 */
                height = image->height;
                data = (const u8 __user *)image->data;

                size = height * blit_width;

                /* Clamp this pass to whole rows that fit the size limit,
                 * pad tiny uploads to one dword, and stop on empty images.
                 */
                if (size > RADEON_MAX_TEXTURE_SIZE) {
                        height = RADEON_MAX_TEXTURE_SIZE / blit_width;
                        size = height * blit_width;
                } else if (size < 4 && size > 0) {
                        size = 4;
                } else if (size == 0) {
                        return 0;
                }

                buf = radeon_freelist_get(dev);
                /* Dead code: the constant 0 disables this idle-and-retry
                 * fallback.
                 */
                if (0 && !buf) {
                        radeon_do_cp_idle(dev_priv);
                        buf = radeon_freelist_get(dev);
                }
                if (!buf) {
                        DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
                        /* Hand the progress made so far back to user space */
                        if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
                                return DRM_ERR(EFAULT);
                        return DRM_ERR(EAGAIN);
                }

                /* Dispatch the indirect buffer.
                 */
                buffer =
                    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
                dwords = size / 4;

/* Copy _width bytes from user pointer _data to _buf; on failure log and
 * return DRM_ERR(EFAULT) from the enclosing function.
 */
#define RADEON_COPY_MT(_buf, _data, _width) \
        do { \
                if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
                        DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
                        return DRM_ERR(EFAULT); \
                } \
        } while(0)

                if (microtile) {
                        /* texture micro tiling in use, minimum texture width is thus 16 bytes.
                           however, we cannot use blitter directly for texture width < 64 bytes,
                           since minimum tex pitch is 64 bytes and we need this to match
                           the texture width, otherwise the blitter will tile it wrong.
                           Thus, tiling manually in this case. Additionally, need to special
                           case tex height = 1, since our actual image will have height 2
                           and we need to ensure we don't read beyond the texture size
                           from user space. */
                        if (tex->height == 1) {
                                if (tex_width >= 64 || tex_width <= 16) {
                                        /* NOTE(review): tex_width is already a
                                         * byte count (tex->width times the texel
                                         * size), so this copies four times that
                                         * many bytes - confirm this is intended.
                                         */
                                        RADEON_COPY_MT(buffer, data,
                                                (int)(tex_width * sizeof(u32)));
                                } else if (tex_width == 32) {
                                        RADEON_COPY_MT(buffer, data, 16);
                                        RADEON_COPY_MT(buffer + 8,
                                                       data + 16, 16);
                                }
                        } else if (tex_width >= 64 || tex_width == 16) {
                                RADEON_COPY_MT(buffer, data,
                                               (int)(dwords * sizeof(u32)));
                        } else if (tex_width < 16) {
                                /* One row per 16 bytes (4 dwords) of buffer.
                                 * NOTE(review): iterates over tex->height, not
                                 * the per-pass height - confirm for uploads
                                 * that need more than one pass.
                                 */
                                for (i = 0; i < tex->height; i++) {
                                        RADEON_COPY_MT(buffer, data, tex_width);
                                        buffer += 4;
                                        data += tex_width;
                                }
                        } else if (tex_width == 32) {
                                /* TODO: make sure this works when not fitting in one buffer
                                   (i.e. 32bytes x 2048...) */
                                /* Two rows per iteration: their four 16-byte
                                 * halves are stored at dword offsets 0, 8, 4
                                 * and 12 of a 64-byte group.
                                 */
                                for (i = 0; i < tex->height; i += 2) {
                                        RADEON_COPY_MT(buffer, data, 16);
                                        data += 16;
                                        RADEON_COPY_MT(buffer + 8, data, 16);
                                        data += 16;
                                        RADEON_COPY_MT(buffer + 4, data, 16);
                                        data += 16;
                                        RADEON_COPY_MT(buffer + 12, data, 16);
                                        data += 16;
                                        buffer += 16;
                                }
                        }
                } else {
                        if (tex_width >= 32) {
                                /* Texture image width is larger than the minimum, so we
                                 * can upload it directly.
                                 */
                                RADEON_COPY_MT(buffer, data,
                                               (int)(dwords * sizeof(u32)));
                        } else {
                                /* Texture image width is less than the minimum, so we
                                 * need to pad out each image scanline to the minimum
                                 * width.
                                 */
                                for (i = 0; i < tex->height; i++) {
                                        RADEON_COPY_MT(buffer, data, tex_width);
                                        buffer += 8;
                                        data += tex_width;
                                }
                        }
                }

#undef RADEON_COPY_MT
                buf->filp = filp;
                buf->used = size;
                offset = dev_priv->gart_buffers_offset + buf->offset;
                /* Blit from the DMA buffer (source) to the texture
                 * (destination); pitches go in bits 22 and up, offsets are
                 * given in units of 1024 bytes.
                 */
                BEGIN_RING(9);
                OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
                OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                         RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                         RADEON_GMC_BRUSH_NONE |
                         (format << 8) |
                         RADEON_GMC_SRC_DATATYPE_COLOR |
                         RADEON_ROP3_S |
                         RADEON_DP_SRC_SOURCE_MEMORY |
                         RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
                OUT_RING((spitch << 22) | (offset >> 10));
                OUT_RING((texpitch << 22) | (tex->offset >> 10));
                OUT_RING(0);
                OUT_RING((image->x << 16) | image->y);
                OUT_RING((image->width << 16) | height);
                RADEON_WAIT_UNTIL_2D_IDLE();
                ADVANCE_RING();

                radeon_cp_discard_buffer(dev, buf);

                /* Update the input parameters for next time */
                image->y += height;
                image->height -= height;
                image->data = (const u8 __user *)image->data + size;
        } while (image->height > 0);

        /* Flush the pixel cache after the blit completes.  This ensures
         * the texture data is written out to memory before rendering
         * continues.
         */
        BEGIN_RING(4);
        RADEON_FLUSH_CACHE();
        RADEON_WAIT_UNTIL_2D_IDLE();
        ADVANCE_RING();
        return 0;
}
1744
1745 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1746 {
1747         drm_radeon_private_t *dev_priv = dev->dev_private;
1748         int i;
1749         RING_LOCALS;
1750         DRM_DEBUG("\n");
1751
1752         BEGIN_RING(35);
1753
1754         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1755         OUT_RING(0x00000000);
1756
1757         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1758         for (i = 0; i < 32; i++) {
1759                 OUT_RING(stipple[i]);
1760         }
1761
1762         ADVANCE_RING();
1763 }
1764
1765 static void radeon_apply_surface_regs(int surf_index,
1766                                       drm_radeon_private_t *dev_priv)
1767 {
1768         if (!dev_priv->mmio)
1769                 return;
1770
1771         radeon_do_cp_idle(dev_priv);
1772
1773         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1774                      dev_priv->surfaces[surf_index].flags);
1775         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1776                      dev_priv->surfaces[surf_index].lower);
1777         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1778                      dev_priv->surfaces[surf_index].upper);
1779 }
1780
1781 /* Allocates a virtual surface
1782  * doesn't always allocate a real surface, will stretch an existing
1783  * surface when possible.
1784  *
1785  * Note that refcount can be at most 2, since during a free refcount=3
1786  * might mean we have to allocate a new surface which might not always
1787  * be available.
1788  * For example : we allocate three contigous surfaces ABC. If B is
1789  * freed, we suddenly need two surfaces to store A and C, which might
1790  * not always be available.
1791  */
1792 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1793                          drm_radeon_private_t *dev_priv, DRMFILE filp)
1794 {
1795         struct radeon_virt_surface *s;
1796         int i;
1797         int virt_surface_index;
1798         uint32_t new_upper, new_lower;
1799
1800         new_lower = new->address;
1801         new_upper = new_lower + new->size - 1;
1802
1803         /* sanity check */
1804         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1805             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1806              RADEON_SURF_ADDRESS_FIXED_MASK)
1807             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1808                 return -1;
1809
1810         /* make sure there is no overlap with existing surfaces */
1811         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1812                 if ((dev_priv->surfaces[i].refcount != 0) &&
1813                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1814                       (new_lower < dev_priv->surfaces[i].upper)) ||
1815                      ((new_lower < dev_priv->surfaces[i].lower) &&
1816                       (new_upper > dev_priv->surfaces[i].lower)))) {
1817                         return -1;
1818                 }
1819         }
1820
1821         /* find a virtual surface */
1822         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1823                 if (dev_priv->virt_surfaces[i].filp == 0)
1824                         break;
1825         if (i == 2 * RADEON_MAX_SURFACES) {
1826                 return -1;
1827         }
1828         virt_surface_index = i;
1829
1830         /* try to reuse an existing surface */
1831         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1832                 /* extend before */
1833                 if ((dev_priv->surfaces[i].refcount == 1) &&
1834                     (new->flags == dev_priv->surfaces[i].flags) &&
1835                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1836                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1837                         s->surface_index = i;
1838                         s->lower = new_lower;
1839                         s->upper = new_upper;
1840                         s->flags = new->flags;
1841                         s->filp = filp;
1842                         dev_priv->surfaces[i].refcount++;
1843                         dev_priv->surfaces[i].lower = s->lower;
1844                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1845                         return virt_surface_index;
1846                 }
1847
1848                 /* extend after */
1849                 if ((dev_priv->surfaces[i].refcount == 1) &&
1850                     (new->flags == dev_priv->surfaces[i].flags) &&
1851                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1852                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1853                         s->surface_index = i;
1854                         s->lower = new_lower;
1855                         s->upper = new_upper;
1856                         s->flags = new->flags;
1857                         s->filp = filp;
1858                         dev_priv->surfaces[i].refcount++;
1859                         dev_priv->surfaces[i].upper = s->upper;
1860                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1861                         return virt_surface_index;
1862                 }
1863         }
1864
1865         /* okay, we need a new one */
1866         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1867                 if (dev_priv->surfaces[i].refcount == 0) {
1868                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1869                         s->surface_index = i;
1870                         s->lower = new_lower;
1871                         s->upper = new_upper;
1872                         s->flags = new->flags;
1873                         s->filp = filp;
1874                         dev_priv->surfaces[i].refcount = 1;
1875                         dev_priv->surfaces[i].lower = s->lower;
1876                         dev_priv->surfaces[i].upper = s->upper;
1877                         dev_priv->surfaces[i].flags = s->flags;
1878                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1879                         return virt_surface_index;
1880                 }
1881         }
1882
1883         /* we didn't find anything */
1884         return -1;
1885 }
1886
1887 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
1888                         int lower)
1889 {
1890         struct radeon_virt_surface *s;
1891         int i;
1892         /* find the virtual surface */
1893         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1894                 s = &(dev_priv->virt_surfaces[i]);
1895                 if (s->filp) {
1896                         if ((lower == s->lower) && (filp == s->filp)) {
1897                                 if (dev_priv->surfaces[s->surface_index].
1898                                     lower == s->lower)
1899                                         dev_priv->surfaces[s->surface_index].
1900                                             lower = s->upper;
1901
1902                                 if (dev_priv->surfaces[s->surface_index].
1903                                     upper == s->upper)
1904                                         dev_priv->surfaces[s->surface_index].
1905                                             upper = s->lower;
1906
1907                                 dev_priv->surfaces[s->surface_index].refcount--;
1908                                 if (dev_priv->surfaces[s->surface_index].
1909                                     refcount == 0)
1910                                         dev_priv->surfaces[s->surface_index].
1911                                             flags = 0;
1912                                 s->filp = NULL;
1913                                 radeon_apply_surface_regs(s->surface_index,
1914                                                           dev_priv);
1915                                 return 0;
1916                         }
1917                 }
1918         }
1919         return 1;
1920 }
1921
1922 static void radeon_surfaces_release(DRMFILE filp,
1923                                     drm_radeon_private_t * dev_priv)
1924 {
1925         int i;
1926         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1927                 if (dev_priv->virt_surfaces[i].filp == filp)
1928                         free_surface(filp, dev_priv,
1929                                      dev_priv->virt_surfaces[i].lower);
1930         }
1931 }
1932
1933 /* ================================================================
1934  * IOCTL functions
1935  */
1936 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1937 {
1938         DRM_DEVICE;
1939         drm_radeon_private_t *dev_priv = dev->dev_private;
1940         drm_radeon_surface_alloc_t alloc;
1941
1942         if (!dev_priv) {
1943                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
1944                 return DRM_ERR(EINVAL);
1945         }
1946
1947         DRM_COPY_FROM_USER_IOCTL(alloc,
1948                                  (drm_radeon_surface_alloc_t __user *) data,
1949                                  sizeof(alloc));
1950
1951         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1952                 return DRM_ERR(EINVAL);
1953         else
1954                 return 0;
1955 }
1956
1957 static int radeon_surface_free(DRM_IOCTL_ARGS)
1958 {
1959         DRM_DEVICE;
1960         drm_radeon_private_t *dev_priv = dev->dev_private;
1961         drm_radeon_surface_free_t memfree;
1962
1963         if (!dev_priv) {
1964                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
1965                 return DRM_ERR(EINVAL);
1966         }
1967
1968         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *) data,
1969                                  sizeof(memfree));
1970
1971         if (free_surface(filp, dev_priv, memfree.address))
1972                 return DRM_ERR(EINVAL);
1973         else
1974                 return 0;
1975 }
1976
1977 static int radeon_cp_clear(DRM_IOCTL_ARGS)
1978 {
1979         DRM_DEVICE;
1980         drm_radeon_private_t *dev_priv = dev->dev_private;
1981         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1982         drm_radeon_clear_t clear;
1983         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
1984         DRM_DEBUG("\n");
1985
1986         LOCK_TEST_WITH_RETURN(dev, filp);
1987
1988         DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
1989                                  sizeof(clear));
1990
1991         RING_SPACE_TEST_WITH_RETURN(dev_priv);
1992
1993         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
1994                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
1995
1996         if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
1997                                sarea_priv->nbox * sizeof(depth_boxes[0])))
1998                 return DRM_ERR(EFAULT);
1999
2000         radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2001
2002         COMMIT_RING();
2003         return 0;
2004 }
2005
2006 /* Not sure why this isn't set all the time:
2007  */
2008 static int radeon_do_init_pageflip(drm_device_t * dev)
2009 {
2010         drm_radeon_private_t *dev_priv = dev->dev_private;
2011         RING_LOCALS;
2012
2013         DRM_DEBUG("\n");
2014
2015         BEGIN_RING(6);
2016         RADEON_WAIT_UNTIL_3D_IDLE();
2017         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2018         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2019                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2020         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2021         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2022                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2023         ADVANCE_RING();
2024
2025         dev_priv->page_flipping = 1;
2026         dev_priv->current_page = 0;
2027         dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2028
2029         return 0;
2030 }
2031
2032 /* Called whenever a client dies, from drm_release.
2033  * NOTE:  Lock isn't necessarily held when this is called!
2034  */
2035 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2036 {
2037         drm_radeon_private_t *dev_priv = dev->dev_private;
2038         DRM_DEBUG("\n");
2039
2040         if (dev_priv->current_page != 0)
2041                 radeon_cp_dispatch_flip(dev);
2042
2043         dev_priv->page_flipping = 0;
2044         return 0;
2045 }
2046
2047 /* Swapping and flipping are different operations, need different ioctls.
2048  * They can & should be intermixed to support multiple 3d windows.
2049  */
2050 static int radeon_cp_flip(DRM_IOCTL_ARGS)
2051 {
2052         DRM_DEVICE;
2053         drm_radeon_private_t *dev_priv = dev->dev_private;
2054         DRM_DEBUG("\n");
2055
2056         LOCK_TEST_WITH_RETURN(dev, filp);
2057
2058         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2059
2060         if (!dev_priv->page_flipping)
2061                 radeon_do_init_pageflip(dev);
2062
2063         radeon_cp_dispatch_flip(dev);
2064
2065         COMMIT_RING();
2066         return 0;
2067 }
2068
2069 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2070 {
2071         DRM_DEVICE;
2072         drm_radeon_private_t *dev_priv = dev->dev_private;
2073         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2074         DRM_DEBUG("\n");
2075
2076         LOCK_TEST_WITH_RETURN(dev, filp);
2077
2078         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2079
2080         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2081                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2082
2083         radeon_cp_dispatch_swap(dev);
2084         dev_priv->sarea_priv->ctx_owner = 0;
2085
2086         COMMIT_RING();
2087         return 0;
2088 }
2089
2090 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2091 {
2092         DRM_DEVICE;
2093         drm_radeon_private_t *dev_priv = dev->dev_private;
2094         drm_file_t *filp_priv;
2095         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2096         drm_device_dma_t *dma = dev->dma;
2097         drm_buf_t *buf;
2098         drm_radeon_vertex_t vertex;
2099         drm_radeon_tcl_prim_t prim;
2100
2101         LOCK_TEST_WITH_RETURN(dev, filp);
2102
2103         if (!dev_priv) {
2104                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2105                 return DRM_ERR(EINVAL);
2106         }
2107
2108         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2109
2110         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
2111                                  sizeof(vertex));
2112
2113         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2114                   DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2115
2116         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2117                 DRM_ERROR("buffer index %d (of %d max)\n",
2118                           vertex.idx, dma->buf_count - 1);
2119                 return DRM_ERR(EINVAL);
2120         }
2121         if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2122                 DRM_ERROR("buffer prim %d\n", vertex.prim);
2123                 return DRM_ERR(EINVAL);
2124         }
2125
2126         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2127         VB_AGE_TEST_WITH_RETURN(dev_priv);
2128
2129         buf = dma->buflist[vertex.idx];
2130
2131         if (buf->filp != filp) {
2132                 DRM_ERROR("process %d using buffer owned by %p\n",
2133                           DRM_CURRENTPID, buf->filp);
2134                 return DRM_ERR(EINVAL);
2135         }
2136         if (buf->pending) {
2137                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2138                 return DRM_ERR(EINVAL);
2139         }
2140
2141         /* Build up a prim_t record:
2142          */
2143         if (vertex.count) {
2144                 buf->used = vertex.count;       /* not used? */
2145
2146                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2147                         if (radeon_emit_state(dev_priv, filp_priv,
2148                                               &sarea_priv->context_state,
2149                                               sarea_priv->tex_state,
2150                                               sarea_priv->dirty)) {
2151                                 DRM_ERROR("radeon_emit_state failed\n");
2152                                 return DRM_ERR(EINVAL);
2153                         }
2154
2155                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2156                                                RADEON_UPLOAD_TEX1IMAGES |
2157                                                RADEON_UPLOAD_TEX2IMAGES |
2158                                                RADEON_REQUIRE_QUIESCENCE);
2159                 }
2160
2161                 prim.start = 0;
2162                 prim.finish = vertex.count;     /* unused */
2163                 prim.prim = vertex.prim;
2164                 prim.numverts = vertex.count;
2165                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2166
2167                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2168         }
2169
2170         if (vertex.discard) {
2171                 radeon_cp_discard_buffer(dev, buf);
2172         }
2173
2174         COMMIT_RING();
2175         return 0;
2176 }
2177
2178 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2179 {
2180         DRM_DEVICE;
2181         drm_radeon_private_t *dev_priv = dev->dev_private;
2182         drm_file_t *filp_priv;
2183         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2184         drm_device_dma_t *dma = dev->dma;
2185         drm_buf_t *buf;
2186         drm_radeon_indices_t elts;
2187         drm_radeon_tcl_prim_t prim;
2188         int count;
2189
2190         LOCK_TEST_WITH_RETURN(dev, filp);
2191
2192         if (!dev_priv) {
2193                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2194                 return DRM_ERR(EINVAL);
2195         }
2196
2197         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2198
2199         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2200                                  sizeof(elts));
2201
2202         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2203                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2204
2205         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2206                 DRM_ERROR("buffer index %d (of %d max)\n",
2207                           elts.idx, dma->buf_count - 1);
2208                 return DRM_ERR(EINVAL);
2209         }
2210         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2211                 DRM_ERROR("buffer prim %d\n", elts.prim);
2212                 return DRM_ERR(EINVAL);
2213         }
2214
2215         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2216         VB_AGE_TEST_WITH_RETURN(dev_priv);
2217
2218         buf = dma->buflist[elts.idx];
2219
2220         if (buf->filp != filp) {
2221                 DRM_ERROR("process %d using buffer owned by %p\n",
2222                           DRM_CURRENTPID, buf->filp);
2223                 return DRM_ERR(EINVAL);
2224         }
2225         if (buf->pending) {
2226                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2227                 return DRM_ERR(EINVAL);
2228         }
2229
2230         count = (elts.end - elts.start) / sizeof(u16);
2231         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2232
2233         if (elts.start & 0x7) {
2234                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2235                 return DRM_ERR(EINVAL);
2236         }
2237         if (elts.start < buf->used) {
2238                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2239                 return DRM_ERR(EINVAL);
2240         }
2241
2242         buf->used = elts.end;
2243
2244         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2245                 if (radeon_emit_state(dev_priv, filp_priv,
2246                                       &sarea_priv->context_state,
2247                                       sarea_priv->tex_state,
2248                                       sarea_priv->dirty)) {
2249                         DRM_ERROR("radeon_emit_state failed\n");
2250                         return DRM_ERR(EINVAL);
2251                 }
2252
2253                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2254                                        RADEON_UPLOAD_TEX1IMAGES |
2255                                        RADEON_UPLOAD_TEX2IMAGES |
2256                                        RADEON_REQUIRE_QUIESCENCE);
2257         }
2258
2259         /* Build up a prim_t record:
2260          */
2261         prim.start = elts.start;
2262         prim.finish = elts.end;
2263         prim.prim = elts.prim;
2264         prim.offset = 0;        /* offset from start of dma buffers */
2265         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2266         prim.vc_format = dev_priv->sarea_priv->vc_format;
2267
2268         radeon_cp_dispatch_indices(dev, buf, &prim);
2269         if (elts.discard) {
2270                 radeon_cp_discard_buffer(dev, buf);
2271         }
2272
2273         COMMIT_RING();
2274         return 0;
2275 }
2276
2277 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2278 {
2279         DRM_DEVICE;
2280         drm_radeon_private_t *dev_priv = dev->dev_private;
2281         drm_radeon_texture_t tex;
2282         drm_radeon_tex_image_t image;
2283         int ret;
2284
2285         LOCK_TEST_WITH_RETURN(dev, filp);
2286
2287         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2288                                  sizeof(tex));
2289
2290         if (tex.image == NULL) {
2291                 DRM_ERROR("null texture image!\n");
2292                 return DRM_ERR(EINVAL);
2293         }
2294
2295         if (DRM_COPY_FROM_USER(&image,
2296                                (drm_radeon_tex_image_t __user *) tex.image,
2297                                sizeof(image)))
2298                 return DRM_ERR(EFAULT);
2299
2300         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2301         VB_AGE_TEST_WITH_RETURN(dev_priv);
2302
2303         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2304
2305         COMMIT_RING();
2306         return ret;
2307 }
2308
2309 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2310 {
2311         DRM_DEVICE;
2312         drm_radeon_private_t *dev_priv = dev->dev_private;
2313         drm_radeon_stipple_t stipple;
2314         u32 mask[32];
2315
2316         LOCK_TEST_WITH_RETURN(dev, filp);
2317
2318         DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2319                                  sizeof(stipple));
2320
2321         if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2322                 return DRM_ERR(EFAULT);
2323
2324         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2325
2326         radeon_cp_dispatch_stipple(dev, mask);
2327
2328         COMMIT_RING();
2329         return 0;
2330 }
2331
2332 static int radeon_cp_indirect(DRM_IOCTL_ARGS)
2333 {
2334         DRM_DEVICE;
2335         drm_radeon_private_t *dev_priv = dev->dev_private;
2336         drm_device_dma_t *dma = dev->dma;
2337         drm_buf_t *buf;
2338         drm_radeon_indirect_t indirect;
2339         RING_LOCALS;
2340
2341         LOCK_TEST_WITH_RETURN(dev, filp);
2342
2343         if (!dev_priv) {
2344                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2345                 return DRM_ERR(EINVAL);
2346         }
2347
2348         DRM_COPY_FROM_USER_IOCTL(indirect,
2349                                  (drm_radeon_indirect_t __user *) data,
2350                                  sizeof(indirect));
2351
2352         DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
2353                   indirect.idx, indirect.start, indirect.end, indirect.discard);
2354
2355         if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
2356                 DRM_ERROR("buffer index %d (of %d max)\n",
2357                           indirect.idx, dma->buf_count - 1);
2358                 return DRM_ERR(EINVAL);
2359         }
2360
2361         buf = dma->buflist[indirect.idx];
2362
2363         if (buf->filp != filp) {
2364                 DRM_ERROR("process %d using buffer owned by %p\n",
2365                           DRM_CURRENTPID, buf->filp);
2366                 return DRM_ERR(EINVAL);
2367         }
2368         if (buf->pending) {
2369                 DRM_ERROR("sending pending buffer %d\n", indirect.idx);
2370                 return DRM_ERR(EINVAL);
2371         }
2372
2373         if (indirect.start < buf->used) {
2374                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2375                           indirect.start, buf->used);
2376                 return DRM_ERR(EINVAL);
2377         }
2378
2379         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2380         VB_AGE_TEST_WITH_RETURN(dev_priv);
2381
2382         buf->used = indirect.end;
2383
2384         /* Wait for the 3D stream to idle before the indirect buffer
2385          * containing 2D acceleration commands is processed.
2386          */
2387         BEGIN_RING(2);
2388
2389         RADEON_WAIT_UNTIL_3D_IDLE();
2390
2391         ADVANCE_RING();
2392
2393         /* Dispatch the indirect buffer full of commands from the
2394          * X server.  This is insecure and is thus only available to
2395          * privileged clients.
2396          */
2397         radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
2398         if (indirect.discard) {
2399                 radeon_cp_discard_buffer(dev, buf);
2400         }
2401
2402         COMMIT_RING();
2403         return 0;
2404 }
2405
2406 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2407 {
2408         DRM_DEVICE;
2409         drm_radeon_private_t *dev_priv = dev->dev_private;
2410         drm_file_t *filp_priv;
2411         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2412         drm_device_dma_t *dma = dev->dma;
2413         drm_buf_t *buf;
2414         drm_radeon_vertex2_t vertex;
2415         int i;
2416         unsigned char laststate;
2417
2418         LOCK_TEST_WITH_RETURN(dev, filp);
2419
2420         if (!dev_priv) {
2421                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2422                 return DRM_ERR(EINVAL);
2423         }
2424
2425         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2426
2427         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2428                                  sizeof(vertex));
2429
2430         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2431                   DRM_CURRENTPID, vertex.idx, vertex.discard);
2432
2433         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2434                 DRM_ERROR("buffer index %d (of %d max)\n",
2435                           vertex.idx, dma->buf_count - 1);
2436                 return DRM_ERR(EINVAL);
2437         }
2438
2439         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2440         VB_AGE_TEST_WITH_RETURN(dev_priv);
2441
2442         buf = dma->buflist[vertex.idx];
2443
2444         if (buf->filp != filp) {
2445                 DRM_ERROR("process %d using buffer owned by %p\n",
2446                           DRM_CURRENTPID, buf->filp);
2447                 return DRM_ERR(EINVAL);
2448         }
2449
2450         if (buf->pending) {
2451                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2452                 return DRM_ERR(EINVAL);
2453         }
2454
2455         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2456                 return DRM_ERR(EINVAL);
2457
2458         for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2459                 drm_radeon_prim_t prim;
2460                 drm_radeon_tcl_prim_t tclprim;
2461
2462                 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2463                         return DRM_ERR(EFAULT);
2464
2465                 if (prim.stateidx != laststate) {
2466                         drm_radeon_state_t state;
2467
2468                         if (DRM_COPY_FROM_USER(&state,
2469                                                &vertex.state[prim.stateidx],
2470                                                sizeof(state)))
2471                                 return DRM_ERR(EFAULT);
2472
2473                         if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2474                                 DRM_ERROR("radeon_emit_state2 failed\n");
2475                                 return DRM_ERR(EINVAL);
2476                         }
2477
2478                         laststate = prim.stateidx;
2479                 }
2480
2481                 tclprim.start = prim.start;
2482                 tclprim.finish = prim.finish;
2483                 tclprim.prim = prim.prim;
2484                 tclprim.vc_format = prim.vc_format;
2485
2486                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2487                         tclprim.offset = prim.numverts * 64;
2488                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2489
2490                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2491                 } else {
2492                         tclprim.numverts = prim.numverts;
2493                         tclprim.offset = 0;     /* not used */
2494
2495                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2496                 }
2497
2498                 if (sarea_priv->nbox == 1)
2499                         sarea_priv->nbox = 0;
2500         }
2501
2502         if (vertex.discard) {
2503                 radeon_cp_discard_buffer(dev, buf);
2504         }
2505
2506         COMMIT_RING();
2507         return 0;
2508 }
2509
2510 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2511                                drm_file_t * filp_priv,
2512                                drm_radeon_cmd_header_t header,
2513                                drm_radeon_kcmd_buffer_t *cmdbuf)
2514 {
2515         int id = (int)header.packet.packet_id;
2516         int sz, reg;
2517         int *data = (int *)cmdbuf->buf;
2518         RING_LOCALS;
2519
2520         if (id >= RADEON_MAX_STATE_PACKETS)
2521                 return DRM_ERR(EINVAL);
2522
2523         sz = packet[id].len;
2524         reg = packet[id].start;
2525
2526         if (sz * sizeof(int) > cmdbuf->bufsz) {
2527                 DRM_ERROR("Packet size provided larger than data provided\n");
2528                 return DRM_ERR(EINVAL);
2529         }
2530
2531         if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2532                 DRM_ERROR("Packet verification failed\n");
2533                 return DRM_ERR(EINVAL);
2534         }
2535
2536         BEGIN_RING(sz + 1);
2537         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2538         OUT_RING_TABLE(data, sz);
2539         ADVANCE_RING();
2540
2541         cmdbuf->buf += sz * sizeof(int);
2542         cmdbuf->bufsz -= sz * sizeof(int);
2543         return 0;
2544 }
2545
2546 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2547                                           drm_radeon_cmd_header_t header,
2548                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2549 {
2550         int sz = header.scalars.count;
2551         int start = header.scalars.offset;
2552         int stride = header.scalars.stride;
2553         RING_LOCALS;
2554
2555         BEGIN_RING(3 + sz);
2556         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2557         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2558         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2559         OUT_RING_TABLE(cmdbuf->buf, sz);
2560         ADVANCE_RING();
2561         cmdbuf->buf += sz * sizeof(int);
2562         cmdbuf->bufsz -= sz * sizeof(int);
2563         return 0;
2564 }
2565
2566 /* God this is ugly
2567  */
2568 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2569                                            drm_radeon_cmd_header_t header,
2570                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2571 {
2572         int sz = header.scalars.count;
2573         int start = ((unsigned int)header.scalars.offset) + 0x100;
2574         int stride = header.scalars.stride;
2575         RING_LOCALS;
2576
2577         BEGIN_RING(3 + sz);
2578         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2579         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2580         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2581         OUT_RING_TABLE(cmdbuf->buf, sz);
2582         ADVANCE_RING();
2583         cmdbuf->buf += sz * sizeof(int);
2584         cmdbuf->bufsz -= sz * sizeof(int);
2585         return 0;
2586 }
2587
2588 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2589                                           drm_radeon_cmd_header_t header,
2590                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2591 {
2592         int sz = header.vectors.count;
2593         int start = header.vectors.offset;
2594         int stride = header.vectors.stride;
2595         RING_LOCALS;
2596
2597         BEGIN_RING(3 + sz);
2598         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2599         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2600         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2601         OUT_RING_TABLE(cmdbuf->buf, sz);
2602         ADVANCE_RING();
2603
2604         cmdbuf->buf += sz * sizeof(int);
2605         cmdbuf->bufsz -= sz * sizeof(int);
2606         return 0;
2607 }
2608
2609 static int radeon_emit_packet3(drm_device_t * dev,
2610                                drm_file_t * filp_priv,
2611                                drm_radeon_kcmd_buffer_t *cmdbuf)
2612 {
2613         drm_radeon_private_t *dev_priv = dev->dev_private;
2614         unsigned int cmdsz;
2615         int ret;
2616         RING_LOCALS;
2617
2618         DRM_DEBUG("\n");
2619
2620         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2621                                                   cmdbuf, &cmdsz))) {
2622                 DRM_ERROR("Packet verification failed\n");
2623                 return ret;
2624         }
2625
2626         BEGIN_RING(cmdsz);
2627         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2628         ADVANCE_RING();
2629
2630         cmdbuf->buf += cmdsz * 4;
2631         cmdbuf->bufsz -= cmdsz * 4;
2632         return 0;
2633 }
2634
2635 static int radeon_emit_packet3_cliprect(drm_device_t *dev,
2636                                         drm_file_t *filp_priv,
2637                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2638                                         int orig_nbox)
2639 {
2640         drm_radeon_private_t *dev_priv = dev->dev_private;
2641         drm_clip_rect_t box;
2642         unsigned int cmdsz;
2643         int ret;
2644         drm_clip_rect_t __user *boxes = cmdbuf->boxes;
2645         int i = 0;
2646         RING_LOCALS;
2647
2648         DRM_DEBUG("\n");
2649
2650         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2651                                                   cmdbuf, &cmdsz))) {
2652                 DRM_ERROR("Packet verification failed\n");
2653                 return ret;
2654         }
2655
2656         if (!orig_nbox)
2657                 goto out;
2658
2659         do {
2660                 if (i < cmdbuf->nbox) {
2661                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2662                                 return DRM_ERR(EFAULT);
2663                         /* FIXME The second and subsequent times round
2664                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2665                          * calling emit_clip_rect(). This fixes a
2666                          * lockup on fast machines when sending
2667                          * several cliprects with a cmdbuf, as when
2668                          * waving a 2D window over a 3D
2669                          * window. Something in the commands from user
2670                          * space seems to hang the card when they're
2671                          * sent several times in a row. That would be
2672                          * the correct place to fix it but this works
2673                          * around it until I can figure that out - Tim
2674                          * Smith */
2675                         if (i) {
2676                                 BEGIN_RING(2);
2677                                 RADEON_WAIT_UNTIL_3D_IDLE();
2678                                 ADVANCE_RING();
2679                         }
2680                         radeon_emit_clip_rect(dev_priv, &box);
2681                 }
2682
2683                 BEGIN_RING(cmdsz);
2684                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2685                 ADVANCE_RING();
2686
2687         } while (++i < cmdbuf->nbox);
2688         if (cmdbuf->nbox == 1)
2689                 cmdbuf->nbox = 0;
2690
2691       out:
2692         cmdbuf->buf += cmdsz * 4;
2693         cmdbuf->bufsz -= cmdsz * 4;
2694         return 0;
2695 }
2696
2697 static int radeon_emit_wait(drm_device_t * dev, int flags)
2698 {
2699         drm_radeon_private_t *dev_priv = dev->dev_private;
2700         RING_LOCALS;
2701
2702         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2703         switch (flags) {
2704         case RADEON_WAIT_2D:
2705                 BEGIN_RING(2);
2706                 RADEON_WAIT_UNTIL_2D_IDLE();
2707                 ADVANCE_RING();
2708                 break;
2709         case RADEON_WAIT_3D:
2710                 BEGIN_RING(2);
2711                 RADEON_WAIT_UNTIL_3D_IDLE();
2712                 ADVANCE_RING();
2713                 break;
2714         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2715                 BEGIN_RING(2);
2716                 RADEON_WAIT_UNTIL_IDLE();
2717                 ADVANCE_RING();
2718                 break;
2719         default:
2720                 return DRM_ERR(EINVAL);
2721         }
2722
2723         return 0;
2724 }
2725
2726 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2727 {
2728         DRM_DEVICE;
2729         drm_radeon_private_t *dev_priv = dev->dev_private;
2730         drm_file_t *filp_priv;
2731         drm_device_dma_t *dma = dev->dma;
2732         drm_buf_t *buf = NULL;
2733         int idx;
2734         drm_radeon_kcmd_buffer_t cmdbuf;
2735         drm_radeon_cmd_header_t header;
2736         int orig_nbox, orig_bufsz;
2737         char *kbuf = NULL;
2738
2739         LOCK_TEST_WITH_RETURN(dev, filp);
2740
2741         if (!dev_priv) {
2742                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2743                 return DRM_ERR(EINVAL);
2744         }
2745
2746         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2747
2748         DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2749                                  (drm_radeon_cmd_buffer_t __user *) data,
2750                                  sizeof(cmdbuf));
2751
2752         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2753         VB_AGE_TEST_WITH_RETURN(dev_priv);
2754
2755         if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2756                 return DRM_ERR(EINVAL);
2757         }
2758
2759         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2760          * races between checking values and using those values in other code,
2761          * and simply to avoid a lot of function calls to copy in data.
2762          */
2763         orig_bufsz = cmdbuf.bufsz;
2764         if (orig_bufsz != 0) {
2765                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2766                 if (kbuf == NULL)
2767                         return DRM_ERR(ENOMEM);
2768                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
2769                                        cmdbuf.bufsz)) {
2770                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2771                         return DRM_ERR(EFAULT);
2772                 }
2773                 cmdbuf.buf = kbuf;
2774         }
2775
2776         orig_nbox = cmdbuf.nbox;
2777
2778         if (dev_priv->microcode_version == UCODE_R300) {
2779                 int temp;
2780                 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2781
2782                 if (orig_bufsz != 0)
2783                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2784
2785                 return temp;
2786         }
2787
2788         /* microcode_version != r300 */
2789         while (cmdbuf.bufsz >= sizeof(header)) {
2790
2791                 header.i = *(int *)cmdbuf.buf;
2792                 cmdbuf.buf += sizeof(header);
2793                 cmdbuf.bufsz -= sizeof(header);
2794
2795                 switch (header.header.cmd_type) {
2796                 case RADEON_CMD_PACKET:
2797                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2798                         if (radeon_emit_packets
2799                             (dev_priv, filp_priv, header, &cmdbuf)) {
2800                                 DRM_ERROR("radeon_emit_packets failed\n");
2801                                 goto err;
2802                         }
2803                         break;
2804
2805                 case RADEON_CMD_SCALARS:
2806                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2807                         if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2808                                 DRM_ERROR("radeon_emit_scalars failed\n");
2809                                 goto err;
2810                         }
2811                         break;
2812
2813                 case RADEON_CMD_VECTORS:
2814                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2815                         if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2816                                 DRM_ERROR("radeon_emit_vectors failed\n");
2817                                 goto err;
2818                         }
2819                         break;
2820
2821                 case RADEON_CMD_DMA_DISCARD:
2822                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2823                         idx = header.dma.buf_idx;
2824                         if (idx < 0 || idx >= dma->buf_count) {
2825                                 DRM_ERROR("buffer index %d (of %d max)\n",
2826                                           idx, dma->buf_count - 1);
2827                                 goto err;
2828                         }
2829
2830                         buf = dma->buflist[idx];
2831                         if (buf->filp != filp || buf->pending) {
2832                                 DRM_ERROR("bad buffer %p %p %d\n",
2833                                           buf->filp, filp, buf->pending);
2834                                 goto err;
2835                         }
2836
2837                         radeon_cp_discard_buffer(dev, buf);
2838                         break;
2839
2840                 case RADEON_CMD_PACKET3:
2841                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2842                         if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2843                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2844                                 goto err;
2845                         }
2846                         break;
2847
2848                 case RADEON_CMD_PACKET3_CLIP:
2849                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2850                         if (radeon_emit_packet3_cliprect
2851                             (dev, filp_priv, &cmdbuf, orig_nbox)) {
2852                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2853                                 goto err;
2854                         }
2855                         break;
2856
2857                 case RADEON_CMD_SCALARS2:
2858                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2859                         if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2860                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2861                                 goto err;
2862                         }
2863                         break;
2864
2865                 case RADEON_CMD_WAIT:
2866                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2867                         if (radeon_emit_wait(dev, header.wait.flags)) {
2868                                 DRM_ERROR("radeon_emit_wait failed\n");
2869                                 goto err;
2870                         }
2871                         break;
2872                 default:
2873                         DRM_ERROR("bad cmd_type %d at %p\n",
2874                                   header.header.cmd_type,
2875                                   cmdbuf.buf - sizeof(header));
2876                         goto err;
2877                 }
2878         }
2879
2880         if (orig_bufsz != 0)
2881                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2882
2883         DRM_DEBUG("DONE\n");
2884         COMMIT_RING();
2885         return 0;
2886
2887       err:
2888         if (orig_bufsz != 0)
2889                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2890         return DRM_ERR(EINVAL);
2891 }
2892
2893 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2894 {
2895         DRM_DEVICE;
2896         drm_radeon_private_t *dev_priv = dev->dev_private;
2897         drm_radeon_getparam_t param;
2898         int value;
2899
2900         if (!dev_priv) {
2901                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2902                 return DRM_ERR(EINVAL);
2903         }
2904
2905         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2906                                  sizeof(param));
2907
2908         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2909
2910         switch (param.param) {
2911         case RADEON_PARAM_GART_BUFFER_OFFSET:
2912                 value = dev_priv->gart_buffers_offset;
2913                 break;
2914         case RADEON_PARAM_LAST_FRAME:
2915                 dev_priv->stats.last_frame_reads++;
2916                 value = GET_SCRATCH(0);
2917                 break;
2918         case RADEON_PARAM_LAST_DISPATCH:
2919                 value = GET_SCRATCH(1);
2920                 break;
2921         case RADEON_PARAM_LAST_CLEAR:
2922                 dev_priv->stats.last_clear_reads++;
2923                 value = GET_SCRATCH(2);
2924                 break;
2925         case RADEON_PARAM_IRQ_NR:
2926                 value = dev->irq;
2927                 break;
2928         case RADEON_PARAM_GART_BASE:
2929                 value = dev_priv->gart_vm_start;
2930                 break;
2931         case RADEON_PARAM_REGISTER_HANDLE:
2932                 value = dev_priv->mmio->offset;
2933                 break;
2934         case RADEON_PARAM_STATUS_HANDLE:
2935                 value = dev_priv->ring_rptr_offset;
2936                 break;
2937 #if BITS_PER_LONG == 32
2938                 /*
2939                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2940                  * pointer which can't fit into an int-sized variable.  According to
2941                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2942                  * not supporting it shouldn't be a problem.  If the same functionality
2943                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2944                  * so backwards-compatibility for the embedded platforms can be
2945                  * maintained.  --davidm 4-Feb-2004.
2946                  */
2947         case RADEON_PARAM_SAREA_HANDLE:
2948                 /* The lock is the first dword in the sarea. */
2949                 value = (long)dev->lock.hw_lock;
2950                 break;
2951 #endif
2952         case RADEON_PARAM_GART_TEX_HANDLE:
2953                 value = dev_priv->gart_textures_offset;
2954                 break;
2955         
2956         case RADEON_PARAM_CARD_TYPE:
2957                 if (dev_priv->flags & CHIP_IS_PCIE)
2958                         value = RADEON_CARD_PCIE;
2959                 else if (dev_priv->flags & CHIP_IS_AGP)
2960                         value = RADEON_CARD_AGP;
2961                 else
2962                         value = RADEON_CARD_PCI;
2963                 break;
2964         default:
2965                 return DRM_ERR(EINVAL);
2966         }
2967
2968         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
2969                 DRM_ERROR("copy_to_user\n");
2970                 return DRM_ERR(EFAULT);
2971         }
2972
2973         return 0;
2974 }
2975
2976 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
2977 {
2978         DRM_DEVICE;
2979         drm_radeon_private_t *dev_priv = dev->dev_private;
2980         drm_file_t *filp_priv;
2981         drm_radeon_setparam_t sp;
2982         struct drm_radeon_driver_file_fields *radeon_priv;
2983
2984         if (!dev_priv) {
2985                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2986                 return DRM_ERR(EINVAL);
2987         }
2988
2989         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2990
2991         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
2992                                  sizeof(sp));
2993
2994         switch (sp.param) {
2995         case RADEON_SETPARAM_FB_LOCATION:
2996                 radeon_priv = filp_priv->driver_priv;
2997                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
2998                 break;
2999         case RADEON_SETPARAM_SWITCH_TILING:
3000                 if (sp.value == 0) {
3001                         DRM_DEBUG("color tiling disabled\n");
3002                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3003                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3004                         dev_priv->sarea_priv->tiling_enabled = 0;
3005                 } else if (sp.value == 1) {
3006                         DRM_DEBUG("color tiling enabled\n");
3007                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3008                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3009                         dev_priv->sarea_priv->tiling_enabled = 1;
3010                 }
3011                 break;
3012         case RADEON_SETPARAM_PCIGART_LOCATION:
3013                 dev_priv->pcigart_offset = sp.value;
3014                 break;
3015         default:
3016                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3017                 return DRM_ERR(EINVAL);
3018         }
3019
3020         return 0;
3021 }
3022
3023 /* When a client dies:
3024  *    - Check for and clean up flipped page state
3025  *    - Free any alloced GART memory.
3026  *    - Free any alloced radeon surfaces.
3027  *
3028  * DRM infrastructure takes care of reclaiming dma buffers.
3029  */
3030 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3031 {
3032         if (dev->dev_private) {
3033                 drm_radeon_private_t *dev_priv = dev->dev_private;
3034                 if (dev_priv->page_flipping) {
3035                         radeon_do_cleanup_pageflip(dev);
3036                 }
3037                 radeon_mem_release(filp, dev_priv->gart_heap);
3038                 radeon_mem_release(filp, dev_priv->fb_heap);
3039                 radeon_surfaces_release(filp, dev_priv);
3040         }
3041 }
3042
3043 void radeon_driver_lastclose(drm_device_t * dev)
3044 {
3045         radeon_do_release(dev);
3046 }
3047
3048 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3049 {
3050         drm_radeon_private_t *dev_priv = dev->dev_private;
3051         struct drm_radeon_driver_file_fields *radeon_priv;
3052
3053         DRM_DEBUG("\n");
3054         radeon_priv =
3055             (struct drm_radeon_driver_file_fields *)
3056             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3057
3058         if (!radeon_priv)
3059                 return -ENOMEM;
3060
3061         filp_priv->driver_priv = radeon_priv;
3062
3063         if (dev_priv)
3064                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3065         else
3066                 radeon_priv->radeon_fb_delta = 0;
3067         return 0;
3068 }
3069
3070 void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
3071 {
3072         struct drm_radeon_driver_file_fields *radeon_priv =
3073             filp_priv->driver_priv;
3074
3075         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3076 }
3077
3078 drm_ioctl_desc_t radeon_ioctls[] = {
3079         [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3080         [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3081         [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3082         [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3083         [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
3084         [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
3085         [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
3086         [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
3087         [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
3088         [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
3089         [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
3090         [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
3091         [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
3092         [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
3093         [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3094         [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
3095         [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
3096         [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
3097         [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
3098         [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
3099         [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
3100         [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
3101         [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
3102         [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
3103         [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
3104         [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
3105         [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
3106 };
3107
3108 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);