/*
 * Imported from the Linux kernel tree (linux-2.6):
 * drivers/char/drm/radeon_state.c
 */
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
2 /*
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
/*
 * Validate and normalize a client-supplied card-space offset.
 *
 * @dev_priv:  device private state (framebuffer / GART aperture ranges)
 * @filp_priv: per-open-file state; supplies the legacy fb delta
 * @offset:    in/out — offset to check; rewritten in place when fixed up
 *
 * Returns 0 when the (possibly corrected) offset lands inside the
 * framebuffer or GART aperture, DRM_ERR(EINVAL) otherwise.  The offset
 * is only written back if the final recheck passes.
 */
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
                                                    dev_priv,
                                                    drm_file_t * filp_priv,
                                                    u32 *offset)
{
        u32 off = *offset;
        struct drm_radeon_driver_file_fields *radeon_priv;

        /* Hrm ... the story of the offset ... So this function converts
         * the various ideas of what userland clients might have for an
         * offset in the card address space into an offset into the card
         * address space :) So with a sane client, it should just keep
         * the value intact and just do some boundary checking. However,
         * not all clients are sane. Some older clients pass us 0 based
         * offsets relative to the start of the framebuffer and some may
         * assume the AGP aperture it appended to the framebuffer, so we
         * try to detect those cases and fix them up.
         *
         * Note: It might be a good idea here to make sure the offset lands
         * in some "allowed" area to protect things like the PCIE GART...
         */

        /* First, the best case, the offset already lands in either the
         * framebuffer or the GART mapped space
         */
        if ((off >= dev_priv->fb_location &&
             off < (dev_priv->fb_location + dev_priv->fb_size)) ||
            (off >= dev_priv->gart_vm_start &&
             off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
                return 0;

        /* Ok, that didn't happen... now check if we have a zero based
         * offset that fits in the framebuffer + gart space, apply the
         * magic offset we get from SETPARAM or calculated from fb_location
         */
        if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
                radeon_priv = filp_priv->driver_priv;
                off += radeon_priv->radeon_fb_delta;
        }

        /* Finally, assume we aimed at a GART offset if beyond the fb */
        if (off > (dev_priv->fb_location + dev_priv->fb_size))
                off = off - (dev_priv->fb_location + dev_priv->fb_size) +
                        dev_priv->gart_vm_start;

        /* Now recheck and fail if out of bounds */
        if ((off >= dev_priv->fb_location &&
             off < (dev_priv->fb_location + dev_priv->fb_size)) ||
            (off >= dev_priv->gart_vm_start &&
             off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
                DRM_DEBUG("offset fixed up to 0x%x\n", off);
                *offset = off;
                return 0;
        }
        return DRM_ERR(EINVAL);
}
96
/*
 * Validate (and fix up) the memory offsets embedded in a client state
 * packet before it is emitted to the ring.
 *
 * @dev_priv:  device private state, forwarded to the offset checker
 * @filp_priv: per-open-file state, forwarded to the offset checker
 * @id:        state packet identifier (RADEON_EMIT_* / R200_EMIT_*);
 *             indexes the packet[] layout table elsewhere in this file
 * @data:      packet payload (register values); offsets may be rewritten
 *             in place by radeon_check_and_fixup_offset()
 *
 * Returns 0 on success, DRM_ERR(EINVAL) for a bad offset or unknown id.
 */
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
                                                     dev_priv,
                                                     drm_file_t * filp_priv,
                                                     int id, u32 *data)
{
        switch (id) {

        /* Single embedded offset: payload index is derived from the
         * register distance to the packet's base register (4 bytes/reg).
         */
        case RADEON_EMIT_PP_MISC:
                if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
                    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
                        DRM_ERROR("Invalid depth buffer offset\n");
                        return DRM_ERR(EINVAL);
                }
                break;

        case RADEON_EMIT_PP_CNTL:
                if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
                    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
                        DRM_ERROR("Invalid colour buffer offset\n");
                        return DRM_ERR(EINVAL);
                }
                break;

        /* R200 texture offsets: the offset is the first (only) dword */
        case R200_EMIT_PP_TXOFFSET_0:
        case R200_EMIT_PP_TXOFFSET_1:
        case R200_EMIT_PP_TXOFFSET_2:
        case R200_EMIT_PP_TXOFFSET_3:
        case R200_EMIT_PP_TXOFFSET_4:
        case R200_EMIT_PP_TXOFFSET_5:
                if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
                                                  &data[0])) {
                        DRM_ERROR("Invalid R200 texture offset\n");
                        return DRM_ERR(EINVAL);
                }
                break;

        case RADEON_EMIT_PP_TXFILTER_0:
        case RADEON_EMIT_PP_TXFILTER_1:
        case RADEON_EMIT_PP_TXFILTER_2:
                if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
                    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
                        DRM_ERROR("Invalid R100 texture offset\n");
                        return DRM_ERR(EINVAL);
                }
                break;

        /* Cubic texture packets carry five face offsets back to back */
        case R200_EMIT_PP_CUBIC_OFFSETS_0:
        case R200_EMIT_PP_CUBIC_OFFSETS_1:
        case R200_EMIT_PP_CUBIC_OFFSETS_2:
        case R200_EMIT_PP_CUBIC_OFFSETS_3:
        case R200_EMIT_PP_CUBIC_OFFSETS_4:
        case R200_EMIT_PP_CUBIC_OFFSETS_5:{
                        int i;
                        for (i = 0; i < 5; i++) {
                                if (radeon_check_and_fixup_offset(dev_priv,
                                                                  filp_priv,
                                                                  &data[i])) {
                                        DRM_ERROR
                                            ("Invalid R200 cubic texture offset\n");
                                        return DRM_ERR(EINVAL);
                                }
                        }
                        break;
                }

        case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
        case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
        case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
                        int i;
                        for (i = 0; i < 5; i++) {
                                if (radeon_check_and_fixup_offset(dev_priv,
                                                                  filp_priv,
                                                                  &data[i])) {
                                        DRM_ERROR
                                            ("Invalid R100 cubic texture offset\n");
                                        return DRM_ERR(EINVAL);
                                }
                        }
                }
                break;

        case RADEON_EMIT_RB3D_COLORPITCH:
        case RADEON_EMIT_RE_LINE_PATTERN:
        case RADEON_EMIT_SE_LINE_WIDTH:
        case RADEON_EMIT_PP_LUM_MATRIX:
        case RADEON_EMIT_PP_ROT_MATRIX_0:
        case RADEON_EMIT_RB3D_STENCILREFMASK:
        case RADEON_EMIT_SE_VPORT_XSCALE:
        case RADEON_EMIT_SE_CNTL:
        case RADEON_EMIT_SE_CNTL_STATUS:
        case RADEON_EMIT_RE_MISC:
        case RADEON_EMIT_PP_BORDER_COLOR_0:
        case RADEON_EMIT_PP_BORDER_COLOR_1:
        case RADEON_EMIT_PP_BORDER_COLOR_2:
        case RADEON_EMIT_SE_ZBIAS_FACTOR:
        case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
        case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
        case R200_EMIT_PP_TXCBLEND_0:
        case R200_EMIT_PP_TXCBLEND_1:
        case R200_EMIT_PP_TXCBLEND_2:
        case R200_EMIT_PP_TXCBLEND_3:
        case R200_EMIT_PP_TXCBLEND_4:
        case R200_EMIT_PP_TXCBLEND_5:
        case R200_EMIT_PP_TXCBLEND_6:
        case R200_EMIT_PP_TXCBLEND_7:
        case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
        case R200_EMIT_TFACTOR_0:
        case R200_EMIT_VTX_FMT_0:
        case R200_EMIT_VAP_CTL:
        case R200_EMIT_MATRIX_SELECT_0:
        case R200_EMIT_TEX_PROC_CTL_2:
        case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
        case R200_EMIT_PP_TXFILTER_0:
        case R200_EMIT_PP_TXFILTER_1:
        case R200_EMIT_PP_TXFILTER_2:
        case R200_EMIT_PP_TXFILTER_3:
        case R200_EMIT_PP_TXFILTER_4:
        case R200_EMIT_PP_TXFILTER_5:
        case R200_EMIT_VTE_CNTL:
        case R200_EMIT_OUTPUT_VTX_COMP_SEL:
        case R200_EMIT_PP_TAM_DEBUG3:
        case R200_EMIT_PP_CNTL_X:
        case R200_EMIT_RB3D_DEPTHXY_OFFSET:
        case R200_EMIT_RE_AUX_SCISSOR_CNTL:
        case R200_EMIT_RE_SCISSOR_TL_0:
        case R200_EMIT_RE_SCISSOR_TL_1:
        case R200_EMIT_RE_SCISSOR_TL_2:
        case R200_EMIT_SE_VAP_CNTL_STATUS:
        case R200_EMIT_SE_VTX_STATE_CNTL:
        case R200_EMIT_RE_POINTSIZE:
        case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
        case R200_EMIT_PP_CUBIC_FACES_0:
        case R200_EMIT_PP_CUBIC_FACES_1:
        case R200_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_CUBIC_FACES_3:
        case R200_EMIT_PP_CUBIC_FACES_4:
        case R200_EMIT_PP_CUBIC_FACES_5:
        case RADEON_EMIT_PP_TEX_SIZE_0:
        case RADEON_EMIT_PP_TEX_SIZE_1:
        case RADEON_EMIT_PP_TEX_SIZE_2:
        case R200_EMIT_RB3D_BLENDCOLOR:
        case R200_EMIT_TCL_POINT_SPRITE_CNTL:
        case RADEON_EMIT_PP_CUBIC_FACES_0:
        case RADEON_EMIT_PP_CUBIC_FACES_1:
        case RADEON_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_TRI_PERF_CNTL:
        case R200_EMIT_PP_AFS_0:
        case R200_EMIT_PP_AFS_1:
        case R200_EMIT_ATF_TFACTOR:
        case R200_EMIT_PP_TXCTLALL_0:
        case R200_EMIT_PP_TXCTLALL_1:
        case R200_EMIT_PP_TXCTLALL_2:
        case R200_EMIT_PP_TXCTLALL_3:
        case R200_EMIT_PP_TXCTLALL_4:
        case R200_EMIT_PP_TXCTLALL_5:
                /* These packets don't contain memory offsets */
                break;

        default:
                DRM_ERROR("Unknown state packet ID %d\n", id);
                return DRM_ERR(EINVAL);
        }

        return 0;
}
262
/*
 * Validate a client-submitted type-3 CP packet and fix up any embedded
 * pitch/offset dwords so they point into allowed memory.
 *
 * @dev_priv:  device private state, forwarded to the offset checker
 * @filp_priv: per-open-file state, forwarded to the offset checker
 * @cmdbuf:    client command buffer; the packet starts at cmdbuf->buf
 *             and may be rewritten in place
 * @cmdsz:     out — total packet size in dwords (header + count field)
 *
 * Returns 0 on success; DRM_ERR(EINVAL) if the header is not a type-3
 * packet, the declared size exceeds the buffer, or an offset is invalid.
 */
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
                                                     dev_priv,
                                                     drm_file_t *filp_priv,
                                                     drm_radeon_kcmd_buffer_t *
                                                     cmdbuf,
                                                     unsigned int *cmdsz)
{
        u32 *cmd = (u32 *) cmdbuf->buf;

        /* Packet length field is a dword count minus one, stored in
         * bits 29:16 of the header; +2 covers header + first dword.
         */
        *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

        if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
                DRM_ERROR("Not a type 3 packet\n");
                return DRM_ERR(EINVAL);
        }

        /* Reject before touching cmd[1..] — the declared size must fit
         * inside the data the client actually supplied.
         */
        if (4 * *cmdsz > cmdbuf->bufsz) {
                DRM_ERROR("Packet size larger than size of data provided\n");
                return DRM_ERR(EINVAL);
        }

        /* Check client state and fix it up if necessary */
        if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
                u32 offset;

                /* GUI_CNTL carries up to two pitch/offset dwords; the
                 * low 22 bits of each encode an address in 1KB units.
                 */
                if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
                              | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[2] << 10;
                        if (radeon_check_and_fixup_offset
                            (dev_priv, filp_priv, &offset)) {
                                DRM_ERROR("Invalid first packet offset\n");
                                return DRM_ERR(EINVAL);
                        }
                        cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
                }

                /* Second pitch/offset dword is only present when BOTH
                 * src and dst controls are set.
                 */
                if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
                    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[3] << 10;
                        if (radeon_check_and_fixup_offset
                            (dev_priv, filp_priv, &offset)) {
                                DRM_ERROR("Invalid second packet offset\n");
                                return DRM_ERR(EINVAL);
                        }
                        cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
                }
        }

        return 0;
}
313
314 /* ================================================================
315  * CP hardware state programming functions
316  */
317
/*
 * Program the hardware scissor to a single clip rectangle.
 *
 * RE_TOP_LEFT takes the inclusive top-left corner; RE_WIDTH_HEIGHT
 * takes the inclusive bottom-right corner, hence the -1 on x2/y2
 * (box coordinates are exclusive on the far edge).
 */
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
                                             drm_clip_rect_t * box)
{
        RING_LOCALS;

        DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
                  box->x1, box->y1, box->x2, box->y2);

        BEGIN_RING(4);
        OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
        OUT_RING((box->y1 << 16) | box->x1);
        OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
        OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
        ADVANCE_RING();
}
333
334 /* Emit 1.1 state
335  */
336 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
337                              drm_file_t * filp_priv,
338                              drm_radeon_context_regs_t * ctx,
339                              drm_radeon_texture_regs_t * tex,
340                              unsigned int dirty)
341 {
342         RING_LOCALS;
343         DRM_DEBUG("dirty=0x%08x\n", dirty);
344
345         if (dirty & RADEON_UPLOAD_CONTEXT) {
346                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
347                                                   &ctx->rb3d_depthoffset)) {
348                         DRM_ERROR("Invalid depth buffer offset\n");
349                         return DRM_ERR(EINVAL);
350                 }
351
352                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
353                                                   &ctx->rb3d_coloroffset)) {
354                         DRM_ERROR("Invalid depth buffer offset\n");
355                         return DRM_ERR(EINVAL);
356                 }
357
358                 BEGIN_RING(14);
359                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
360                 OUT_RING(ctx->pp_misc);
361                 OUT_RING(ctx->pp_fog_color);
362                 OUT_RING(ctx->re_solid_color);
363                 OUT_RING(ctx->rb3d_blendcntl);
364                 OUT_RING(ctx->rb3d_depthoffset);
365                 OUT_RING(ctx->rb3d_depthpitch);
366                 OUT_RING(ctx->rb3d_zstencilcntl);
367                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
368                 OUT_RING(ctx->pp_cntl);
369                 OUT_RING(ctx->rb3d_cntl);
370                 OUT_RING(ctx->rb3d_coloroffset);
371                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
372                 OUT_RING(ctx->rb3d_colorpitch);
373                 ADVANCE_RING();
374         }
375
376         if (dirty & RADEON_UPLOAD_VERTFMT) {
377                 BEGIN_RING(2);
378                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
379                 OUT_RING(ctx->se_coord_fmt);
380                 ADVANCE_RING();
381         }
382
383         if (dirty & RADEON_UPLOAD_LINE) {
384                 BEGIN_RING(5);
385                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
386                 OUT_RING(ctx->re_line_pattern);
387                 OUT_RING(ctx->re_line_state);
388                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
389                 OUT_RING(ctx->se_line_width);
390                 ADVANCE_RING();
391         }
392
393         if (dirty & RADEON_UPLOAD_BUMPMAP) {
394                 BEGIN_RING(5);
395                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
396                 OUT_RING(ctx->pp_lum_matrix);
397                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
398                 OUT_RING(ctx->pp_rot_matrix_0);
399                 OUT_RING(ctx->pp_rot_matrix_1);
400                 ADVANCE_RING();
401         }
402
403         if (dirty & RADEON_UPLOAD_MASKS) {
404                 BEGIN_RING(4);
405                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
406                 OUT_RING(ctx->rb3d_stencilrefmask);
407                 OUT_RING(ctx->rb3d_ropcntl);
408                 OUT_RING(ctx->rb3d_planemask);
409                 ADVANCE_RING();
410         }
411
412         if (dirty & RADEON_UPLOAD_VIEWPORT) {
413                 BEGIN_RING(7);
414                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
415                 OUT_RING(ctx->se_vport_xscale);
416                 OUT_RING(ctx->se_vport_xoffset);
417                 OUT_RING(ctx->se_vport_yscale);
418                 OUT_RING(ctx->se_vport_yoffset);
419                 OUT_RING(ctx->se_vport_zscale);
420                 OUT_RING(ctx->se_vport_zoffset);
421                 ADVANCE_RING();
422         }
423
424         if (dirty & RADEON_UPLOAD_SETUP) {
425                 BEGIN_RING(4);
426                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
427                 OUT_RING(ctx->se_cntl);
428                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
429                 OUT_RING(ctx->se_cntl_status);
430                 ADVANCE_RING();
431         }
432
433         if (dirty & RADEON_UPLOAD_MISC) {
434                 BEGIN_RING(2);
435                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
436                 OUT_RING(ctx->re_misc);
437                 ADVANCE_RING();
438         }
439
440         if (dirty & RADEON_UPLOAD_TEX0) {
441                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
442                                                   &tex[0].pp_txoffset)) {
443                         DRM_ERROR("Invalid texture offset for unit 0\n");
444                         return DRM_ERR(EINVAL);
445                 }
446
447                 BEGIN_RING(9);
448                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
449                 OUT_RING(tex[0].pp_txfilter);
450                 OUT_RING(tex[0].pp_txformat);
451                 OUT_RING(tex[0].pp_txoffset);
452                 OUT_RING(tex[0].pp_txcblend);
453                 OUT_RING(tex[0].pp_txablend);
454                 OUT_RING(tex[0].pp_tfactor);
455                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
456                 OUT_RING(tex[0].pp_border_color);
457                 ADVANCE_RING();
458         }
459
460         if (dirty & RADEON_UPLOAD_TEX1) {
461                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
462                                                   &tex[1].pp_txoffset)) {
463                         DRM_ERROR("Invalid texture offset for unit 1\n");
464                         return DRM_ERR(EINVAL);
465                 }
466
467                 BEGIN_RING(9);
468                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
469                 OUT_RING(tex[1].pp_txfilter);
470                 OUT_RING(tex[1].pp_txformat);
471                 OUT_RING(tex[1].pp_txoffset);
472                 OUT_RING(tex[1].pp_txcblend);
473                 OUT_RING(tex[1].pp_txablend);
474                 OUT_RING(tex[1].pp_tfactor);
475                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
476                 OUT_RING(tex[1].pp_border_color);
477                 ADVANCE_RING();
478         }
479
480         if (dirty & RADEON_UPLOAD_TEX2) {
481                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
482                                                   &tex[2].pp_txoffset)) {
483                         DRM_ERROR("Invalid texture offset for unit 2\n");
484                         return DRM_ERR(EINVAL);
485                 }
486
487                 BEGIN_RING(9);
488                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
489                 OUT_RING(tex[2].pp_txfilter);
490                 OUT_RING(tex[2].pp_txformat);
491                 OUT_RING(tex[2].pp_txoffset);
492                 OUT_RING(tex[2].pp_txcblend);
493                 OUT_RING(tex[2].pp_txablend);
494                 OUT_RING(tex[2].pp_tfactor);
495                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
496                 OUT_RING(tex[2].pp_border_color);
497                 ADVANCE_RING();
498         }
499
500         return 0;
501 }
502
/* Emit 1.2 state
 *
 * Interface 1.2 added the ZBIAS register pair on top of the 1.1 state;
 * emit that here if dirty, then delegate everything else to
 * radeon_emit_state() (whose return value — 0 or DRM_ERR(EINVAL) —
 * is passed straight through).
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
                              drm_file_t * filp_priv,
                              drm_radeon_state_t * state)
{
        RING_LOCALS;

        if (state->dirty & RADEON_UPLOAD_ZBIAS) {
                BEGIN_RING(3);
                OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
                OUT_RING(state->context2.se_zbias_factor);
                OUT_RING(state->context2.se_zbias_constant);
                ADVANCE_RING();
        }

        return radeon_emit_state(dev_priv, filp_priv, &state->context,
                                 state->tex, state->dirty);
}
522
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 *
 * Layout table for client state packets, indexed by the packet id
 * (RADEON_EMIT_* / R200_EMIT_* values checked in
 * radeon_check_and_fixup_packets): .start is the first register,
 * .len the number of consecutive register dwords, .name is used for
 * diagnostics.  Entry order must match the id numbering — do not
 * reorder.
 */
static struct {
        int start;
        int len;
        const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
        {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
        {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
        {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
        {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
        {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
        {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
        {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
        {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
        {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
        {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
        {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
        {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
        {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
        {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
        {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
        {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
        {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
        {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
        {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
        {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
        {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
                    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
        {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
        {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
        {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
        {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
        {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
        {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
        {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
        {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
        {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
        {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
        {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
        {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
        {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
        {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
        {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
        {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
        {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
        {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
        {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
        {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
        {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
        {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
        {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
        {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
        {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
        {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
        {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
        {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
        {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
         "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
        {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
        {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
        {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
        {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
        {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
        {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
        {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
        {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
        {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
        {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
        {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
                    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
        {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
        {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
        {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
        {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
        {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
        {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
        {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
        {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
        {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
        {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
        {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
        {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
        {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
        {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
        {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
        {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
        {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
        {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
        {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
        {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
        {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
        {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
        {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
        {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
        {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
        {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
        /* The TXCTLALL ids reuse the TFACTOR/TXFILTER base registers
         * with larger spans, covering the whole per-unit register set.
         */
        {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
        {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
        {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
        {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
        {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
        {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
        {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
};
630
631 /* ================================================================
632  * Performance monitoring functions
633  */
634
635 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
636                              int x, int y, int w, int h, int r, int g, int b)
637 {
638         u32 color;
639         RING_LOCALS;
640
641         x += dev_priv->sarea_priv->boxes[0].x1;
642         y += dev_priv->sarea_priv->boxes[0].y1;
643
644         switch (dev_priv->color_fmt) {
645         case RADEON_COLOR_FORMAT_RGB565:
646                 color = (((r & 0xf8) << 8) |
647                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
648                 break;
649         case RADEON_COLOR_FORMAT_ARGB8888:
650         default:
651                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
652                 break;
653         }
654
655         BEGIN_RING(4);
656         RADEON_WAIT_UNTIL_3D_IDLE();
657         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
658         OUT_RING(0xffffffff);
659         ADVANCE_RING();
660
661         BEGIN_RING(6);
662
663         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
664         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
665                  RADEON_GMC_BRUSH_SOLID_COLOR |
666                  (dev_priv->color_fmt << 8) |
667                  RADEON_GMC_SRC_DATATYPE_COLOR |
668                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
669
670         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
671                 OUT_RING(dev_priv->front_pitch_offset);
672         } else {
673                 OUT_RING(dev_priv->back_pitch_offset);
674         }
675
676         OUT_RING(color);
677
678         OUT_RING((x << 16) | y);
679         OUT_RING((w << 16) | h);
680
681         ADVANCE_RING();
682 }
683
684 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
685 {
686         /* Collapse various things into a wait flag -- trying to
687          * guess if userspase slept -- better just to have them tell us.
688          */
689         if (dev_priv->stats.last_frame_reads > 1 ||
690             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
691                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
692         }
693
694         if (dev_priv->stats.freelist_loops) {
695                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
696         }
697
698         /* Purple box for page flipping
699          */
700         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
701                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
702
703         /* Red box if we have to wait for idle at any point
704          */
705         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
706                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
707
708         /* Blue box: lost context?
709          */
710
711         /* Yellow box for texture swaps
712          */
713         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
714                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
715
716         /* Green box if hardware never idles (as far as we can tell)
717          */
718         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
719                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
720
721         /* Draw bars indicating number of buffers allocated
722          * (not a great measure, easily confused)
723          */
724         if (dev_priv->stats.requested_bufs) {
725                 if (dev_priv->stats.requested_bufs > 100)
726                         dev_priv->stats.requested_bufs = 100;
727
728                 radeon_clear_box(dev_priv, 4, 16,
729                                  dev_priv->stats.requested_bufs, 4,
730                                  196, 128, 128);
731         }
732
733         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
734
735 }
736
737 /* ================================================================
738  * CP command dispatch functions
739  */
740
/* Dispatch a buffer-clear request over the current set of cliprects.
 *
 * Color buffers (front/back) are cleared with 2D solid fills.  Depth and
 * stencil are cleared either via the reverse-engineered hyper-z fast
 * clear path (RADEON_CLEAR_FASTZ) or by rendering screen-aligned quads
 * with only the z/stencil planes enabled -- a separate quad path exists
 * for R200-class microcode versus older chips.
 *
 * @dev:         DRM device
 * @clear:       userspace clear parameters (flags, colors, masks)
 * @depth_boxes: per-cliprect quad coordinates for the depth-clear quads
 *               (client-prepared; raw bit patterns emitted to the ring)
 */
static void radeon_cp_dispatch_clear(drm_device_t * dev,
                                     drm_radeon_clear_t * clear,
                                     drm_radeon_clear_rect_t * depth_boxes)
{
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
        drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
        int nbox = sarea_priv->nbox;
        drm_clip_rect_t *pbox = sarea_priv->boxes;
        unsigned int flags = clear->flags;
        u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
        int i;
        RING_LOCALS;
        DRM_DEBUG("flags = 0x%x\n", flags);

        dev_priv->stats.clears++;

        /* While page-flipped, "front" and "back" have swapped roles, so
         * swap the two flags to hit the buffers the client really means.
         */
        if (dev_priv->page_flipping && dev_priv->current_page == 1) {
                unsigned int tmp = flags;

                flags &= ~(RADEON_FRONT | RADEON_BACK);
                if (tmp & RADEON_FRONT)
                        flags |= RADEON_BACK;
                if (tmp & RADEON_BACK)
                        flags |= RADEON_FRONT;
        }

        /* Color-buffer clears: one solid fill per cliprect per buffer. */
        if (flags & (RADEON_FRONT | RADEON_BACK)) {

                BEGIN_RING(4);

                /* Ensure the 3D stream is idle before doing a
                 * 2D fill to clear the front or back buffer.
                 */
                RADEON_WAIT_UNTIL_3D_IDLE();

                /* Apply the client's plane write mask for the fills. */
                OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
                OUT_RING(clear->color_mask);

                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {
                        int x = pbox[i].x1;
                        int y = pbox[i].y1;
                        int w = pbox[i].x2 - x;
                        int h = pbox[i].y2 - y;

                        DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
                                  x, y, w, h, flags);

                        if (flags & RADEON_FRONT) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->front_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }

                        if (flags & RADEON_BACK) {
                                BEGIN_RING(6);

                                OUT_RING(CP_PACKET3
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         (dev_priv->
                                          color_fmt << 8) |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_ROP3_P |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->back_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                                ADVANCE_RING();
                        }
                }
        }

        /* hyper z clear */
        /* no docs available, based on reverse engineering by Stephane Marchesin */
        if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
            && (flags & RADEON_CLEAR_FASTZ)) {

                int i;
                /* z-pixels per scanline: 16-bit z is 2 bytes/pixel,
                 * everything else is assumed 4 bytes/pixel.
                 */
                int depthpixperline =
                    dev_priv->depth_fmt ==
                    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
                                                       2) : (dev_priv->
                                                             depth_pitch / 4);

                u32 clearmask;

                /* Clear value: depth in the low bits, 8 bits of stencil
                 * (from the misnamed depth_mask field) in the top byte.
                 */
                u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
                    ((clear->depth_mask & 0xff) << 24);

                /* Make sure we restore the 3D state next time.
                 * we haven't touched any "normal" state - still need this?
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                if ((dev_priv->flags & CHIP_HAS_HIERZ)
                    && (flags & RADEON_USE_HIERZ)) {
                        /* FIXME : reverse engineer that for Rx00 cards */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                        /* pattern seems to work for r100, though get slight
                           rendering errors with glxgears. If hierz is not enabled for r100,
                           only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
                           other ones are ignored, and the same clear mask can be used. That's
                           very different behaviour than R200 which needs different clear mask
                           and different number of tiles to clear if hierz is enabled or not !?!
                         */
                        clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
                } else {
                        /* clear mask : chooses the clearing pattern.
                           rv250: could be used to clear only parts of macrotiles
                           (but that would get really complicated...)?
                           bit 0 and 1 (either or both of them ?!?!) are used to
                           not clear tile (or maybe one of the bits indicates if the tile is
                           compressed or not), bit 2 and 3 to not clear tile 1,...,.
                           Pattern is as follows:
                           | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
                           bits -------------------------------------------------
                           | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
                           rv100: clearmask covers 2x8 4x1 tiles, but one clear still
                           covers 256 pixels ?!?
                         */
                        clearmask = 0x0;
                }

                /* Program the clear value and flush the z cache before
                 * touching the z-mask RAM.
                 */
                BEGIN_RING(8);
                RADEON_WAIT_UNTIL_2D_IDLE();
                OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
                             tempRB3D_DEPTHCLEARVALUE);
                /* what offset is this exactly ? */
                OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
                /* need ctlstat, otherwise get some strange black flickering */
                OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
                             RADEON_RB3D_ZC_FLUSH_ALL);
                ADVANCE_RING();

                /* Per-cliprect fast clears; tile geometry differs per chip
                 * family, hence the three variants below.
                 */
                for (i = 0; i < nbox; i++) {
                        int tileoffset, nrtilesx, nrtilesy, j;
                        /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
                        if ((dev_priv->flags & CHIP_HAS_HIERZ)
                            && !(dev_priv->microcode_version == UCODE_R200)) {
                                /* FIXME : figure this out for r200 (when hierz is enabled). Or
                                   maybe r200 actually doesn't need to put the low-res z value into
                                   the tile cache like r100, but just needs to clear the hi-level z-buffer?
                                   Works for R100, both with hierz and without.
                                   R100 seems to operate on 2x1 8x8 tiles, but...
                                   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
                                   problematic with resolutions which are not 64 pix aligned? */
                                tileoffset =
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                     pbox[i].x1) >> 6;
                                nrtilesx =
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy =
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        /* first tile */
                                        OUT_RING(tileoffset * 8);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        } else if (dev_priv->microcode_version == UCODE_R200) {
                                /* works for rv250. */
                                /* find first macro tile (8x2 4x4 z-pixels on rv250) */
                                tileoffset =
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                     pbox[i].x1) >> 5;
                                nrtilesx =
                                    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
                                nrtilesy =
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        /* first tile */
                                        /* judging by the first tile offset needed, could possibly
                                           directly address/clear 4x4 tiles instead of 8x2 * 4x4
                                           macro tiles, though would still need clear mask for
                                           right/bottom if truly 4x4 granularity is desired ? */
                                        OUT_RING(tileoffset * 16);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 1);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 5;
                                }
                        } else {        /* rv 100 */
                                /* rv100 might not need 64 pix alignment, who knows */
                                /* offsets are, hmm, weird */
                                tileoffset =
                                    ((pbox[i].y1 >> 4) * depthpixperline +
                                     pbox[i].x1) >> 6;
                                nrtilesx =
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                nrtilesy =
                                    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
                                for (j = 0; j <= nrtilesy; j++) {
                                        BEGIN_RING(4);
                                        OUT_RING(CP_PACKET3
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        OUT_RING(tileoffset * 128);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        ADVANCE_RING();
                                        tileoffset += depthpixperline >> 6;
                                }
                        }
                }

                /* TODO don't always clear all hi-level z tiles */
                if ((dev_priv->flags & CHIP_HAS_HIERZ)
                    && (dev_priv->microcode_version == UCODE_R200)
                    && (flags & RADEON_USE_HIERZ))
                        /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                {
                        BEGIN_RING(4);
                        OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
                        OUT_RING(0x0);  /* First tile */
                        OUT_RING(0x3cc0);
                        OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
                        ADVANCE_RING();
                }
        }

        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
        else if ((dev_priv->microcode_version == UCODE_R200) &&
                (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                /* R200 path: set up a minimal TCL-less pipeline state,
                 * then draw one rect-list quad per cliprect.
                 */
                int tempPP_CNTL;
                int tempRE_CNTL;
                int tempRB3D_CNTL;
                int tempRB3D_ZSTENCILCNTL;
                int tempRB3D_STENCILREFMASK;
                int tempRB3D_PLANEMASK;
                int tempSE_CNTL;
                int tempSE_VTE_CNTL;
                int tempSE_VTX_FMT_0;
                int tempSE_VTX_FMT_1;
                int tempSE_VAP_CNTL;
                int tempRE_AUX_SCISSOR_CNTL;

                tempPP_CNTL = 0;
                tempRE_CNTL = 0;

                tempRB3D_CNTL = depth_clear->rb3d_cntl;

                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;

                tempSE_CNTL = depth_clear->se_cntl;

                /* Disable TCL */

                tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
                                          (0x9 <<
                                           SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

                /* Color writes fully masked off -- only z/stencil change. */
                tempRB3D_PLANEMASK = 0x0;

                tempRE_AUX_SCISSOR_CNTL = 0x0;

                tempSE_VTE_CNTL =
                    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

                /* Vertex format (X, Y, Z, W) */
                tempSE_VTX_FMT_0 =
                    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
                    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
                tempSE_VTX_FMT_1 = 0x0;

                /*
                 * Depth buffer specific enables
                 */
                if (flags & RADEON_DEPTH) {
                        /* Enable depth buffer */
                        tempRB3D_CNTL |= RADEON_Z_ENABLE;
                } else {
                        /* Disable depth buffer */
                        tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
                }

                /*
                 * Stencil buffer specific enables
                 */
                if (flags & RADEON_STENCIL) {
                        tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
                        /* depth_mask is a misnamed field: stencil ref/mask */
                        tempRB3D_STENCILREFMASK = clear->depth_mask;
                } else {
                        tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(26);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
                OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
                OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
                             tempRB3D_STENCILREFMASK);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
                OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
                OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
                OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
                OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
                OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
                OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        /* Three-vertex rect-list quad; 0x3f800000 is 1.0f
                         * for the W component.
                         */
                        BEGIN_RING(14);
                        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        ADVANCE_RING();
                }
        } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                /* Pre-R200 path: same idea as above, using the legacy
                 * immediate-mode draw packet and register set.
                 */
                int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

                rb3d_cntl = depth_clear->rb3d_cntl;

                if (flags & RADEON_DEPTH) {
                        rb3d_cntl |= RADEON_Z_ENABLE;
                } else {
                        rb3d_cntl &= ~RADEON_Z_ENABLE;
                }

                if (flags & RADEON_STENCIL) {
                        rb3d_cntl |= RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
                } else {
                        rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = 0x00000000;
                }

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                }
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
                }

                BEGIN_RING(13);
                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
                OUT_RING(0x00000000);
                OUT_RING(rb3d_cntl);

                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
                OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
                ADVANCE_RING();

                /* Make sure we restore the 3D state next time.
                 */
                dev_priv->sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         *  sets top-left?
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        BEGIN_RING(15);

                        OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
                        OUT_RING(RADEON_VTX_Z_PRESENT |
                                 RADEON_VTX_PKCOLOR_PRESENT);
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  RADEON_MAOS_ENABLE |
                                  RADEON_VTX_FMT_RADEON_MODE |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x0);

                        ADVANCE_RING();
                }
        }

        /* Increment the clear counter.  The client-side 3D driver must
         * wait on this value before performing the clear ioctl.  We
         * need this because the card's so damned fast...
         */
        dev_priv->sarea_priv->last_clear++;

        BEGIN_RING(4);

        RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
        RADEON_WAIT_UNTIL_IDLE();

        ADVANCE_RING();
}
1231
1232 static void radeon_cp_dispatch_swap(drm_device_t * dev)
1233 {
1234         drm_radeon_private_t *dev_priv = dev->dev_private;
1235         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1236         int nbox = sarea_priv->nbox;
1237         drm_clip_rect_t *pbox = sarea_priv->boxes;
1238         int i;
1239         RING_LOCALS;
1240         DRM_DEBUG("\n");
1241
1242         /* Do some trivial performance monitoring...
1243          */
1244         if (dev_priv->do_boxes)
1245                 radeon_cp_performance_boxes(dev_priv);
1246
1247         /* Wait for the 3D stream to idle before dispatching the bitblt.
1248          * This will prevent data corruption between the two streams.
1249          */
1250         BEGIN_RING(2);
1251
1252         RADEON_WAIT_UNTIL_3D_IDLE();
1253
1254         ADVANCE_RING();
1255
1256         for (i = 0; i < nbox; i++) {
1257                 int x = pbox[i].x1;
1258                 int y = pbox[i].y1;
1259                 int w = pbox[i].x2 - x;
1260                 int h = pbox[i].y2 - y;
1261
1262                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1263
1264                 BEGIN_RING(7);
1265
1266                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1267                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1268                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1269                          RADEON_GMC_BRUSH_NONE |
1270                          (dev_priv->color_fmt << 8) |
1271                          RADEON_GMC_SRC_DATATYPE_COLOR |
1272                          RADEON_ROP3_S |
1273                          RADEON_DP_SRC_SOURCE_MEMORY |
1274                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1275
1276                 /* Make this work even if front & back are flipped:
1277                  */
1278                 if (dev_priv->current_page == 0) {
1279                         OUT_RING(dev_priv->back_pitch_offset);
1280                         OUT_RING(dev_priv->front_pitch_offset);
1281                 } else {
1282                         OUT_RING(dev_priv->front_pitch_offset);
1283                         OUT_RING(dev_priv->back_pitch_offset);
1284                 }
1285
1286                 OUT_RING((x << 16) | y);
1287                 OUT_RING((x << 16) | y);
1288                 OUT_RING((w << 16) | h);
1289
1290                 ADVANCE_RING();
1291         }
1292
1293         /* Increment the frame counter.  The client-side 3D driver must
1294          * throttle the framerate by waiting for this value before
1295          * performing the swapbuffer ioctl.
1296          */
1297         dev_priv->sarea_priv->last_frame++;
1298
1299         BEGIN_RING(4);
1300
1301         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1302         RADEON_WAIT_UNTIL_2D_IDLE();
1303
1304         ADVANCE_RING();
1305 }
1306
/* Emit ring commands that perform a page flip: point both CRTC base
 * registers at the page that is about to become visible, toggle the
 * current-page bookkeeping, and bump the frame counter used by the
 * client for throttling.
 */
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	/* Offset of the page we are flipping to: when page 1 is current,
	 * the front offset becomes visible next, and vice versa. */
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	/* CRTC base = start of the SAREA frame rectangle within the new
	 * page, masked to 8-byte alignment.  (color_fmt - 2) is used as
	 * the bytes-per-pixel factor here -- NOTE(review): confirm this
	 * holds for all supported color formats. */
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
1353
1354 static int bad_prim_vertex_nr(int primitive, int nr)
1355 {
1356         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1357         case RADEON_PRIM_TYPE_NONE:
1358         case RADEON_PRIM_TYPE_POINT:
1359                 return nr < 1;
1360         case RADEON_PRIM_TYPE_LINE:
1361                 return (nr & 1) || nr == 0;
1362         case RADEON_PRIM_TYPE_LINE_STRIP:
1363                 return nr < 2;
1364         case RADEON_PRIM_TYPE_TRI_LIST:
1365         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1366         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1367         case RADEON_PRIM_TYPE_RECT_LIST:
1368                 return nr % 3 || nr == 0;
1369         case RADEON_PRIM_TYPE_TRI_FAN:
1370         case RADEON_PRIM_TYPE_TRI_STRIP:
1371                 return nr < 3;
1372         default:
1373                 return 1;
1374         }
1375 }
1376
/* Description of one TCL primitive to dispatch from a vertex or index
 * buffer.  Offsets are in bytes within the DMA buffer.
 */
typedef struct {
	unsigned int start;	/* byte offset of the primitive data */
	unsigned int finish;	/* byte offset just past the last element */
	unsigned int prim;	/* hardware primitive type and flags */
	unsigned int numverts;	/* number of vertices */
	unsigned int offset;	/* offset to vertex data (indexed prims) */
	unsigned int vc_format;	/* vertex format */
} drm_radeon_tcl_prim_t;
1385
/* Render a primitive from a vertex buffer, re-emitting the draw packet
 * once per SAREA cliprect (and at least once when there are none).
 * Invalid vertex counts are rejected with an error and nothing is
 * emitted.
 */
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GART address of the first vertex of this primitive. */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	/* do/while: the draw is emitted once even with zero cliprects. */
	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
1431
/* Retire a DMA buffer: stamp it with a fresh dispatch age, emit that
 * age through the ring so completion can be detected, and mark the
 * buffer pending so the freelist can reclaim it later.
 */
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	/* Pre-increment: each discarded buffer gets a unique age. */
	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1448
/* Point the CP at the [start, end) byte range of @buf and execute it
 * as an indirect buffer.  An empty range is a no-op.
 */
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		/* GART address the CP will fetch commands from. */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Length in dwords, rounded up. */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
1482
/* Render an indexed primitive.  The GEN_INDX_PRIM packet header is
 * written directly into the element buffer in front of the index data,
 * then the buffer is dispatched indirectly, once per cliprect (at
 * least once when there are no cliprects).
 */
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	/* Index data begins a fixed header's distance into the range. */
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	/* Reject empty ranges and starts that are not 8-byte aligned. */
	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* Build the packet header in place, just before the indices. */
	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);

}
1538
1539 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1540
/* Upload a texture image to card memory via a host-data blit.  Large
 * images are uploaded in multiple passes, each pass limited to one DMA
 * buffer; when no buffer is available, the (already updated) image
 * parameters are copied back to user space and EAGAIN is returned so
 * the client can retry the remainder.
 *
 * Returns 0 on success, EINVAL on bad offset/format, EFAULT on a
 * failed user-space copy, or EAGAIN as described above.
 */
static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	/* Validate/relocate the client-supplied destination offset. */
	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	/* Source pitch in 64-byte units for the blitter. */
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			/* Too big for one buffer: clamp this pass. */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		/* NOTE(review): the idle-and-retry path is deliberately
		 * disabled by the "0 &&"; confirm intent before
		 * re-enabling it. */
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* Hand the updated parameters back so the client
			 * can resume where we left off. */
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

/* Copy _width bytes from user space, failing the ioctl on fault. */
#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return DRM_ERR(EFAULT); \
		} \
	} while(0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						(int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				/* Pad each scanline out to 16 bytes. */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		/* Blit the staged data from the DMA buffer into the
		 * destination texture. */
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1775
1776 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1777 {
1778         drm_radeon_private_t *dev_priv = dev->dev_private;
1779         int i;
1780         RING_LOCALS;
1781         DRM_DEBUG("\n");
1782
1783         BEGIN_RING(35);
1784
1785         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1786         OUT_RING(0x00000000);
1787
1788         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1789         for (i = 0; i < 32; i++) {
1790                 OUT_RING(stipple[i]);
1791         }
1792
1793         ADVANCE_RING();
1794 }
1795
1796 static void radeon_apply_surface_regs(int surf_index,
1797                                       drm_radeon_private_t *dev_priv)
1798 {
1799         if (!dev_priv->mmio)
1800                 return;
1801
1802         radeon_do_cp_idle(dev_priv);
1803
1804         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1805                      dev_priv->surfaces[surf_index].flags);
1806         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1807                      dev_priv->surfaces[surf_index].lower);
1808         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1809                      dev_priv->surfaces[surf_index].upper);
1810 }
1811
1812 /* Allocates a virtual surface
1813  * doesn't always allocate a real surface, will stretch an existing
1814  * surface when possible.
1815  *
1816  * Note that refcount can be at most 2, since during a free refcount=3
1817  * might mean we have to allocate a new surface which might not always
1818  * be available.
1819  * For example : we allocate three contigous surfaces ABC. If B is
1820  * freed, we suddenly need two surfaces to store A and C, which might
1821  * not always be available.
1822  */
1823 static int alloc_surface(drm_radeon_surface_alloc_t *new,
1824                          drm_radeon_private_t *dev_priv, DRMFILE filp)
1825 {
1826         struct radeon_virt_surface *s;
1827         int i;
1828         int virt_surface_index;
1829         uint32_t new_upper, new_lower;
1830
1831         new_lower = new->address;
1832         new_upper = new_lower + new->size - 1;
1833
1834         /* sanity check */
1835         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1836             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1837              RADEON_SURF_ADDRESS_FIXED_MASK)
1838             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1839                 return -1;
1840
1841         /* make sure there is no overlap with existing surfaces */
1842         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1843                 if ((dev_priv->surfaces[i].refcount != 0) &&
1844                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1845                       (new_lower < dev_priv->surfaces[i].upper)) ||
1846                      ((new_lower < dev_priv->surfaces[i].lower) &&
1847                       (new_upper > dev_priv->surfaces[i].lower)))) {
1848                         return -1;
1849                 }
1850         }
1851
1852         /* find a virtual surface */
1853         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1854                 if (dev_priv->virt_surfaces[i].filp == 0)
1855                         break;
1856         if (i == 2 * RADEON_MAX_SURFACES) {
1857                 return -1;
1858         }
1859         virt_surface_index = i;
1860
1861         /* try to reuse an existing surface */
1862         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1863                 /* extend before */
1864                 if ((dev_priv->surfaces[i].refcount == 1) &&
1865                     (new->flags == dev_priv->surfaces[i].flags) &&
1866                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1867                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1868                         s->surface_index = i;
1869                         s->lower = new_lower;
1870                         s->upper = new_upper;
1871                         s->flags = new->flags;
1872                         s->filp = filp;
1873                         dev_priv->surfaces[i].refcount++;
1874                         dev_priv->surfaces[i].lower = s->lower;
1875                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1876                         return virt_surface_index;
1877                 }
1878
1879                 /* extend after */
1880                 if ((dev_priv->surfaces[i].refcount == 1) &&
1881                     (new->flags == dev_priv->surfaces[i].flags) &&
1882                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1883                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1884                         s->surface_index = i;
1885                         s->lower = new_lower;
1886                         s->upper = new_upper;
1887                         s->flags = new->flags;
1888                         s->filp = filp;
1889                         dev_priv->surfaces[i].refcount++;
1890                         dev_priv->surfaces[i].upper = s->upper;
1891                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1892                         return virt_surface_index;
1893                 }
1894         }
1895
1896         /* okay, we need a new one */
1897         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1898                 if (dev_priv->surfaces[i].refcount == 0) {
1899                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1900                         s->surface_index = i;
1901                         s->lower = new_lower;
1902                         s->upper = new_upper;
1903                         s->flags = new->flags;
1904                         s->filp = filp;
1905                         dev_priv->surfaces[i].refcount = 1;
1906                         dev_priv->surfaces[i].lower = s->lower;
1907                         dev_priv->surfaces[i].upper = s->upper;
1908                         dev_priv->surfaces[i].flags = s->flags;
1909                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1910                         return virt_surface_index;
1911                 }
1912         }
1913
1914         /* we didn't find anything */
1915         return -1;
1916 }
1917
/* Free the virtual surface owned by @filp whose range starts at
 * @lower.  The backing hardware surface is shrunk away from the freed
 * range, and its flags are cleared once its refcount drops to zero.
 * Returns 0 on success, 1 if no matching surface was found.
 */
static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				/* Pull the hardware surface's bounds in
				 * from whichever ends this virtual
				 * surface was covering. */
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}
1952
1953 static void radeon_surfaces_release(DRMFILE filp,
1954                                     drm_radeon_private_t * dev_priv)
1955 {
1956         int i;
1957         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1958                 if (dev_priv->virt_surfaces[i].filp == filp)
1959                         free_surface(filp, dev_priv,
1960                                      dev_priv->virt_surfaces[i].lower);
1961         }
1962 }
1963
1964 /* ================================================================
1965  * IOCTL functions
1966  */
1967 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1968 {
1969         DRM_DEVICE;
1970         drm_radeon_private_t *dev_priv = dev->dev_private;
1971         drm_radeon_surface_alloc_t alloc;
1972
1973         DRM_COPY_FROM_USER_IOCTL(alloc,
1974                                  (drm_radeon_surface_alloc_t __user *) data,
1975                                  sizeof(alloc));
1976
1977         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1978                 return DRM_ERR(EINVAL);
1979         else
1980                 return 0;
1981 }
1982
1983 static int radeon_surface_free(DRM_IOCTL_ARGS)
1984 {
1985         DRM_DEVICE;
1986         drm_radeon_private_t *dev_priv = dev->dev_private;
1987         drm_radeon_surface_free_t memfree;
1988
1989         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
1990                                  sizeof(memfree));
1991
1992         if (free_surface(filp, dev_priv, memfree.address))
1993                 return DRM_ERR(EINVAL);
1994         else
1995                 return 0;
1996 }
1997
/* Ioctl: clear the color/depth buffers within the cliprects currently
 * listed in the SAREA.
 */
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the shared-memory cliprect count before trusting it. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
2026
2027 /* Not sure why this isn't set all the time:
2028  */
/* Enable page flipping: set RADEON_CRTC_OFFSET_FLIP_CNTL on both CRTC
 * offset-control registers (read-modify-write through the ring) and
 * reset the current page to 0.
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
2052
2053 /* Called whenever a client dies, from drm_release.
2054  * NOTE:  Lock isn't necessarily held when this is called!
2055  */
2056 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2057 {
2058         drm_radeon_private_t *dev_priv = dev->dev_private;
2059         DRM_DEBUG("\n");
2060
2061         if (dev_priv->current_page != 0)
2062                 radeon_cp_dispatch_flip(dev);
2063
2064         dev_priv->page_flipping = 0;
2065         return 0;
2066 }
2067
2068 /* Swapping and flipping are different operations, need different ioctls.
2069  * They can & should be intermixed to support multiple 3d windows.
2070  */
/* Ioctl: flip the front/back display pages. */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Lazily enable page flipping the first time a flip arrives. */
	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
2089
/* Ioctl: blit the back buffer to the front for the SAREA cliprects. */
static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	/* Clamp the shared-memory cliprect count before trusting it. */
	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	/* The swap invalidates the client's 3D context ownership. */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
2110
/* DRM_IOCTL_RADEON_VERTEX: dispatch one vertex buffer as a single
 * primitive.  Validates the buffer index, primitive type and buffer
 * ownership, flushes dirty SAREA state first, then emits the buffer to
 * the CP ring.  Returns 0 on success or a DRM_ERR() errno on failure.
 */
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	/* Reject buffer indices outside the DMA buffer list. */
	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	/* Reject primitive types beyond the last one the hardware knows. */
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	/* A client may only submit buffers it owns... */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	/* ...and may not resubmit a buffer that is still in flight. */
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		/* Flush any accumulated state changes (other than
		 * cliprects) from the SAREA before drawing.
		 */
		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	/* Release the buffer back to the freelist if the client asked. */
	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2193
/* DRM_IOCTL_RADEON_INDICES: dispatch an indexed primitive.  The index
 * data lives inside the submitted DMA buffer between elts.start and
 * elts.end; the buffer index, primitive type, ownership and index
 * alignment are all validated before anything touches the ring.
 */
static int radeon_cp_indices(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indices_t elts;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
				 sizeof(elts));

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);

	/* Reject buffer indices outside the DMA buffer list. */
	if (elts.idx < 0 || elts.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	/* Reject primitive types beyond the last one the hardware knows. */
	if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts.idx];

	/* Clients may only submit their own, non-pending buffers. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts.idx);
		return DRM_ERR(EINVAL);
	}

	/* NOTE(review): count is computed here but never referenced
	 * again in this function.
	 */
	count = (elts.end - elts.start) / sizeof(u16);
	/* Rewind start over the primitive header that precedes the
	 * index data in the buffer.
	 */
	elts.start -= RADEON_INDEX_PRIM_OFFSET;

	/* Index data must be 8-byte aligned within the buffer. */
	if (elts.start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
		return DRM_ERR(EINVAL);
	}
	if (elts.start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	buf->used = elts.end;

	/* Flush dirty SAREA state (other than cliprects) before drawing. */
	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, filp_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return DRM_ERR(EINVAL);
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts.start;
	prim.finish = elts.end;
	prim.prim = elts.prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, buf, &prim);
	if (elts.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2287
2288 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2289 {
2290         DRM_DEVICE;
2291         drm_radeon_private_t *dev_priv = dev->dev_private;
2292         drm_radeon_texture_t tex;
2293         drm_radeon_tex_image_t image;
2294         int ret;
2295
2296         LOCK_TEST_WITH_RETURN(dev, filp);
2297
2298         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2299                                  sizeof(tex));
2300
2301         if (tex.image == NULL) {
2302                 DRM_ERROR("null texture image!\n");
2303                 return DRM_ERR(EINVAL);
2304         }
2305
2306         if (DRM_COPY_FROM_USER(&image,
2307                                (drm_radeon_tex_image_t __user *) tex.image,
2308                                sizeof(image)))
2309                 return DRM_ERR(EFAULT);
2310
2311         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2312         VB_AGE_TEST_WITH_RETURN(dev_priv);
2313
2314         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2315
2316         COMMIT_RING();
2317         return ret;
2318 }
2319
2320 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2321 {
2322         DRM_DEVICE;
2323         drm_radeon_private_t *dev_priv = dev->dev_private;
2324         drm_radeon_stipple_t stipple;
2325         u32 mask[32];
2326
2327         LOCK_TEST_WITH_RETURN(dev, filp);
2328
2329         DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2330                                  sizeof(stipple));
2331
2332         if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2333                 return DRM_ERR(EFAULT);
2334
2335         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2336
2337         radeon_cp_dispatch_stipple(dev, mask);
2338
2339         COMMIT_RING();
2340         return 0;
2341 }
2342
/* DRM_IOCTL_RADEON_INDIRECT: dispatch an indirect buffer of raw CP
 * commands.  The commands are NOT verified (see comment below), so this
 * path is restricted to privileged clients by the ioctl permission
 * flags.
 */
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	/* Reject buffer indices outside the DMA buffer list. */
	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* Clients may only submit their own, non-pending buffers. */
	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	/* The new commands must begin at or after the region already
	 * consumed in this buffer.
	 */
	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
2411
2412 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2413 {
2414         DRM_DEVICE;
2415         drm_radeon_private_t *dev_priv = dev->dev_private;
2416         drm_file_t *filp_priv;
2417         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2418         drm_device_dma_t *dma = dev->dma;
2419         drm_buf_t *buf;
2420         drm_radeon_vertex2_t vertex;
2421         int i;
2422         unsigned char laststate;
2423
2424         LOCK_TEST_WITH_RETURN(dev, filp);
2425
2426         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2427
2428         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2429                                  sizeof(vertex));
2430
2431         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2432                   DRM_CURRENTPID, vertex.idx, vertex.discard);
2433
2434         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2435                 DRM_ERROR("buffer index %d (of %d max)\n",
2436                           vertex.idx, dma->buf_count - 1);
2437                 return DRM_ERR(EINVAL);
2438         }
2439
2440         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2441         VB_AGE_TEST_WITH_RETURN(dev_priv);
2442
2443         buf = dma->buflist[vertex.idx];
2444
2445         if (buf->filp != filp) {
2446                 DRM_ERROR("process %d using buffer owned by %p\n",
2447                           DRM_CURRENTPID, buf->filp);
2448                 return DRM_ERR(EINVAL);
2449         }
2450
2451         if (buf->pending) {
2452                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2453                 return DRM_ERR(EINVAL);
2454         }
2455
2456         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2457                 return DRM_ERR(EINVAL);
2458
2459         for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2460                 drm_radeon_prim_t prim;
2461                 drm_radeon_tcl_prim_t tclprim;
2462
2463                 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2464                         return DRM_ERR(EFAULT);
2465
2466                 if (prim.stateidx != laststate) {
2467                         drm_radeon_state_t state;
2468
2469                         if (DRM_COPY_FROM_USER(&state,
2470                                                &vertex.state[prim.stateidx],
2471                                                sizeof(state)))
2472                                 return DRM_ERR(EFAULT);
2473
2474                         if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2475                                 DRM_ERROR("radeon_emit_state2 failed\n");
2476                                 return DRM_ERR(EINVAL);
2477                         }
2478
2479                         laststate = prim.stateidx;
2480                 }
2481
2482                 tclprim.start = prim.start;
2483                 tclprim.finish = prim.finish;
2484                 tclprim.prim = prim.prim;
2485                 tclprim.vc_format = prim.vc_format;
2486
2487                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2488                         tclprim.offset = prim.numverts * 64;
2489                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2490
2491                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2492                 } else {
2493                         tclprim.numverts = prim.numverts;
2494                         tclprim.offset = 0;     /* not used */
2495
2496                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2497                 }
2498
2499                 if (sarea_priv->nbox == 1)
2500                         sarea_priv->nbox = 0;
2501         }
2502
2503         if (vertex.discard) {
2504                 radeon_cp_discard_buffer(dev, buf);
2505         }
2506
2507         COMMIT_RING();
2508         return 0;
2509 }
2510
/* Emit one register-write packet from a client command buffer: the
 * packet id indexes the global packet[] table to find the base register
 * and dword count, the payload is verified/fixed up, and the result is
 * written to the ring as a CP type-0 packet.  Advances cmdbuf past the
 * consumed payload on success.
 */
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	/* Reject ids beyond the end of the packet[] table. */
	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* Don't read past the end of the client-supplied buffer. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	/* Advance the command buffer past the consumed payload. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2546
2547 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
2548                                           drm_radeon_cmd_header_t header,
2549                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2550 {
2551         int sz = header.scalars.count;
2552         int start = header.scalars.offset;
2553         int stride = header.scalars.stride;
2554         RING_LOCALS;
2555
2556         BEGIN_RING(3 + sz);
2557         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2558         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2559         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2560         OUT_RING_TABLE(cmdbuf->buf, sz);
2561         ADVANCE_RING();
2562         cmdbuf->buf += sz * sizeof(int);
2563         cmdbuf->bufsz -= sz * sizeof(int);
2564         return 0;
2565 }
2566
2567 /* God this is ugly
2568  */
2569 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
2570                                            drm_radeon_cmd_header_t header,
2571                                            drm_radeon_kcmd_buffer_t *cmdbuf)
2572 {
2573         int sz = header.scalars.count;
2574         int start = ((unsigned int)header.scalars.offset) + 0x100;
2575         int stride = header.scalars.stride;
2576         RING_LOCALS;
2577
2578         BEGIN_RING(3 + sz);
2579         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2580         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2581         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2582         OUT_RING_TABLE(cmdbuf->buf, sz);
2583         ADVANCE_RING();
2584         cmdbuf->buf += sz * sizeof(int);
2585         cmdbuf->bufsz -= sz * sizeof(int);
2586         return 0;
2587 }
2588
2589 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
2590                                           drm_radeon_cmd_header_t header,
2591                                           drm_radeon_kcmd_buffer_t *cmdbuf)
2592 {
2593         int sz = header.vectors.count;
2594         int start = header.vectors.offset;
2595         int stride = header.vectors.stride;
2596         RING_LOCALS;
2597
2598         BEGIN_RING(3 + sz);
2599         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2600         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2601         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2602         OUT_RING_TABLE(cmdbuf->buf, sz);
2603         ADVANCE_RING();
2604
2605         cmdbuf->buf += sz * sizeof(int);
2606         cmdbuf->bufsz -= sz * sizeof(int);
2607         return 0;
2608 }
2609
2610 static int radeon_emit_packet3(drm_device_t * dev,
2611                                drm_file_t * filp_priv,
2612                                drm_radeon_kcmd_buffer_t *cmdbuf)
2613 {
2614         drm_radeon_private_t *dev_priv = dev->dev_private;
2615         unsigned int cmdsz;
2616         int ret;
2617         RING_LOCALS;
2618
2619         DRM_DEBUG("\n");
2620
2621         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2622                                                   cmdbuf, &cmdsz))) {
2623                 DRM_ERROR("Packet verification failed\n");
2624                 return ret;
2625         }
2626
2627         BEGIN_RING(cmdsz);
2628         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2629         ADVANCE_RING();
2630
2631         cmdbuf->buf += cmdsz * 4;
2632         cmdbuf->bufsz -= cmdsz * 4;
2633         return 0;
2634 }
2635
/* Verify a CP type-3 packet and emit it once per cliprect, setting the
 * hardware scissor to each rect first.  If the cmdbuf carries no boxes
 * the packet is still emitted once (unclipped) via the do/while; if
 * orig_nbox was zero the emit is skipped entirely and the packet is
 * merely consumed.
 */
static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* Validate (and fix up offsets inside) the packet; also yields
	 * its size in dwords.
	 */
	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		/* Replay the verified packet for this cliprect. */
		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	/* A single box is consumed so later packets aren't re-clipped. */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	/* Advance the command buffer past the consumed packet. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2697
2698 static int radeon_emit_wait(drm_device_t * dev, int flags)
2699 {
2700         drm_radeon_private_t *dev_priv = dev->dev_private;
2701         RING_LOCALS;
2702
2703         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2704         switch (flags) {
2705         case RADEON_WAIT_2D:
2706                 BEGIN_RING(2);
2707                 RADEON_WAIT_UNTIL_2D_IDLE();
2708                 ADVANCE_RING();
2709                 break;
2710         case RADEON_WAIT_3D:
2711                 BEGIN_RING(2);
2712                 RADEON_WAIT_UNTIL_3D_IDLE();
2713                 ADVANCE_RING();
2714                 break;
2715         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2716                 BEGIN_RING(2);
2717                 RADEON_WAIT_UNTIL_IDLE();
2718                 ADVANCE_RING();
2719                 break;
2720         default:
2721                 return DRM_ERR(EINVAL);
2722         }
2723
2724         return 0;
2725 }
2726
/* DRM_IOCTL_RADEON_CMDBUF: parse and dispatch a client command buffer.
 * The buffer is copied into kernel memory once, then consumed as a
 * sequence of drm_radeon_cmd_header_t-prefixed commands (packets,
 * scalars, vectors, packet3s, waits, buffer discards).  R300-class
 * chips are handed off wholesale to r300_do_cp_cmdbuf().
 */
static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	/* Cap client buffers at 64KB and reject negative sizes. */
	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
				       cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	/* R300-class parts use a different command stream format. */
	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		/* Peel off the next command header and dispatch on its
		 * type; each handler advances cmdbuf past its payload.
		 */
		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			/* Validate the buffer index and ownership before
			 * returning the buffer to the freelist.
			 */
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	/* Success: free the kernel copy and commit the ring writes. */
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
2888
2889 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2890 {
2891         DRM_DEVICE;
2892         drm_radeon_private_t *dev_priv = dev->dev_private;
2893         drm_radeon_getparam_t param;
2894         int value;
2895
2896         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2897                                  sizeof(param));
2898
2899         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2900
2901         switch (param.param) {
2902         case RADEON_PARAM_GART_BUFFER_OFFSET:
2903                 value = dev_priv->gart_buffers_offset;
2904                 break;
2905         case RADEON_PARAM_LAST_FRAME:
2906                 dev_priv->stats.last_frame_reads++;
2907                 value = GET_SCRATCH(0);
2908                 break;
2909         case RADEON_PARAM_LAST_DISPATCH:
2910                 value = GET_SCRATCH(1);
2911                 break;
2912         case RADEON_PARAM_LAST_CLEAR:
2913                 dev_priv->stats.last_clear_reads++;
2914                 value = GET_SCRATCH(2);
2915                 break;
2916         case RADEON_PARAM_IRQ_NR:
2917                 value = dev->irq;
2918                 break;
2919         case RADEON_PARAM_GART_BASE:
2920                 value = dev_priv->gart_vm_start;
2921                 break;
2922         case RADEON_PARAM_REGISTER_HANDLE:
2923                 value = dev_priv->mmio->offset;
2924                 break;
2925         case RADEON_PARAM_STATUS_HANDLE:
2926                 value = dev_priv->ring_rptr_offset;
2927                 break;
2928 #if BITS_PER_LONG == 32
2929                 /*
2930                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2931                  * pointer which can't fit into an int-sized variable.  According to
2932                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2933                  * not supporting it shouldn't be a problem.  If the same functionality
2934                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2935                  * so backwards-compatibility for the embedded platforms can be
2936                  * maintained.  --davidm 4-Feb-2004.
2937                  */
2938         case RADEON_PARAM_SAREA_HANDLE:
2939                 /* The lock is the first dword in the sarea. */
2940                 value = (long)dev->lock.hw_lock;
2941                 break;
2942 #endif
2943         case RADEON_PARAM_GART_TEX_HANDLE:
2944                 value = dev_priv->gart_textures_offset;
2945                 break;
2946         
2947         case RADEON_PARAM_CARD_TYPE:
2948                 if (dev_priv->flags & CHIP_IS_PCIE)
2949                         value = RADEON_CARD_PCIE;
2950                 else if (dev_priv->flags & CHIP_IS_AGP)
2951                         value = RADEON_CARD_AGP;
2952                 else
2953                         value = RADEON_CARD_PCI;
2954                 break;
2955         default:
2956                 return DRM_ERR(EINVAL);
2957         }
2958
2959         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
2960                 DRM_ERROR("copy_to_user\n");
2961                 return DRM_ERR(EFAULT);
2962         }
2963
2964         return 0;
2965 }
2966
2967 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
2968 {
2969         DRM_DEVICE;
2970         drm_radeon_private_t *dev_priv = dev->dev_private;
2971         drm_file_t *filp_priv;
2972         drm_radeon_setparam_t sp;
2973         struct drm_radeon_driver_file_fields *radeon_priv;
2974
2975         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2976
2977         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
2978                                  sizeof(sp));
2979
2980         switch (sp.param) {
2981         case RADEON_SETPARAM_FB_LOCATION:
2982                 radeon_priv = filp_priv->driver_priv;
2983                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
2984                 break;
2985         case RADEON_SETPARAM_SWITCH_TILING:
2986                 if (sp.value == 0) {
2987                         DRM_DEBUG("color tiling disabled\n");
2988                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
2989                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
2990                         dev_priv->sarea_priv->tiling_enabled = 0;
2991                 } else if (sp.value == 1) {
2992                         DRM_DEBUG("color tiling enabled\n");
2993                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
2994                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
2995                         dev_priv->sarea_priv->tiling_enabled = 1;
2996                 }
2997                 break;
2998         case RADEON_SETPARAM_PCIGART_LOCATION:
2999                 dev_priv->pcigart_offset = sp.value;
3000                 break;
3001         case RADEON_SETPARAM_NEW_MEMMAP:
3002                 dev_priv->new_memmap = sp.value;
3003                 break;
3004         default:
3005                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3006                 return DRM_ERR(EINVAL);
3007         }
3008
3009         return 0;
3010 }
3011
3012 /* When a client dies:
3013  *    - Check for and clean up flipped page state
3014  *    - Free any alloced GART memory.
3015  *    - Free any alloced radeon surfaces.
3016  *
3017  * DRM infrastructure takes care of reclaiming dma buffers.
3018  */
3019 void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
3020 {
3021         if (dev->dev_private) {
3022                 drm_radeon_private_t *dev_priv = dev->dev_private;
3023                 if (dev_priv->page_flipping) {
3024                         radeon_do_cleanup_pageflip(dev);
3025                 }
3026                 radeon_mem_release(filp, dev_priv->gart_heap);
3027                 radeon_mem_release(filp, dev_priv->fb_heap);
3028                 radeon_surfaces_release(filp, dev_priv);
3029         }
3030 }
3031
/* Called when the last client closes the device: release all driver
 * state via radeon_do_release().
 */
void radeon_driver_lastclose(drm_device_t * dev)
{
	radeon_do_release(dev);
}
3036
3037 int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
3038 {
3039         drm_radeon_private_t *dev_priv = dev->dev_private;
3040         struct drm_radeon_driver_file_fields *radeon_priv;
3041
3042         DRM_DEBUG("\n");
3043         radeon_priv =
3044             (struct drm_radeon_driver_file_fields *)
3045             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3046
3047         if (!radeon_priv)
3048                 return -ENOMEM;
3049
3050         filp_priv->driver_priv = radeon_priv;
3051
3052         if (dev_priv)
3053                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3054         else
3055                 radeon_priv->radeon_fb_delta = 0;
3056         return 0;
3057 }
3058
/* Per-client close: free the per-file data allocated in
 * radeon_driver_open().
 */
void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    filp_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}
3066
/* Radeon ioctl dispatch table, indexed by ioctl number.  DRM_AUTH
 * entries require an authenticated client; entries also marked
 * DRM_MASTER|DRM_ROOT_ONLY are restricted to the privileged
 * master/root client (device setup and heap initialization).
 */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};

/* Number of entries in radeon_ioctls[]. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);