merge Linus head tree into my drm tree and fix up conflicts
[linux-2.6] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*-
2  *
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
36 /* ================================================================
37  * Helper functions for client state checking and fixup
38  */
39
40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
41                                                     dev_priv,
42                                                     drm_file_t * filp_priv,
43                                                     u32 *offset)
44 {
45         u32 off = *offset;
46         struct drm_radeon_driver_file_fields *radeon_priv;
47
48         if (off >= dev_priv->fb_location &&
49             off < (dev_priv->gart_vm_start + dev_priv->gart_size))
50                 return 0;
51
52         radeon_priv = filp_priv->driver_priv;
53         off += radeon_priv->radeon_fb_delta;
54
55         DRM_DEBUG("offset fixed up to 0x%x\n", off);
56
57         if (off < dev_priv->fb_location ||
58             off >= (dev_priv->gart_vm_start + dev_priv->gart_size))
59                 return DRM_ERR(EINVAL);
60
61         *offset = off;
62
63         return 0;
64 }
65
66 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
67                                                      dev_priv,
68                                                      drm_file_t * filp_priv,
69                                                      int id, u32 *data)
70 {
71         switch (id) {
72
73         case RADEON_EMIT_PP_MISC:
74                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
75                                                   &data[(RADEON_RB3D_DEPTHOFFSET
76                                                          -
77                                                          RADEON_PP_MISC) /
78                                                         4])) {
79                         DRM_ERROR("Invalid depth buffer offset\n");
80                         return DRM_ERR(EINVAL);
81                 }
82                 break;
83
84         case RADEON_EMIT_PP_CNTL:
85                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
86                                                   &data[(RADEON_RB3D_COLOROFFSET
87                                                          -
88                                                          RADEON_PP_CNTL) /
89                                                         4])) {
90                         DRM_ERROR("Invalid colour buffer offset\n");
91                         return DRM_ERR(EINVAL);
92                 }
93                 break;
94
95         case R200_EMIT_PP_TXOFFSET_0:
96         case R200_EMIT_PP_TXOFFSET_1:
97         case R200_EMIT_PP_TXOFFSET_2:
98         case R200_EMIT_PP_TXOFFSET_3:
99         case R200_EMIT_PP_TXOFFSET_4:
100         case R200_EMIT_PP_TXOFFSET_5:
101                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
102                                                   &data[0])) {
103                         DRM_ERROR("Invalid R200 texture offset\n");
104                         return DRM_ERR(EINVAL);
105                 }
106                 break;
107
108         case RADEON_EMIT_PP_TXFILTER_0:
109         case RADEON_EMIT_PP_TXFILTER_1:
110         case RADEON_EMIT_PP_TXFILTER_2:
111                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
112                                                   &data[(RADEON_PP_TXOFFSET_0
113                                                          -
114                                                          RADEON_PP_TXFILTER_0) /
115                                                         4])) {
116                         DRM_ERROR("Invalid R100 texture offset\n");
117                         return DRM_ERR(EINVAL);
118                 }
119                 break;
120
121         case R200_EMIT_PP_CUBIC_OFFSETS_0:
122         case R200_EMIT_PP_CUBIC_OFFSETS_1:
123         case R200_EMIT_PP_CUBIC_OFFSETS_2:
124         case R200_EMIT_PP_CUBIC_OFFSETS_3:
125         case R200_EMIT_PP_CUBIC_OFFSETS_4:
126         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
127                         int i;
128                         for (i = 0; i < 5; i++) {
129                                 if (radeon_check_and_fixup_offset
130                                     (dev_priv, filp_priv, &data[i])) {
131                                         DRM_ERROR
132                                             ("Invalid R200 cubic texture offset\n");
133                                         return DRM_ERR(EINVAL);
134                                 }
135                         }
136                         break;
137                 }
138
139         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
140         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
141         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
142                         int i;
143                         for (i = 0; i < 5; i++) {
144                                 if (radeon_check_and_fixup_offset(dev_priv,
145                                                                   filp_priv,
146                                                                   &data[i])) {
147                                         DRM_ERROR
148                                             ("Invalid R100 cubic texture offset\n");
149                                         return DRM_ERR(EINVAL);
150                                 }
151                         }
152                 }
153                 break;
154
155         case RADEON_EMIT_RB3D_COLORPITCH:
156         case RADEON_EMIT_RE_LINE_PATTERN:
157         case RADEON_EMIT_SE_LINE_WIDTH:
158         case RADEON_EMIT_PP_LUM_MATRIX:
159         case RADEON_EMIT_PP_ROT_MATRIX_0:
160         case RADEON_EMIT_RB3D_STENCILREFMASK:
161         case RADEON_EMIT_SE_VPORT_XSCALE:
162         case RADEON_EMIT_SE_CNTL:
163         case RADEON_EMIT_SE_CNTL_STATUS:
164         case RADEON_EMIT_RE_MISC:
165         case RADEON_EMIT_PP_BORDER_COLOR_0:
166         case RADEON_EMIT_PP_BORDER_COLOR_1:
167         case RADEON_EMIT_PP_BORDER_COLOR_2:
168         case RADEON_EMIT_SE_ZBIAS_FACTOR:
169         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
170         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
171         case R200_EMIT_PP_TXCBLEND_0:
172         case R200_EMIT_PP_TXCBLEND_1:
173         case R200_EMIT_PP_TXCBLEND_2:
174         case R200_EMIT_PP_TXCBLEND_3:
175         case R200_EMIT_PP_TXCBLEND_4:
176         case R200_EMIT_PP_TXCBLEND_5:
177         case R200_EMIT_PP_TXCBLEND_6:
178         case R200_EMIT_PP_TXCBLEND_7:
179         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
180         case R200_EMIT_TFACTOR_0:
181         case R200_EMIT_VTX_FMT_0:
182         case R200_EMIT_VAP_CTL:
183         case R200_EMIT_MATRIX_SELECT_0:
184         case R200_EMIT_TEX_PROC_CTL_2:
185         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
186         case R200_EMIT_PP_TXFILTER_0:
187         case R200_EMIT_PP_TXFILTER_1:
188         case R200_EMIT_PP_TXFILTER_2:
189         case R200_EMIT_PP_TXFILTER_3:
190         case R200_EMIT_PP_TXFILTER_4:
191         case R200_EMIT_PP_TXFILTER_5:
192         case R200_EMIT_VTE_CNTL:
193         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
194         case R200_EMIT_PP_TAM_DEBUG3:
195         case R200_EMIT_PP_CNTL_X:
196         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
197         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
198         case R200_EMIT_RE_SCISSOR_TL_0:
199         case R200_EMIT_RE_SCISSOR_TL_1:
200         case R200_EMIT_RE_SCISSOR_TL_2:
201         case R200_EMIT_SE_VAP_CNTL_STATUS:
202         case R200_EMIT_SE_VTX_STATE_CNTL:
203         case R200_EMIT_RE_POINTSIZE:
204         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
205         case R200_EMIT_PP_CUBIC_FACES_0:
206         case R200_EMIT_PP_CUBIC_FACES_1:
207         case R200_EMIT_PP_CUBIC_FACES_2:
208         case R200_EMIT_PP_CUBIC_FACES_3:
209         case R200_EMIT_PP_CUBIC_FACES_4:
210         case R200_EMIT_PP_CUBIC_FACES_5:
211         case RADEON_EMIT_PP_TEX_SIZE_0:
212         case RADEON_EMIT_PP_TEX_SIZE_1:
213         case RADEON_EMIT_PP_TEX_SIZE_2:
214         case R200_EMIT_RB3D_BLENDCOLOR:
215         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
216         case RADEON_EMIT_PP_CUBIC_FACES_0:
217         case RADEON_EMIT_PP_CUBIC_FACES_1:
218         case RADEON_EMIT_PP_CUBIC_FACES_2:
219         case R200_EMIT_PP_TRI_PERF_CNTL:
220         case R200_EMIT_PP_AFS_0:
221         case R200_EMIT_PP_AFS_1:
222         case R200_EMIT_ATF_TFACTOR:
223         case R200_EMIT_PP_TXCTLALL_0:
224         case R200_EMIT_PP_TXCTLALL_1:
225         case R200_EMIT_PP_TXCTLALL_2:
226         case R200_EMIT_PP_TXCTLALL_3:
227         case R200_EMIT_PP_TXCTLALL_4:
228         case R200_EMIT_PP_TXCTLALL_5:
229                 /* These packets don't contain memory offsets */
230                 break;
231
232         default:
233                 DRM_ERROR("Unknown state packet ID %d\n", id);
234                 return DRM_ERR(EINVAL);
235         }
236
237         return 0;
238 }
239
240 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
241                                                      dev_priv,
242                                                      drm_file_t * filp_priv,
243                                                      drm_radeon_kcmd_buffer_t *cmdbuf,
244                                                      unsigned int *cmdsz)
245 {
246         u32 *cmd = (u32 *) cmdbuf->buf;
247
248         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
249
250         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
251                 DRM_ERROR("Not a type 3 packet\n");
252                 return DRM_ERR(EINVAL);
253         }
254
255         if (4 * *cmdsz > cmdbuf->bufsz) {
256                 DRM_ERROR("Packet size larger than size of data provided\n");
257                 return DRM_ERR(EINVAL);
258         }
259
260         /* Check client state and fix it up if necessary */
261         if (cmd[0] & 0x8000) {  /* MSB of opcode: next DWORD GUI_CNTL */
262                 u32 offset;
263
264                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
265                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
266                         offset = cmd[2] << 10;
267                         if (radeon_check_and_fixup_offset
268                             (dev_priv, filp_priv, &offset)) {
269                                 DRM_ERROR("Invalid first packet offset\n");
270                                 return DRM_ERR(EINVAL);
271                         }
272                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
273                 }
274
275                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
276                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
277                         offset = cmd[3] << 10;
278                         if (radeon_check_and_fixup_offset
279                             (dev_priv, filp_priv, &offset)) {
280                                 DRM_ERROR("Invalid second packet offset\n");
281                                 return DRM_ERR(EINVAL);
282                         }
283                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
284                 }
285         }
286
287         return 0;
288 }
289
290 /* ================================================================
291  * CP hardware state programming functions
292  */
293
294 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
295                                              drm_clip_rect_t * box)
296 {
297         RING_LOCALS;
298
299         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
300                   box->x1, box->y1, box->x2, box->y2);
301
302         BEGIN_RING(4);
303         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
304         OUT_RING((box->y1 << 16) | box->x1);
305         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
306         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
307         ADVANCE_RING();
308 }
309
310 /* Emit 1.1 state
311  */
312 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
313                              drm_file_t * filp_priv,
314                              drm_radeon_context_regs_t * ctx,
315                              drm_radeon_texture_regs_t * tex,
316                              unsigned int dirty)
317 {
318         RING_LOCALS;
319         DRM_DEBUG("dirty=0x%08x\n", dirty);
320
321         if (dirty & RADEON_UPLOAD_CONTEXT) {
322                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
323                                                   &ctx->rb3d_depthoffset)) {
324                         DRM_ERROR("Invalid depth buffer offset\n");
325                         return DRM_ERR(EINVAL);
326                 }
327
328                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
329                                                   &ctx->rb3d_coloroffset)) {
330                         DRM_ERROR("Invalid depth buffer offset\n");
331                         return DRM_ERR(EINVAL);
332                 }
333
334                 BEGIN_RING(14);
335                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
336                 OUT_RING(ctx->pp_misc);
337                 OUT_RING(ctx->pp_fog_color);
338                 OUT_RING(ctx->re_solid_color);
339                 OUT_RING(ctx->rb3d_blendcntl);
340                 OUT_RING(ctx->rb3d_depthoffset);
341                 OUT_RING(ctx->rb3d_depthpitch);
342                 OUT_RING(ctx->rb3d_zstencilcntl);
343                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
344                 OUT_RING(ctx->pp_cntl);
345                 OUT_RING(ctx->rb3d_cntl);
346                 OUT_RING(ctx->rb3d_coloroffset);
347                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
348                 OUT_RING(ctx->rb3d_colorpitch);
349                 ADVANCE_RING();
350         }
351
352         if (dirty & RADEON_UPLOAD_VERTFMT) {
353                 BEGIN_RING(2);
354                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
355                 OUT_RING(ctx->se_coord_fmt);
356                 ADVANCE_RING();
357         }
358
359         if (dirty & RADEON_UPLOAD_LINE) {
360                 BEGIN_RING(5);
361                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
362                 OUT_RING(ctx->re_line_pattern);
363                 OUT_RING(ctx->re_line_state);
364                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
365                 OUT_RING(ctx->se_line_width);
366                 ADVANCE_RING();
367         }
368
369         if (dirty & RADEON_UPLOAD_BUMPMAP) {
370                 BEGIN_RING(5);
371                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
372                 OUT_RING(ctx->pp_lum_matrix);
373                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
374                 OUT_RING(ctx->pp_rot_matrix_0);
375                 OUT_RING(ctx->pp_rot_matrix_1);
376                 ADVANCE_RING();
377         }
378
379         if (dirty & RADEON_UPLOAD_MASKS) {
380                 BEGIN_RING(4);
381                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
382                 OUT_RING(ctx->rb3d_stencilrefmask);
383                 OUT_RING(ctx->rb3d_ropcntl);
384                 OUT_RING(ctx->rb3d_planemask);
385                 ADVANCE_RING();
386         }
387
388         if (dirty & RADEON_UPLOAD_VIEWPORT) {
389                 BEGIN_RING(7);
390                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
391                 OUT_RING(ctx->se_vport_xscale);
392                 OUT_RING(ctx->se_vport_xoffset);
393                 OUT_RING(ctx->se_vport_yscale);
394                 OUT_RING(ctx->se_vport_yoffset);
395                 OUT_RING(ctx->se_vport_zscale);
396                 OUT_RING(ctx->se_vport_zoffset);
397                 ADVANCE_RING();
398         }
399
400         if (dirty & RADEON_UPLOAD_SETUP) {
401                 BEGIN_RING(4);
402                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
403                 OUT_RING(ctx->se_cntl);
404                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
405                 OUT_RING(ctx->se_cntl_status);
406                 ADVANCE_RING();
407         }
408
409         if (dirty & RADEON_UPLOAD_MISC) {
410                 BEGIN_RING(2);
411                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
412                 OUT_RING(ctx->re_misc);
413                 ADVANCE_RING();
414         }
415
416         if (dirty & RADEON_UPLOAD_TEX0) {
417                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
418                                                   &tex[0].pp_txoffset)) {
419                         DRM_ERROR("Invalid texture offset for unit 0\n");
420                         return DRM_ERR(EINVAL);
421                 }
422
423                 BEGIN_RING(9);
424                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
425                 OUT_RING(tex[0].pp_txfilter);
426                 OUT_RING(tex[0].pp_txformat);
427                 OUT_RING(tex[0].pp_txoffset);
428                 OUT_RING(tex[0].pp_txcblend);
429                 OUT_RING(tex[0].pp_txablend);
430                 OUT_RING(tex[0].pp_tfactor);
431                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
432                 OUT_RING(tex[0].pp_border_color);
433                 ADVANCE_RING();
434         }
435
436         if (dirty & RADEON_UPLOAD_TEX1) {
437                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
438                                                   &tex[1].pp_txoffset)) {
439                         DRM_ERROR("Invalid texture offset for unit 1\n");
440                         return DRM_ERR(EINVAL);
441                 }
442
443                 BEGIN_RING(9);
444                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
445                 OUT_RING(tex[1].pp_txfilter);
446                 OUT_RING(tex[1].pp_txformat);
447                 OUT_RING(tex[1].pp_txoffset);
448                 OUT_RING(tex[1].pp_txcblend);
449                 OUT_RING(tex[1].pp_txablend);
450                 OUT_RING(tex[1].pp_tfactor);
451                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
452                 OUT_RING(tex[1].pp_border_color);
453                 ADVANCE_RING();
454         }
455
456         if (dirty & RADEON_UPLOAD_TEX2) {
457                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
458                                                   &tex[2].pp_txoffset)) {
459                         DRM_ERROR("Invalid texture offset for unit 2\n");
460                         return DRM_ERR(EINVAL);
461                 }
462
463                 BEGIN_RING(9);
464                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
465                 OUT_RING(tex[2].pp_txfilter);
466                 OUT_RING(tex[2].pp_txformat);
467                 OUT_RING(tex[2].pp_txoffset);
468                 OUT_RING(tex[2].pp_txcblend);
469                 OUT_RING(tex[2].pp_txablend);
470                 OUT_RING(tex[2].pp_tfactor);
471                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
472                 OUT_RING(tex[2].pp_border_color);
473                 ADVANCE_RING();
474         }
475
476         return 0;
477 }
478
479 /* Emit 1.2 state
480  */
481 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
482                               drm_file_t * filp_priv,
483                               drm_radeon_state_t * state)
484 {
485         RING_LOCALS;
486
487         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
488                 BEGIN_RING(3);
489                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
490                 OUT_RING(state->context2.se_zbias_factor);
491                 OUT_RING(state->context2.se_zbias_constant);
492                 ADVANCE_RING();
493         }
494
495         return radeon_emit_state(dev_priv, filp_priv, &state->context,
496                                  state->tex, state->dirty);
497 }
498
499 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
500  * 1.3 cmdbuffers allow all previous state to be updated as well as
501  * the tcl scalar and vector areas.
502  */
503 static struct {
504         int start;
505         int len;
506         const char *name;
507 } packet[RADEON_MAX_STATE_PACKETS] = {
508         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
509         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
510         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
511         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
512         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
513         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
514         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
515         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
516         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
517         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
518         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
519         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
520         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
521         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
522         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
523         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
524         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
525         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
526         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
527         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
528         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
529                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
530         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
531         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
532         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
533         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
534         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
535         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
536         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
537         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
538         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
539         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
540         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
541         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
542         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
543         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
544         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
545         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
546         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
547         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
548         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
549         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
550         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
551         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
552         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
553         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
554         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
555         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
556         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
557         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
558         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
559         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
560         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
561         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
562         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
563         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
564         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
565         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
566         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
567         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
568         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
569         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
570                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
571         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
572         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},    /* 62 */
573         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
574         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
575         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
576         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
577         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
578         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
579         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
580         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
581         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
582         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
583         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
584         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
585         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
586         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
587         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
588         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
589         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
590         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
591         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
592         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
593         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
594         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
595         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},   /* 85 */
596         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
597         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
598         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
599         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
600         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
601         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
602         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
603         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
604 };
605
606 /* ================================================================
607  * Performance monitoring functions
608  */
609
610 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
611                              int x, int y, int w, int h, int r, int g, int b)
612 {
613         u32 color;
614         RING_LOCALS;
615
616         x += dev_priv->sarea_priv->boxes[0].x1;
617         y += dev_priv->sarea_priv->boxes[0].y1;
618
619         switch (dev_priv->color_fmt) {
620         case RADEON_COLOR_FORMAT_RGB565:
621                 color = (((r & 0xf8) << 8) |
622                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
623                 break;
624         case RADEON_COLOR_FORMAT_ARGB8888:
625         default:
626                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
627                 break;
628         }
629
630         BEGIN_RING(4);
631         RADEON_WAIT_UNTIL_3D_IDLE();
632         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
633         OUT_RING(0xffffffff);
634         ADVANCE_RING();
635
636         BEGIN_RING(6);
637
638         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
639         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
640                  RADEON_GMC_BRUSH_SOLID_COLOR |
641                  (dev_priv->color_fmt << 8) |
642                  RADEON_GMC_SRC_DATATYPE_COLOR |
643                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
644
645         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
646                 OUT_RING(dev_priv->front_pitch_offset);
647         } else {
648                 OUT_RING(dev_priv->back_pitch_offset);
649         }
650
651         OUT_RING(color);
652
653         OUT_RING((x << 16) | y);
654         OUT_RING((w << 16) | h);
655
656         ADVANCE_RING();
657 }
658
659 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
660 {
661         /* Collapse various things into a wait flag -- trying to
662          * guess if userspase slept -- better just to have them tell us.
663          */
664         if (dev_priv->stats.last_frame_reads > 1 ||
665             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
666                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
667         }
668
669         if (dev_priv->stats.freelist_loops) {
670                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
671         }
672
673         /* Purple box for page flipping
674          */
675         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
676                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
677
678         /* Red box if we have to wait for idle at any point
679          */
680         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
681                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
682
683         /* Blue box: lost context?
684          */
685
686         /* Yellow box for texture swaps
687          */
688         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
689                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
690
691         /* Green box if hardware never idles (as far as we can tell)
692          */
693         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
694                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
695
696         /* Draw bars indicating number of buffers allocated
697          * (not a great measure, easily confused)
698          */
699         if (dev_priv->stats.requested_bufs) {
700                 if (dev_priv->stats.requested_bufs > 100)
701                         dev_priv->stats.requested_bufs = 100;
702
703                 radeon_clear_box(dev_priv, 4, 16,
704                                  dev_priv->stats.requested_bufs, 4,
705                                  196, 128, 128);
706         }
707
708         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
709
710 }
711
712 /* ================================================================
713  * CP command dispatch functions
714  */
715
/*
 * Emit the CP commands that carry out one clear request.
 *
 * dev:         device; dev->dev_private supplies the radeon driver state.
 * clear:       request parameters.  flags selects the buffers and options;
 *              color_mask, clear_color, clear_depth and depth_mask are
 *              written into the command stream.
 * depth_boxes: rectangles used for the depth/stencil quads; entry i is
 *              drawn together with sarea cliprect i, so it is indexed
 *              from 0 to sarea_priv->nbox - 1.
 *
 * The work is emitted in this order:
 *   1. If page flipping is on and page 1 is current, the FRONT and BACK
 *      bits of the request are exchanged.
 *   2. Front/back colour buffers are filled per cliprect with 2D
 *      PAINT_MULTI packets.
 *   3. With RADEON_CLEAR_FASTZ, the z mask (and, on R200 microcode with
 *      hierarchical Z requested, the hi-z tiles) is cleared.
 *   4. Depth and/or stencil are cleared by drawing one rect-list
 *      primitive per cliprect; R200 microcode and everything else use
 *      different state setup and draw packets.
 *   5. sarea last_clear is incremented and emitted as the clear age.
 *
 * Returns nothing.  Every path that programs 2D or 3D state resets the
 * sarea ctx_owner to 0 (see the "restore the 3D state next time" notes).
 */
716 static void radeon_cp_dispatch_clear(drm_device_t * dev,
717                                      drm_radeon_clear_t * clear,
718                                      drm_radeon_clear_rect_t * depth_boxes)
719 {
720         drm_radeon_private_t *dev_priv = dev->dev_private;
721         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
722         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
723         int nbox = sarea_priv->nbox;
724         drm_clip_rect_t *pbox = sarea_priv->boxes;
725         unsigned int flags = clear->flags;
726         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
727         int i;
728         RING_LOCALS;
729         DRM_DEBUG("flags = 0x%x\n", flags);
730
            /* Compared against last_clear_reads by
             * radeon_cp_performance_boxes().
             */
731         dev_priv->stats.clears++;
732
            /* With page flipping on and page 1 current, exchange the FRONT
             * and BACK bits of the request.  tmp keeps the original bits so
             * the two tests below do not see each other's result.
             */
733         if (dev_priv->page_flipping && dev_priv->current_page == 1) {
734                 unsigned int tmp = flags;
735
736                 flags &= ~(RADEON_FRONT | RADEON_BACK);
737                 if (tmp & RADEON_FRONT)
738                         flags |= RADEON_BACK;
739                 if (tmp & RADEON_BACK)
740                         flags |= RADEON_FRONT;
741         }
742
743         if (flags & (RADEON_FRONT | RADEON_BACK)) {
744
745                 BEGIN_RING(4);
746
747                 /* Ensure the 3D stream is idle before doing a
748                  * 2D fill to clear the front or back buffer.
749                  */
750                 RADEON_WAIT_UNTIL_3D_IDLE();
751
752                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
753                 OUT_RING(clear->color_mask);
754
755                 ADVANCE_RING();
756
757                 /* Make sure we restore the 3D state next time.
758                  */
759                 dev_priv->sarea_priv->ctx_owner = 0;
760
                    /* One solid fill per cliprect and per selected buffer;
                     * the front and back packets differ only in the
                     * pitch/offset dword.
                     */
761                 for (i = 0; i < nbox; i++) {
762                         int x = pbox[i].x1;
763                         int y = pbox[i].y1;
764                         int w = pbox[i].x2 - x;
765                         int h = pbox[i].y2 - y;
766
767                         DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
768                                   x, y, w, h, flags);
769
770                         if (flags & RADEON_FRONT) {
771                                 BEGIN_RING(6);
772
773                                 OUT_RING(CP_PACKET3
774                                          (RADEON_CNTL_PAINT_MULTI, 4));
775                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
776                                          RADEON_GMC_BRUSH_SOLID_COLOR |
777                                          (dev_priv->
778                                           color_fmt << 8) |
779                                          RADEON_GMC_SRC_DATATYPE_COLOR |
780                                          RADEON_ROP3_P |
781                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
782
783                                 OUT_RING(dev_priv->front_pitch_offset);
784                                 OUT_RING(clear->clear_color);
785
786                                 OUT_RING((x << 16) | y);
787                                 OUT_RING((w << 16) | h);
788
789                                 ADVANCE_RING();
790                         }
791
792                         if (flags & RADEON_BACK) {
793                                 BEGIN_RING(6);
794
795                                 OUT_RING(CP_PACKET3
796                                          (RADEON_CNTL_PAINT_MULTI, 4));
797                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
798                                          RADEON_GMC_BRUSH_SOLID_COLOR |
799                                          (dev_priv->
800                                           color_fmt << 8) |
801                                          RADEON_GMC_SRC_DATATYPE_COLOR |
802                                          RADEON_ROP3_P |
803                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
804
805                                 OUT_RING(dev_priv->back_pitch_offset);
806                                 OUT_RING(clear->clear_color);
807
808                                 OUT_RING((x << 16) | y);
809                                 OUT_RING((w << 16) | h);
810
811                                 ADVANCE_RING();
812                         }
813                 }
814         }
815
816         /* hyper z clear */
817         /* no docs available, based on reverse engineering by Stephane Marchesin */
818         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
819             && (flags & RADEON_CLEAR_FASTZ)) {
820
821                 int i;  /* NOTE: shadows the function-level i */
                    /* depth_pitch / 2 for the 16-bit integer Z format,
                     * depth_pitch / 4 for every other format.
                     */
822                 int depthpixperline =
823                     dev_priv->depth_fmt ==
824                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
825                                                        2) : (dev_priv->
826                                                              depth_pitch / 4);
827
828                 u32 clearmask;
829
                    /* clear_depth with the low 8 bits of depth_mask placed
                     * in bits 24-31.
                     */
830                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
831                     ((clear->depth_mask & 0xff) << 24);
832
833                 /* Make sure we restore the 3D state next time.
834                  * we haven't touched any "normal" state - still need this?
835                  */
836                 dev_priv->sarea_priv->ctx_owner = 0;
837
838                 if ((dev_priv->flags & CHIP_HAS_HIERZ)
839                     && (flags & RADEON_USE_HIERZ)) {
840                         /* FIXME : reverse engineer that for Rx00 cards */
841                         /* FIXME : the mask supposedly contains low-res z values. So can't set
842                            just to the max (0xff? or actually 0x3fff?), need to take z clear
843                            value into account? */
844                         /* pattern seems to work for r100, though get slight
845                            rendering errors with glxgears. If hierz is not enabled for r100,
846                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
847                            other ones are ignored, and the same clear mask can be used. That's
848                            very different behaviour than R200 which needs different clear mask
849                            and different number of tiles to clear if hierz is enabled or not !?!
850                          */
851                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
852                 } else {
853                         /* clear mask : chooses the clearing pattern.
854                            rv250: could be used to clear only parts of macrotiles
855                            (but that would get really complicated...)?
856                            bit 0 and 1 (either or both of them ?!?!) are used to
857                            not clear tile (or maybe one of the bits indicates if the tile is
858                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
859                            Pattern is as follows:
860                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
861                            bits -------------------------------------------------
862                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
863                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
864                            covers 256 pixels ?!?
865                          */
866                         clearmask = 0x0;
867                 }
868
                    /* 8 dwords: the wait plus three register writes. */
869                 BEGIN_RING(8);
870                 RADEON_WAIT_UNTIL_2D_IDLE();
871                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
872                              tempRB3D_DEPTHCLEARVALUE);
873                 /* what offset is this exactly ? */
874                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
875                 /* need ctlstat, otherwise get some strange black flickering */
876                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
877                              RADEON_RB3D_ZC_FLUSH_ALL);
878                 ADVANCE_RING();
879
                    /* Per cliprect, pick one of three tile-addressing
                     * schemes (hierz chip without R200 microcode, R200
                     * microcode, anything else).  Each inner loop runs j
                     * from 0 to nrtilesy inclusive, i.e. it emits
                     * nrtilesy + 1 CLEAR_ZMASK packets and advances
                     * tileoffset after each one.
                     */
880                 for (i = 0; i < nbox; i++) {
881                         int tileoffset, nrtilesx, nrtilesy, j;
882                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
883                         if ((dev_priv->flags & CHIP_HAS_HIERZ)
884                             && !(dev_priv->microcode_version == UCODE_R200)) {
885                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
886                                    maybe r200 actually doesn't need to put the low-res z value into
887                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
888                                    Works for R100, both with hierz and without.
889                                    R100 seems to operate on 2x1 8x8 tiles, but...
890                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
891                                    problematic with resolutions which are not 64 pix aligned? */
892                                 tileoffset =
893                                     ((pbox[i].y1 >> 3) * depthpixperline +
894                                      pbox[i].x1) >> 6;
895                                 nrtilesx =
896                                     ((pbox[i].x2 & ~63) -
897                                      (pbox[i].x1 & ~63)) >> 4;
898                                 nrtilesy =
899                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
900                                 for (j = 0; j <= nrtilesy; j++) {
901                                         BEGIN_RING(4);
902                                         OUT_RING(CP_PACKET3
903                                                  (RADEON_3D_CLEAR_ZMASK, 2));
904                                         /* first tile */
905                                         OUT_RING(tileoffset * 8);
906                                         /* the number of tiles to clear */
907                                         OUT_RING(nrtilesx + 4);
908                                         /* clear mask : chooses the clearing pattern. */
909                                         OUT_RING(clearmask);
910                                         ADVANCE_RING();
911                                         tileoffset += depthpixperline >> 6;
912                                 }
913                         } else if (dev_priv->microcode_version == UCODE_R200) {
914                                 /* works for rv250. */
915                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
916                                 tileoffset =
917                                     ((pbox[i].y1 >> 3) * depthpixperline +
918                                      pbox[i].x1) >> 5;
919                                 nrtilesx =
920                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
921                                 nrtilesy =
922                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
923                                 for (j = 0; j <= nrtilesy; j++) {
924                                         BEGIN_RING(4);
925                                         OUT_RING(CP_PACKET3
926                                                  (RADEON_3D_CLEAR_ZMASK, 2));
927                                         /* first tile */
928                                         /* judging by the first tile offset needed, could possibly
929                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
930                                            macro tiles, though would still need clear mask for
931                                            right/bottom if truly 4x4 granularity is desired ? */
932                                         OUT_RING(tileoffset * 16);
933                                         /* the number of tiles to clear */
934                                         OUT_RING(nrtilesx + 1);
935                                         /* clear mask : chooses the clearing pattern. */
936                                         OUT_RING(clearmask);
937                                         ADVANCE_RING();
938                                         tileoffset += depthpixperline >> 5;
939                                 }
940                         } else {        /* rv 100 */
941                                 /* rv100 might not need 64 pix alignment, who knows */
942                                 /* offsets are, hmm, weird */
943                                 tileoffset =
944                                     ((pbox[i].y1 >> 4) * depthpixperline +
945                                      pbox[i].x1) >> 6;
946                                 nrtilesx =
947                                     ((pbox[i].x2 & ~63) -
948                                      (pbox[i].x1 & ~63)) >> 4;
949                                 nrtilesy =
950                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
951                                 for (j = 0; j <= nrtilesy; j++) {
952                                         BEGIN_RING(4);
953                                         OUT_RING(CP_PACKET3
954                                                  (RADEON_3D_CLEAR_ZMASK, 2));
955                                         OUT_RING(tileoffset * 128);
956                                         /* the number of tiles to clear */
957                                         OUT_RING(nrtilesx + 4);
958                                         /* clear mask : chooses the clearing pattern. */
959                                         OUT_RING(clearmask);
960                                         ADVANCE_RING();
961                                         tileoffset += depthpixperline >> 6;
962                                 }
963                         }
964                 }
965
966                 /* TODO don't always clear all hi-level z tiles */
967                 if ((dev_priv->flags & CHIP_HAS_HIERZ)
968                     && (dev_priv->microcode_version == UCODE_R200)
969                     && (flags & RADEON_USE_HIERZ))
970                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
971                         /* FIXME : the mask supposedly contains low-res z values. So can't set
972                            just to the max (0xff? or actually 0x3fff?), need to take z clear
973                            value into account? */
974                 {
975                         BEGIN_RING(4);
976                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
977                         OUT_RING(0x0);  /* First tile */
                            /* NOTE(review): 0x3cc0 is a fixed count that does
                             * not depend on the cliprects -- meaning not
                             * derivable from this file, confirm.
                             */
978                         OUT_RING(0x3cc0);
979                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
980                         ADVANCE_RING();
981                 }
982         }
983
984         /* We have to clear the depth and/or stencil buffers by
985          * rendering a quad into just those buffers.  Thus, we have to
986          * make sure the 3D engine is configured correctly.
987          */
988         if ((dev_priv->microcode_version == UCODE_R200) &&
989             (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
990
991                 int tempPP_CNTL;
992                 int tempRE_CNTL;
993                 int tempRB3D_CNTL;
994                 int tempRB3D_ZSTENCILCNTL;
995                 int tempRB3D_STENCILREFMASK;
996                 int tempRB3D_PLANEMASK;
997                 int tempSE_CNTL;
998                 int tempSE_VTE_CNTL;
999                 int tempSE_VTX_FMT_0;
1000                 int tempSE_VTX_FMT_1;
1001                 int tempSE_VAP_CNTL;
1002                 int tempRE_AUX_SCISSOR_CNTL;
1003
1004                 tempPP_CNTL = 0;
1005                 tempRE_CNTL = 0;
1006
                     /* Start from the register values kept in
                      * dev_priv->depth_clear and adjust them below.
                      */
1007                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
1008
1009                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1010                 tempRB3D_STENCILREFMASK = 0x0;
1011
1012                 tempSE_CNTL = depth_clear->se_cntl;
1013
1014                 /* Disable TCL */
1015
1016                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
1017                                           (0x9 <<
1018                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
1019
                     /* Plane mask 0: the quad writes no colour planes. */
1020                 tempRB3D_PLANEMASK = 0x0;
1021
1022                 tempRE_AUX_SCISSOR_CNTL = 0x0;
1023
1024                 tempSE_VTE_CNTL =
1025                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
1026
1027                 /* Vertex format (X, Y, Z, W) */
1028                 tempSE_VTX_FMT_0 =
1029                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
1030                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
1031                 tempSE_VTX_FMT_1 = 0x0;
1032
1033                 /*
1034                  * Depth buffer specific enables
1035                  */
1036                 if (flags & RADEON_DEPTH) {
1037                         /* Enable depth buffer */
1038                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
1039                 } else {
1040                         /* Disable depth buffer */
1041                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
1042                 }
1043
1044                 /*
1045                  * Stencil buffer specific enables
1046                  */
1047                 if (flags & RADEON_STENCIL) {
1048                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
1049                         tempRB3D_STENCILREFMASK = clear->depth_mask;
1050                 } else {
1051                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1052                         tempRB3D_STENCILREFMASK = 0x00000000;
1053                 }
1054
1055                 if (flags & RADEON_USE_COMP_ZBUF) {
1056                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1057                             RADEON_Z_DECOMPRESSION_ENABLE;
1058                 }
1059                 if (flags & RADEON_USE_HIERZ) {
1060                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1061                 }
1062
                     /* 26 dwords: the wait plus twelve register writes. */
1063                 BEGIN_RING(26);
1064                 RADEON_WAIT_UNTIL_2D_IDLE();
1065
1066                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
1067                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
1068                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
1069                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1070                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
1071                              tempRB3D_STENCILREFMASK);
1072                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
1073                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
1074                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
1075                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
1076                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
1077                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
1078                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
1079                 ADVANCE_RING();
1080
1081                 /* Make sure we restore the 3D state next time.
1082                  */
1083                 dev_priv->sarea_priv->ctx_owner = 0;
1084
1085                 for (i = 0; i < nbox; i++) {
1086
1087                         /* Funny that this should be required --
1088                          *  sets top-left?
1089                          */
1090                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1091
                             /* 14 dwords: packet header, primitive word and
                              * three vertices of (x, y, depth, w).  The w
                              * value 0x3f800000 is the bit pattern of 1.0f.
                              */
1092                         BEGIN_RING(14);
1093                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
1094                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1095                                   RADEON_PRIM_WALK_RING |
1096                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1097                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1098                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1099                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1100                         OUT_RING(0x3f800000);
1101                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1102                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1103                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1104                         OUT_RING(0x3f800000);
1105                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1106                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1107                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1108                         OUT_RING(0x3f800000);
1109                         ADVANCE_RING();
1110                 }
1111         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
1112
                     /* Same depth/stencil clear for microcode other than
                      * UCODE_R200.
                      */
1113                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1114
1115                 rb3d_cntl = depth_clear->rb3d_cntl;
1116
1117                 if (flags & RADEON_DEPTH) {
1118                         rb3d_cntl |= RADEON_Z_ENABLE;
1119                 } else {
1120                         rb3d_cntl &= ~RADEON_Z_ENABLE;
1121                 }
1122
1123                 if (flags & RADEON_STENCIL) {
1124                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
1125                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
1126                 } else {
1127                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1128                         rb3d_stencilrefmask = 0x00000000;
1129                 }
1130
1131                 if (flags & RADEON_USE_COMP_ZBUF) {
1132                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1133                             RADEON_Z_DECOMPRESSION_ENABLE;
1134                 }
1135                 if (flags & RADEON_USE_HIERZ) {
1136                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1137                 }
1138
                     /* 13 dwords: the wait, one three-dword PACKET0 and
                      * four register writes.
                      */
1139                 BEGIN_RING(13);
1140                 RADEON_WAIT_UNTIL_2D_IDLE();
1141
                     /* One PACKET0 carrying two values: 0 and rb3d_cntl.
                      * NOTE(review): presumably these land in PP_CNTL and
                      * the register following it (RB3D_CNTL) -- confirm
                      * against the register map.
                      */
1142                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
1143                 OUT_RING(0x00000000);
1144                 OUT_RING(rb3d_cntl);
1145
1146                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
1147                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
1148                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
1149                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
1150                 ADVANCE_RING();
1151
1152                 /* Make sure we restore the 3D state next time.
1153                  */
1154                 dev_priv->sarea_priv->ctx_owner = 0;
1155
1156                 for (i = 0; i < nbox; i++) {
1157
1158                         /* Funny that this should be required --
1159                          *  sets top-left?
1160                          */
1161                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1162
                             /* 15 dwords: packet header, vertex format word,
                              * primitive word and three vertices of
                              * (x, y, depth, 0).
                              */
1163                         BEGIN_RING(15);
1164
1165                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
1166                         OUT_RING(RADEON_VTX_Z_PRESENT |
1167                                  RADEON_VTX_PKCOLOR_PRESENT);
1168                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
1169                                   RADEON_PRIM_WALK_RING |
1170                                   RADEON_MAOS_ENABLE |
1171                                   RADEON_VTX_FMT_RADEON_MODE |
1172                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
1173
1174                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1175                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
1176                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1177                         OUT_RING(0x0);
1178
1179                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
1180                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1181                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1182                         OUT_RING(0x0);
1183
1184                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
1185                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
1186                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
1187                         OUT_RING(0x0);
1188
1189                         ADVANCE_RING();
1190                 }
1191         }
1192
1193         /* Increment the clear counter.  The client-side 3D driver must
1194          * wait on this value before performing the clear ioctl.  We
1195          * need this because the card's so damned fast...
1196          */
1197         dev_priv->sarea_priv->last_clear++;
1198
1199         BEGIN_RING(4);
1200
1201         RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
1202         RADEON_WAIT_UNTIL_IDLE();
1203
1204         ADVANCE_RING();
1205 }
1206
1207 static void radeon_cp_dispatch_swap(drm_device_t * dev)
1208 {
1209         drm_radeon_private_t *dev_priv = dev->dev_private;
1210         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1211         int nbox = sarea_priv->nbox;
1212         drm_clip_rect_t *pbox = sarea_priv->boxes;
1213         int i;
1214         RING_LOCALS;
1215         DRM_DEBUG("\n");
1216
1217         /* Do some trivial performance monitoring...
1218          */
1219         if (dev_priv->do_boxes)
1220                 radeon_cp_performance_boxes(dev_priv);
1221
1222         /* Wait for the 3D stream to idle before dispatching the bitblt.
1223          * This will prevent data corruption between the two streams.
1224          */
1225         BEGIN_RING(2);
1226
1227         RADEON_WAIT_UNTIL_3D_IDLE();
1228
1229         ADVANCE_RING();
1230
1231         for (i = 0; i < nbox; i++) {
1232                 int x = pbox[i].x1;
1233                 int y = pbox[i].y1;
1234                 int w = pbox[i].x2 - x;
1235                 int h = pbox[i].y2 - y;
1236
1237                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
1238
1239                 BEGIN_RING(7);
1240
1241                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1242                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1243                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1244                          RADEON_GMC_BRUSH_NONE |
1245                          (dev_priv->color_fmt << 8) |
1246                          RADEON_GMC_SRC_DATATYPE_COLOR |
1247                          RADEON_ROP3_S |
1248                          RADEON_DP_SRC_SOURCE_MEMORY |
1249                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1250
1251                 /* Make this work even if front & back are flipped:
1252                  */
1253                 if (dev_priv->current_page == 0) {
1254                         OUT_RING(dev_priv->back_pitch_offset);
1255                         OUT_RING(dev_priv->front_pitch_offset);
1256                 } else {
1257                         OUT_RING(dev_priv->front_pitch_offset);
1258                         OUT_RING(dev_priv->back_pitch_offset);
1259                 }
1260
1261                 OUT_RING((x << 16) | y);
1262                 OUT_RING((x << 16) | y);
1263                 OUT_RING((w << 16) | h);
1264
1265                 ADVANCE_RING();
1266         }
1267
1268         /* Increment the frame counter.  The client-side 3D driver must
1269          * throttle the framerate by waiting for this value before
1270          * performing the swapbuffer ioctl.
1271          */
1272         dev_priv->sarea_priv->last_frame++;
1273
1274         BEGIN_RING(4);
1275
1276         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1277         RADEON_WAIT_UNTIL_2D_IDLE();
1278
1279         ADVANCE_RING();
1280 }
1281
1282 static void radeon_cp_dispatch_flip(drm_device_t * dev)
1283 {
1284         drm_radeon_private_t *dev_priv = dev->dev_private;
1285         drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
1286         int offset = (dev_priv->current_page == 1)
1287             ? dev_priv->front_offset : dev_priv->back_offset;
1288         RING_LOCALS;
1289         DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
1290                   __FUNCTION__,
1291                   dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);
1292
1293         /* Do some trivial performance monitoring...
1294          */
1295         if (dev_priv->do_boxes) {
1296                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1297                 radeon_cp_performance_boxes(dev_priv);
1298         }
1299
1300         /* Update the frame offsets for both CRTCs
1301          */
1302         BEGIN_RING(6);
1303
1304         RADEON_WAIT_UNTIL_3D_IDLE();
1305         OUT_RING_REG(RADEON_CRTC_OFFSET,
1306                      ((sarea->frame.y * dev_priv->front_pitch +
1307                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
1308                      + offset);
1309         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1310                      + offset);
1311
1312         ADVANCE_RING();
1313
1314         /* Increment the frame counter.  The client-side 3D driver must
1315          * throttle the framerate by waiting for this value before
1316          * performing the swapbuffer ioctl.
1317          */
1318         dev_priv->sarea_priv->last_frame++;
1319         dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
1320             1 - dev_priv->current_page;
1321
1322         BEGIN_RING(2);
1323
1324         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
1325
1326         ADVANCE_RING();
1327 }
1328
1329 static int bad_prim_vertex_nr(int primitive, int nr)
1330 {
1331         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1332         case RADEON_PRIM_TYPE_NONE:
1333         case RADEON_PRIM_TYPE_POINT:
1334                 return nr < 1;
1335         case RADEON_PRIM_TYPE_LINE:
1336                 return (nr & 1) || nr == 0;
1337         case RADEON_PRIM_TYPE_LINE_STRIP:
1338                 return nr < 2;
1339         case RADEON_PRIM_TYPE_TRI_LIST:
1340         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1341         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1342         case RADEON_PRIM_TYPE_RECT_LIST:
1343                 return nr % 3 || nr == 0;
1344         case RADEON_PRIM_TYPE_TRI_FAN:
1345         case RADEON_PRIM_TYPE_TRI_STRIP:
1346                 return nr < 3;
1347         default:
1348                 return 1;
1349         }
1350 }
1351
/*
 * Description of one primitive to be drawn from a vertex buffer, as
 * consumed by radeon_cp_dispatch_vertex().
 */
1352 typedef struct {
1353         unsigned int start;     /* added to the buffer's GART offset to form the vertex data address */
1354         unsigned int finish;    /* only logged by dispatch_vertex; presumably the end offset -- confirm */
1355         unsigned int prim;      /* hardware primitive word; type bits checked via RADEON_PRIM_TYPE_MASK */
1356         unsigned int numverts;  /* vertex count; validated against the primitive type */
1357         unsigned int offset;    /* not read by dispatch_vertex; presumably for indexed dispatch -- confirm */
1358         unsigned int vc_format; /* vertex format word emitted with the draw packet */
1359 } drm_radeon_tcl_prim_t;
1360
1361 static void radeon_cp_dispatch_vertex(drm_device_t * dev,
1362                                       drm_buf_t * buf,
1363                                       drm_radeon_tcl_prim_t * prim)
1364 {
1365         drm_radeon_private_t *dev_priv = dev->dev_private;
1366         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1367         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1368         int numverts = (int)prim->numverts;
1369         int nbox = sarea_priv->nbox;
1370         int i = 0;
1371         RING_LOCALS;
1372
1373         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1374                   prim->prim,
1375                   prim->vc_format, prim->start, prim->finish, prim->numverts);
1376
1377         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
1378                 DRM_ERROR("bad prim %x numverts %d\n",
1379                           prim->prim, prim->numverts);
1380                 return;
1381         }
1382
1383         do {
1384                 /* Emit the next cliprect */
1385                 if (i < nbox) {
1386                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1387                 }
1388
1389                 /* Emit the vertex buffer rendering commands */
1390                 BEGIN_RING(5);
1391
1392                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
1393                 OUT_RING(offset);
1394                 OUT_RING(numverts);
1395                 OUT_RING(prim->vc_format);
1396                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
1397                          RADEON_COLOR_ORDER_RGBA |
1398                          RADEON_VTX_FMT_RADEON_MODE |
1399                          (numverts << RADEON_NUM_VERTICES_SHIFT));
1400
1401                 ADVANCE_RING();
1402
1403                 i++;
1404         } while (i < nbox);
1405 }
1406
1407 static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
1408 {
1409         drm_radeon_private_t *dev_priv = dev->dev_private;
1410         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1411         RING_LOCALS;
1412
1413         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1414
1415         /* Emit the vertex buffer age */
1416         BEGIN_RING(2);
1417         RADEON_DISPATCH_AGE(buf_priv->age);
1418         ADVANCE_RING();
1419
1420         buf->pending = 1;
1421         buf->used = 0;
1422 }
1423
1424 static void radeon_cp_dispatch_indirect(drm_device_t * dev,
1425                                         drm_buf_t * buf, int start, int end)
1426 {
1427         drm_radeon_private_t *dev_priv = dev->dev_private;
1428         RING_LOCALS;
1429         DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
1430
1431         if (start != end) {
1432                 int offset = (dev_priv->gart_buffers_offset
1433                               + buf->offset + start);
1434                 int dwords = (end - start + 3) / sizeof(u32);
1435
1436                 /* Indirect buffer data must be an even number of
1437                  * dwords, so if we've been given an odd number we must
1438                  * pad the data with a Type-2 CP packet.
1439                  */
1440                 if (dwords & 1) {
1441                         u32 *data = (u32 *)
1442                             ((char *)dev->agp_buffer_map->handle
1443                              + buf->offset + start);
1444                         data[dwords++] = RADEON_CP_PACKET2;
1445                 }
1446
1447                 /* Fire off the indirect buffer */
1448                 BEGIN_RING(3);
1449
1450                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
1451                 OUT_RING(offset);
1452                 OUT_RING(dwords);
1453
1454                 ADVANCE_RING();
1455         }
1456 }
1457
1458 static void radeon_cp_dispatch_indices(drm_device_t * dev,
1459                                        drm_buf_t * elt_buf,
1460                                        drm_radeon_tcl_prim_t * prim)
1461 {
1462         drm_radeon_private_t *dev_priv = dev->dev_private;
1463         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1464         int offset = dev_priv->gart_buffers_offset + prim->offset;
1465         u32 *data;
1466         int dwords;
1467         int i = 0;
1468         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1469         int count = (prim->finish - start) / sizeof(u16);
1470         int nbox = sarea_priv->nbox;
1471
1472         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1473                   prim->prim,
1474                   prim->vc_format,
1475                   prim->start, prim->finish, prim->offset, prim->numverts);
1476
1477         if (bad_prim_vertex_nr(prim->prim, count)) {
1478                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
1479                 return;
1480         }
1481
1482         if (start >= prim->finish || (prim->start & 0x7)) {
1483                 DRM_ERROR("buffer prim %d\n", prim->prim);
1484                 return;
1485         }
1486
1487         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1488
1489         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
1490                         elt_buf->offset + prim->start);
1491
1492         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1493         data[1] = offset;
1494         data[2] = prim->numverts;
1495         data[3] = prim->vc_format;
1496         data[4] = (prim->prim |
1497                    RADEON_PRIM_WALK_IND |
1498                    RADEON_COLOR_ORDER_RGBA |
1499                    RADEON_VTX_FMT_RADEON_MODE |
1500                    (count << RADEON_NUM_VERTICES_SHIFT));
1501
1502         do {
1503                 if (i < nbox)
1504                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
1505
1506                 radeon_cp_dispatch_indirect(dev, elt_buf,
1507                                             prim->start, prim->finish);
1508
1509                 i++;
1510         } while (i < nbox);
1511
1512 }
1513
1514 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1515
1516 static int radeon_cp_dispatch_texture(DRMFILE filp,
1517                                       drm_device_t * dev,
1518                                       drm_radeon_texture_t * tex,
1519                                       drm_radeon_tex_image_t * image)
1520 {
1521         drm_radeon_private_t *dev_priv = dev->dev_private;
1522         drm_file_t *filp_priv;
1523         drm_buf_t *buf;
1524         u32 format;
1525         u32 *buffer;
1526         const u8 __user *data;
1527         int size, dwords, tex_width, blit_width, spitch;
1528         u32 height;
1529         int i;
1530         u32 texpitch, microtile;
1531         u32 offset;
1532         RING_LOCALS;
1533
1534         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
1535
1536         if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
1537                 DRM_ERROR("Invalid destination offset\n");
1538                 return DRM_ERR(EINVAL);
1539         }
1540
1541         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1542
1543         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1544          * up with the texture data from the host data blit, otherwise
1545          * part of the texture image may be corrupted.
1546          */
1547         BEGIN_RING(4);
1548         RADEON_FLUSH_CACHE();
1549         RADEON_WAIT_UNTIL_IDLE();
1550         ADVANCE_RING();
1551
1552         /* The compiler won't optimize away a division by a variable,
1553          * even if the only legal values are powers of two.  Thus, we'll
1554          * use a shift instead.
1555          */
1556         switch (tex->format) {
1557         case RADEON_TXFORMAT_ARGB8888:
1558         case RADEON_TXFORMAT_RGBA8888:
1559                 format = RADEON_COLOR_FORMAT_ARGB8888;
1560                 tex_width = tex->width * 4;
1561                 blit_width = image->width * 4;
1562                 break;
1563         case RADEON_TXFORMAT_AI88:
1564         case RADEON_TXFORMAT_ARGB1555:
1565         case RADEON_TXFORMAT_RGB565:
1566         case RADEON_TXFORMAT_ARGB4444:
1567         case RADEON_TXFORMAT_VYUY422:
1568         case RADEON_TXFORMAT_YVYU422:
1569                 format = RADEON_COLOR_FORMAT_RGB565;
1570                 tex_width = tex->width * 2;
1571                 blit_width = image->width * 2;
1572                 break;
1573         case RADEON_TXFORMAT_I8:
1574         case RADEON_TXFORMAT_RGB332:
1575                 format = RADEON_COLOR_FORMAT_CI8;
1576                 tex_width = tex->width * 1;
1577                 blit_width = image->width * 1;
1578                 break;
1579         default:
1580                 DRM_ERROR("invalid texture format %d\n", tex->format);
1581                 return DRM_ERR(EINVAL);
1582         }
1583         spitch = blit_width >> 6;
1584         if (spitch == 0 && image->height > 1)
1585                 return DRM_ERR(EINVAL);
1586
1587         texpitch = tex->pitch;
1588         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1589                 microtile = 1;
1590                 if (tex_width < 64) {
1591                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1592                         /* we got tiled coordinates, untile them */
1593                         image->x *= 2;
1594                 }
1595         } else
1596                 microtile = 0;
1597
1598         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1599
1600         do {
1601                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1602                           tex->offset >> 10, tex->pitch, tex->format,
1603                           image->x, image->y, image->width, image->height);
1604
1605                 /* Make a copy of some parameters in case we have to
1606                  * update them for a multi-pass texture blit.
1607                  */
1608                 height = image->height;
1609                 data = (const u8 __user *)image->data;
1610
1611                 size = height * blit_width;
1612
1613                 if (size > RADEON_MAX_TEXTURE_SIZE) {
1614                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1615                         size = height * blit_width;
1616                 } else if (size < 4 && size > 0) {
1617                         size = 4;
1618                 } else if (size == 0) {
1619                         return 0;
1620                 }
1621
1622                 buf = radeon_freelist_get(dev);
1623                 if (0 && !buf) {
1624                         radeon_do_cp_idle(dev_priv);
1625                         buf = radeon_freelist_get(dev);
1626                 }
1627                 if (!buf) {
1628                         DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1629                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1630                                 return DRM_ERR(EFAULT);
1631                         return DRM_ERR(EAGAIN);
1632                 }
1633
1634                 /* Dispatch the indirect buffer.
1635                  */
1636                 buffer =
1637                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1638                 dwords = size / 4;
1639
1640                 if (microtile) {
1641                         /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1642                            however, we cannot use blitter directly for texture width < 64 bytes,
1643                            since minimum tex pitch is 64 bytes and we need this to match
1644                            the texture width, otherwise the blitter will tile it wrong.
1645                            Thus, tiling manually in this case. Additionally, need to special
1646                            case tex height = 1, since our actual image will have height 2
1647                            and we need to ensure we don't read beyond the texture size
1648                            from user space. */
1649                         if (tex->height == 1) {
1650                                 if (tex_width >= 64 || tex_width <= 16) {
1651                                         if (DRM_COPY_FROM_USER(buffer, data,
1652                                                                tex_width *
1653                                                                sizeof(u32))) {
1654                                                 DRM_ERROR
1655                                                     ("EFAULT on pad, %d bytes\n",
1656                                                      tex_width);
1657                                                 return DRM_ERR(EFAULT);
1658                                         }
1659                                 } else if (tex_width == 32) {
1660                                         if (DRM_COPY_FROM_USER
1661                                             (buffer, data, 16)) {
1662                                                 DRM_ERROR
1663                                                     ("EFAULT on pad, %d bytes\n",
1664                                                      tex_width);
1665                                                 return DRM_ERR(EFAULT);
1666                                         }
1667                                         if (DRM_COPY_FROM_USER
1668                                             (buffer + 8, data + 16, 16)) {
1669                                                 DRM_ERROR
1670                                                     ("EFAULT on pad, %d bytes\n",
1671                                                      tex_width);
1672                                                 return DRM_ERR(EFAULT);
1673                                         }
1674                                 }
1675                         } else if (tex_width >= 64 || tex_width == 16) {
1676                                 if (DRM_COPY_FROM_USER(buffer, data,
1677                                                        dwords * sizeof(u32))) {
1678                                         DRM_ERROR("EFAULT on data, %d dwords\n",
1679                                                   dwords);
1680                                         return DRM_ERR(EFAULT);
1681                                 }
1682                         } else if (tex_width < 16) {
1683                                 for (i = 0; i < tex->height; i++) {
1684                                         if (DRM_COPY_FROM_USER
1685                                             (buffer, data, tex_width)) {
1686                                                 DRM_ERROR
1687                                                     ("EFAULT on pad, %d bytes\n",
1688                                                      tex_width);
1689                                                 return DRM_ERR(EFAULT);
1690                                         }
1691                                         buffer += 4;
1692                                         data += tex_width;
1693                                 }
1694                         } else if (tex_width == 32) {
1695                                 /* TODO: make sure this works when not fitting in one buffer
1696                                    (i.e. 32bytes x 2048...) */
1697                                 for (i = 0; i < tex->height; i += 2) {
1698                                         if (DRM_COPY_FROM_USER
1699                                             (buffer, data, 16)) {
1700                                                 DRM_ERROR
1701                                                     ("EFAULT on pad, %d bytes\n",
1702                                                      tex_width);
1703                                                 return DRM_ERR(EFAULT);
1704                                         }
1705                                         data += 16;
1706                                         if (DRM_COPY_FROM_USER
1707                                             (buffer + 8, data, 16)) {
1708                                                 DRM_ERROR
1709                                                     ("EFAULT on pad, %d bytes\n",
1710                                                      tex_width);
1711                                                 return DRM_ERR(EFAULT);
1712                                         }
1713                                         data += 16;
1714                                         if (DRM_COPY_FROM_USER
1715                                             (buffer + 4, data, 16)) {
1716                                                 DRM_ERROR
1717                                                     ("EFAULT on pad, %d bytes\n",
1718                                                      tex_width);
1719                                                 return DRM_ERR(EFAULT);
1720                                         }
1721                                         data += 16;
1722                                         if (DRM_COPY_FROM_USER
1723                                             (buffer + 12, data, 16)) {
1724                                                 DRM_ERROR
1725                                                     ("EFAULT on pad, %d bytes\n",
1726                                                      tex_width);
1727                                                 return DRM_ERR(EFAULT);
1728                                         }
1729                                         data += 16;
1730                                         buffer += 16;
1731                                 }
1732                         }
1733                 } else {
1734                         if (tex_width >= 32) {
1735                                 /* Texture image width is larger than the minimum, so we
1736                                  * can upload it directly.
1737                                  */
1738                                 if (DRM_COPY_FROM_USER(buffer, data,
1739                                                        dwords * sizeof(u32))) {
1740                                         DRM_ERROR("EFAULT on data, %d dwords\n",
1741                                                   dwords);
1742                                         return DRM_ERR(EFAULT);
1743                                 }
1744                         } else {
1745                                 /* Texture image width is less than the minimum, so we
1746                                  * need to pad out each image scanline to the minimum
1747                                  * width.
1748                                  */
1749                                 for (i = 0; i < tex->height; i++) {
1750                                         if (DRM_COPY_FROM_USER
1751                                             (buffer, data, tex_width)) {
1752                                                 DRM_ERROR
1753                                                     ("EFAULT on pad, %d bytes\n",
1754                                                      tex_width);
1755                                                 return DRM_ERR(EFAULT);
1756                                         }
1757                                         buffer += 8;
1758                                         data += tex_width;
1759                                 }
1760                         }
1761                 }
1762
1763                 buf->filp = filp;
1764                 buf->used = size;
1765                 offset = dev_priv->gart_buffers_offset + buf->offset;
1766                 BEGIN_RING(9);
1767                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1768                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1769                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1770                          RADEON_GMC_BRUSH_NONE |
1771                          (format << 8) |
1772                          RADEON_GMC_SRC_DATATYPE_COLOR |
1773                          RADEON_ROP3_S |
1774                          RADEON_DP_SRC_SOURCE_MEMORY |
1775                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
1776                 OUT_RING((spitch << 22) | (offset >> 10));
1777                 OUT_RING((texpitch << 22) | (tex->offset >> 10));
1778                 OUT_RING(0);
1779                 OUT_RING((image->x << 16) | image->y);
1780                 OUT_RING((image->width << 16) | height);
1781                 RADEON_WAIT_UNTIL_2D_IDLE();
1782                 ADVANCE_RING();
1783
1784                 radeon_cp_discard_buffer(dev, buf);
1785
1786                 /* Update the input parameters for next time */
1787                 image->y += height;
1788                 image->height -= height;
1789                 image->data = (const u8 __user *)image->data + size;
1790         } while (image->height > 0);
1791
1792         /* Flush the pixel cache after the blit completes.  This ensures
1793          * the texture data is written out to memory before rendering
1794          * continues.
1795          */
1796         BEGIN_RING(4);
1797         RADEON_FLUSH_CACHE();
1798         RADEON_WAIT_UNTIL_2D_IDLE();
1799         ADVANCE_RING();
1800         return 0;
1801 }
1802
1803 static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
1804 {
1805         drm_radeon_private_t *dev_priv = dev->dev_private;
1806         int i;
1807         RING_LOCALS;
1808         DRM_DEBUG("\n");
1809
1810         BEGIN_RING(35);
1811
1812         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
1813         OUT_RING(0x00000000);
1814
1815         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
1816         for (i = 0; i < 32; i++) {
1817                 OUT_RING(stipple[i]);
1818         }
1819
1820         ADVANCE_RING();
1821 }
1822
1823 static void radeon_apply_surface_regs(int surf_index,
1824                                       drm_radeon_private_t * dev_priv)
1825 {
1826         if (!dev_priv->mmio)
1827                 return;
1828
1829         radeon_do_cp_idle(dev_priv);
1830
1831         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
1832                      dev_priv->surfaces[surf_index].flags);
1833         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
1834                      dev_priv->surfaces[surf_index].lower);
1835         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
1836                      dev_priv->surfaces[surf_index].upper);
1837 }
1838
1839 /* Allocates a virtual surface
1840  * doesn't always allocate a real surface, will stretch an existing
1841  * surface when possible.
1842  *
1843  * Note that refcount can be at most 2, since during a free refcount=3
1844  * might mean we have to allocate a new surface which might not always
1845  * be available.
1846  * For example : we allocate three contigous surfaces ABC. If B is
1847  * freed, we suddenly need two surfaces to store A and C, which might
1848  * not always be available.
1849  */
1850 static int alloc_surface(drm_radeon_surface_alloc_t * new,
1851                          drm_radeon_private_t * dev_priv, DRMFILE filp)
1852 {
1853         struct radeon_virt_surface *s;
1854         int i;
1855         int virt_surface_index;
1856         uint32_t new_upper, new_lower;
1857
1858         new_lower = new->address;
1859         new_upper = new_lower + new->size - 1;
1860
1861         /* sanity check */
1862         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1863             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
1864              RADEON_SURF_ADDRESS_FIXED_MASK)
1865             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1866                 return -1;
1867
1868         /* make sure there is no overlap with existing surfaces */
1869         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1870                 if ((dev_priv->surfaces[i].refcount != 0) &&
1871                     (((new_lower >= dev_priv->surfaces[i].lower) &&
1872                       (new_lower < dev_priv->surfaces[i].upper)) ||
1873                      ((new_lower < dev_priv->surfaces[i].lower) &&
1874                       (new_upper > dev_priv->surfaces[i].lower)))) {
1875                         return -1;
1876                 }
1877         }
1878
1879         /* find a virtual surface */
1880         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
1881                 if (dev_priv->virt_surfaces[i].filp == 0)
1882                         break;
1883         if (i == 2 * RADEON_MAX_SURFACES) {
1884                 return -1;
1885         }
1886         virt_surface_index = i;
1887
1888         /* try to reuse an existing surface */
1889         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1890                 /* extend before */
1891                 if ((dev_priv->surfaces[i].refcount == 1) &&
1892                     (new->flags == dev_priv->surfaces[i].flags) &&
1893                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1894                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1895                         s->surface_index = i;
1896                         s->lower = new_lower;
1897                         s->upper = new_upper;
1898                         s->flags = new->flags;
1899                         s->filp = filp;
1900                         dev_priv->surfaces[i].refcount++;
1901                         dev_priv->surfaces[i].lower = s->lower;
1902                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1903                         return virt_surface_index;
1904                 }
1905
1906                 /* extend after */
1907                 if ((dev_priv->surfaces[i].refcount == 1) &&
1908                     (new->flags == dev_priv->surfaces[i].flags) &&
1909                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
1910                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1911                         s->surface_index = i;
1912                         s->lower = new_lower;
1913                         s->upper = new_upper;
1914                         s->flags = new->flags;
1915                         s->filp = filp;
1916                         dev_priv->surfaces[i].refcount++;
1917                         dev_priv->surfaces[i].upper = s->upper;
1918                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1919                         return virt_surface_index;
1920                 }
1921         }
1922
1923         /* okay, we need a new one */
1924         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1925                 if (dev_priv->surfaces[i].refcount == 0) {
1926                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
1927                         s->surface_index = i;
1928                         s->lower = new_lower;
1929                         s->upper = new_upper;
1930                         s->flags = new->flags;
1931                         s->filp = filp;
1932                         dev_priv->surfaces[i].refcount = 1;
1933                         dev_priv->surfaces[i].lower = s->lower;
1934                         dev_priv->surfaces[i].upper = s->upper;
1935                         dev_priv->surfaces[i].flags = s->flags;
1936                         radeon_apply_surface_regs(s->surface_index, dev_priv);
1937                         return virt_surface_index;
1938                 }
1939         }
1940
1941         /* we didn't find anything */
1942         return -1;
1943 }
1944
1945 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
1946                         int lower)
1947 {
1948         struct radeon_virt_surface *s;
1949         int i;
1950         /* find the virtual surface */
1951         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1952                 s = &(dev_priv->virt_surfaces[i]);
1953                 if (s->filp) {
1954                         if ((lower == s->lower) && (filp == s->filp)) {
1955                                 if (dev_priv->surfaces[s->surface_index].
1956                                     lower == s->lower)
1957                                         dev_priv->surfaces[s->surface_index].
1958                                             lower = s->upper;
1959
1960                                 if (dev_priv->surfaces[s->surface_index].
1961                                     upper == s->upper)
1962                                         dev_priv->surfaces[s->surface_index].
1963                                             upper = s->lower;
1964
1965                                 dev_priv->surfaces[s->surface_index].refcount--;
1966                                 if (dev_priv->surfaces[s->surface_index].
1967                                     refcount == 0)
1968                                         dev_priv->surfaces[s->surface_index].
1969                                             flags = 0;
1970                                 s->filp = NULL;
1971                                 radeon_apply_surface_regs(s->surface_index,
1972                                                           dev_priv);
1973                                 return 0;
1974                         }
1975                 }
1976         }
1977         return 1;
1978 }
1979
1980 static void radeon_surfaces_release(DRMFILE filp,
1981                                     drm_radeon_private_t * dev_priv)
1982 {
1983         int i;
1984         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
1985                 if (dev_priv->virt_surfaces[i].filp == filp)
1986                         free_surface(filp, dev_priv,
1987                                      dev_priv->virt_surfaces[i].lower);
1988         }
1989 }
1990
1991 /* ================================================================
1992  * IOCTL functions
1993  */
1994 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1995 {
1996         DRM_DEVICE;
1997         drm_radeon_private_t *dev_priv = dev->dev_private;
1998         drm_radeon_surface_alloc_t alloc;
1999
2000         if (!dev_priv) {
2001                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2002                 return DRM_ERR(EINVAL);
2003         }
2004
2005         DRM_COPY_FROM_USER_IOCTL(alloc,
2006                                  (drm_radeon_surface_alloc_t __user *) data,
2007                                  sizeof(alloc));
2008
2009         if (alloc_surface(&alloc, dev_priv, filp) == -1)
2010                 return DRM_ERR(EINVAL);
2011         else
2012                 return 0;
2013 }
2014
2015 static int radeon_surface_free(DRM_IOCTL_ARGS)
2016 {
2017         DRM_DEVICE;
2018         drm_radeon_private_t *dev_priv = dev->dev_private;
2019         drm_radeon_surface_free_t memfree;
2020
2021         if (!dev_priv) {
2022                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2023                 return DRM_ERR(EINVAL);
2024         }
2025
2026         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *) data,
2027                                  sizeof(memfree));
2028
2029         if (free_surface(filp, dev_priv, memfree.address))
2030                 return DRM_ERR(EINVAL);
2031         else
2032                 return 0;
2033 }
2034
2035 static int radeon_cp_clear(DRM_IOCTL_ARGS)
2036 {
2037         DRM_DEVICE;
2038         drm_radeon_private_t *dev_priv = dev->dev_private;
2039         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2040         drm_radeon_clear_t clear;
2041         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
2042         DRM_DEBUG("\n");
2043
2044         LOCK_TEST_WITH_RETURN(dev, filp);
2045
2046         DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
2047                                  sizeof(clear));
2048
2049         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2050
2051         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2052                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2053
2054         if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
2055                                sarea_priv->nbox * sizeof(depth_boxes[0])))
2056                 return DRM_ERR(EFAULT);
2057
2058         radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
2059
2060         COMMIT_RING();
2061         return 0;
2062 }
2063
2064 /* Not sure why this isn't set all the time:
2065  */
2066 static int radeon_do_init_pageflip(drm_device_t * dev)
2067 {
2068         drm_radeon_private_t *dev_priv = dev->dev_private;
2069         RING_LOCALS;
2070
2071         DRM_DEBUG("\n");
2072
2073         BEGIN_RING(6);
2074         RADEON_WAIT_UNTIL_3D_IDLE();
2075         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
2076         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
2077                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2078         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
2079         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
2080                  RADEON_CRTC_OFFSET_FLIP_CNTL);
2081         ADVANCE_RING();
2082
2083         dev_priv->page_flipping = 1;
2084         dev_priv->current_page = 0;
2085         dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2086
2087         return 0;
2088 }
2089
2090 /* Called whenever a client dies, from drm_release.
2091  * NOTE:  Lock isn't necessarily held when this is called!
2092  */
2093 static int radeon_do_cleanup_pageflip(drm_device_t * dev)
2094 {
2095         drm_radeon_private_t *dev_priv = dev->dev_private;
2096         DRM_DEBUG("\n");
2097
2098         if (dev_priv->current_page != 0)
2099                 radeon_cp_dispatch_flip(dev);
2100
2101         dev_priv->page_flipping = 0;
2102         return 0;
2103 }
2104
2105 /* Swapping and flipping are different operations, need different ioctls.
2106  * They can & should be intermixed to support multiple 3d windows.
2107  */
2108 static int radeon_cp_flip(DRM_IOCTL_ARGS)
2109 {
2110         DRM_DEVICE;
2111         drm_radeon_private_t *dev_priv = dev->dev_private;
2112         DRM_DEBUG("\n");
2113
2114         LOCK_TEST_WITH_RETURN(dev, filp);
2115
2116         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2117
2118         if (!dev_priv->page_flipping)
2119                 radeon_do_init_pageflip(dev);
2120
2121         radeon_cp_dispatch_flip(dev);
2122
2123         COMMIT_RING();
2124         return 0;
2125 }
2126
2127 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2128 {
2129         DRM_DEVICE;
2130         drm_radeon_private_t *dev_priv = dev->dev_private;
2131         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2132         DRM_DEBUG("\n");
2133
2134         LOCK_TEST_WITH_RETURN(dev, filp);
2135
2136         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2137
2138         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2139                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2140
2141         radeon_cp_dispatch_swap(dev);
2142         dev_priv->sarea_priv->ctx_owner = 0;
2143
2144         COMMIT_RING();
2145         return 0;
2146 }
2147
2148 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
2149 {
2150         DRM_DEVICE;
2151         drm_radeon_private_t *dev_priv = dev->dev_private;
2152         drm_file_t *filp_priv;
2153         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2154         drm_device_dma_t *dma = dev->dma;
2155         drm_buf_t *buf;
2156         drm_radeon_vertex_t vertex;
2157         drm_radeon_tcl_prim_t prim;
2158
2159         LOCK_TEST_WITH_RETURN(dev, filp);
2160
2161         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2162
2163         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
2164                                  sizeof(vertex));
2165
2166         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
2167                   DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
2168
2169         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2170                 DRM_ERROR("buffer index %d (of %d max)\n",
2171                           vertex.idx, dma->buf_count - 1);
2172                 return DRM_ERR(EINVAL);
2173         }
2174         if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2175                 DRM_ERROR("buffer prim %d\n", vertex.prim);
2176                 return DRM_ERR(EINVAL);
2177         }
2178
2179         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2180         VB_AGE_TEST_WITH_RETURN(dev_priv);
2181
2182         buf = dma->buflist[vertex.idx];
2183
2184         if (buf->filp != filp) {
2185                 DRM_ERROR("process %d using buffer owned by %p\n",
2186                           DRM_CURRENTPID, buf->filp);
2187                 return DRM_ERR(EINVAL);
2188         }
2189         if (buf->pending) {
2190                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2191                 return DRM_ERR(EINVAL);
2192         }
2193
2194         /* Build up a prim_t record:
2195          */
2196         if (vertex.count) {
2197                 buf->used = vertex.count;       /* not used? */
2198
2199                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2200                         if (radeon_emit_state(dev_priv, filp_priv,
2201                                               &sarea_priv->context_state,
2202                                               sarea_priv->tex_state,
2203                                               sarea_priv->dirty)) {
2204                                 DRM_ERROR("radeon_emit_state failed\n");
2205                                 return DRM_ERR(EINVAL);
2206                         }
2207
2208                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2209                                                RADEON_UPLOAD_TEX1IMAGES |
2210                                                RADEON_UPLOAD_TEX2IMAGES |
2211                                                RADEON_REQUIRE_QUIESCENCE);
2212                 }
2213
2214                 prim.start = 0;
2215                 prim.finish = vertex.count;     /* unused */
2216                 prim.prim = vertex.prim;
2217                 prim.numverts = vertex.count;
2218                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2219
2220                 radeon_cp_dispatch_vertex(dev, buf, &prim);
2221         }
2222
2223         if (vertex.discard) {
2224                 radeon_cp_discard_buffer(dev, buf);
2225         }
2226
2227         COMMIT_RING();
2228         return 0;
2229 }
2230
2231 static int radeon_cp_indices(DRM_IOCTL_ARGS)
2232 {
2233         DRM_DEVICE;
2234         drm_radeon_private_t *dev_priv = dev->dev_private;
2235         drm_file_t *filp_priv;
2236         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2237         drm_device_dma_t *dma = dev->dma;
2238         drm_buf_t *buf;
2239         drm_radeon_indices_t elts;
2240         drm_radeon_tcl_prim_t prim;
2241         int count;
2242
2243         LOCK_TEST_WITH_RETURN(dev, filp);
2244
2245         if (!dev_priv) {
2246                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2247                 return DRM_ERR(EINVAL);
2248         }
2249
2250         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2251
2252         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
2253                                  sizeof(elts));
2254
2255         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
2256                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
2257
2258         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
2259                 DRM_ERROR("buffer index %d (of %d max)\n",
2260                           elts.idx, dma->buf_count - 1);
2261                 return DRM_ERR(EINVAL);
2262         }
2263         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
2264                 DRM_ERROR("buffer prim %d\n", elts.prim);
2265                 return DRM_ERR(EINVAL);
2266         }
2267
2268         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2269         VB_AGE_TEST_WITH_RETURN(dev_priv);
2270
2271         buf = dma->buflist[elts.idx];
2272
2273         if (buf->filp != filp) {
2274                 DRM_ERROR("process %d using buffer owned by %p\n",
2275                           DRM_CURRENTPID, buf->filp);
2276                 return DRM_ERR(EINVAL);
2277         }
2278         if (buf->pending) {
2279                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
2280                 return DRM_ERR(EINVAL);
2281         }
2282
2283         count = (elts.end - elts.start) / sizeof(u16);
2284         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2285
2286         if (elts.start & 0x7) {
2287                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
2288                 return DRM_ERR(EINVAL);
2289         }
2290         if (elts.start < buf->used) {
2291                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
2292                 return DRM_ERR(EINVAL);
2293         }
2294
2295         buf->used = elts.end;
2296
2297         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2298                 if (radeon_emit_state(dev_priv, filp_priv,
2299                                       &sarea_priv->context_state,
2300                                       sarea_priv->tex_state,
2301                                       sarea_priv->dirty)) {
2302                         DRM_ERROR("radeon_emit_state failed\n");
2303                         return DRM_ERR(EINVAL);
2304                 }
2305
2306                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2307                                        RADEON_UPLOAD_TEX1IMAGES |
2308                                        RADEON_UPLOAD_TEX2IMAGES |
2309                                        RADEON_REQUIRE_QUIESCENCE);
2310         }
2311
2312         /* Build up a prim_t record:
2313          */
2314         prim.start = elts.start;
2315         prim.finish = elts.end;
2316         prim.prim = elts.prim;
2317         prim.offset = 0;        /* offset from start of dma buffers */
2318         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
2319         prim.vc_format = dev_priv->sarea_priv->vc_format;
2320
2321         radeon_cp_dispatch_indices(dev, buf, &prim);
2322         if (elts.discard) {
2323                 radeon_cp_discard_buffer(dev, buf);
2324         }
2325
2326         COMMIT_RING();
2327         return 0;
2328 }
2329
2330 static int radeon_cp_texture(DRM_IOCTL_ARGS)
2331 {
2332         DRM_DEVICE;
2333         drm_radeon_private_t *dev_priv = dev->dev_private;
2334         drm_radeon_texture_t tex;
2335         drm_radeon_tex_image_t image;
2336         int ret;
2337
2338         LOCK_TEST_WITH_RETURN(dev, filp);
2339
2340         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
2341                                  sizeof(tex));
2342
2343         if (tex.image == NULL) {
2344                 DRM_ERROR("null texture image!\n");
2345                 return DRM_ERR(EINVAL);
2346         }
2347
2348         if (DRM_COPY_FROM_USER(&image,
2349                                (drm_radeon_tex_image_t __user *) tex.image,
2350                                sizeof(image)))
2351                 return DRM_ERR(EFAULT);
2352
2353         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2354         VB_AGE_TEST_WITH_RETURN(dev_priv);
2355
2356         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
2357
2358         COMMIT_RING();
2359         return ret;
2360 }
2361
2362 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
2363 {
2364         DRM_DEVICE;
2365         drm_radeon_private_t *dev_priv = dev->dev_private;
2366         drm_radeon_stipple_t stipple;
2367         u32 mask[32];
2368
2369         LOCK_TEST_WITH_RETURN(dev, filp);
2370
2371         DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
2372                                  sizeof(stipple));
2373
2374         if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
2375                 return DRM_ERR(EFAULT);
2376
2377         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2378
2379         radeon_cp_dispatch_stipple(dev, mask);
2380
2381         COMMIT_RING();
2382         return 0;
2383 }
2384
2385 static int radeon_cp_indirect(DRM_IOCTL_ARGS)
2386 {
2387         DRM_DEVICE;
2388         drm_radeon_private_t *dev_priv = dev->dev_private;
2389         drm_device_dma_t *dma = dev->dma;
2390         drm_buf_t *buf;
2391         drm_radeon_indirect_t indirect;
2392         RING_LOCALS;
2393
2394         LOCK_TEST_WITH_RETURN(dev, filp);
2395
2396         if (!dev_priv) {
2397                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2398                 return DRM_ERR(EINVAL);
2399         }
2400
2401         DRM_COPY_FROM_USER_IOCTL(indirect,
2402                                  (drm_radeon_indirect_t __user *) data,
2403                                  sizeof(indirect));
2404
2405         DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
2406                   indirect.idx, indirect.start, indirect.end, indirect.discard);
2407
2408         if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
2409                 DRM_ERROR("buffer index %d (of %d max)\n",
2410                           indirect.idx, dma->buf_count - 1);
2411                 return DRM_ERR(EINVAL);
2412         }
2413
2414         buf = dma->buflist[indirect.idx];
2415
2416         if (buf->filp != filp) {
2417                 DRM_ERROR("process %d using buffer owned by %p\n",
2418                           DRM_CURRENTPID, buf->filp);
2419                 return DRM_ERR(EINVAL);
2420         }
2421         if (buf->pending) {
2422                 DRM_ERROR("sending pending buffer %d\n", indirect.idx);
2423                 return DRM_ERR(EINVAL);
2424         }
2425
2426         if (indirect.start < buf->used) {
2427                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
2428                           indirect.start, buf->used);
2429                 return DRM_ERR(EINVAL);
2430         }
2431
2432         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2433         VB_AGE_TEST_WITH_RETURN(dev_priv);
2434
2435         buf->used = indirect.end;
2436
2437         /* Wait for the 3D stream to idle before the indirect buffer
2438          * containing 2D acceleration commands is processed.
2439          */
2440         BEGIN_RING(2);
2441
2442         RADEON_WAIT_UNTIL_3D_IDLE();
2443
2444         ADVANCE_RING();
2445
2446         /* Dispatch the indirect buffer full of commands from the
2447          * X server.  This is insecure and is thus only available to
2448          * privileged clients.
2449          */
2450         radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
2451         if (indirect.discard) {
2452                 radeon_cp_discard_buffer(dev, buf);
2453         }
2454
2455         COMMIT_RING();
2456         return 0;
2457 }
2458
2459 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
2460 {
2461         DRM_DEVICE;
2462         drm_radeon_private_t *dev_priv = dev->dev_private;
2463         drm_file_t *filp_priv;
2464         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2465         drm_device_dma_t *dma = dev->dma;
2466         drm_buf_t *buf;
2467         drm_radeon_vertex2_t vertex;
2468         int i;
2469         unsigned char laststate;
2470
2471         LOCK_TEST_WITH_RETURN(dev, filp);
2472
2473         if (!dev_priv) {
2474                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2475                 return DRM_ERR(EINVAL);
2476         }
2477
2478         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2479
2480         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
2481                                  sizeof(vertex));
2482
2483         DRM_DEBUG("pid=%d index=%d discard=%d\n",
2484                   DRM_CURRENTPID, vertex.idx, vertex.discard);
2485
2486         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
2487                 DRM_ERROR("buffer index %d (of %d max)\n",
2488                           vertex.idx, dma->buf_count - 1);
2489                 return DRM_ERR(EINVAL);
2490         }
2491
2492         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2493         VB_AGE_TEST_WITH_RETURN(dev_priv);
2494
2495         buf = dma->buflist[vertex.idx];
2496
2497         if (buf->filp != filp) {
2498                 DRM_ERROR("process %d using buffer owned by %p\n",
2499                           DRM_CURRENTPID, buf->filp);
2500                 return DRM_ERR(EINVAL);
2501         }
2502
2503         if (buf->pending) {
2504                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
2505                 return DRM_ERR(EINVAL);
2506         }
2507
2508         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2509                 return DRM_ERR(EINVAL);
2510
2511         for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
2512                 drm_radeon_prim_t prim;
2513                 drm_radeon_tcl_prim_t tclprim;
2514
2515                 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
2516                         return DRM_ERR(EFAULT);
2517
2518                 if (prim.stateidx != laststate) {
2519                         drm_radeon_state_t state;
2520
2521                         if (DRM_COPY_FROM_USER(&state,
2522                                                &vertex.state[prim.stateidx],
2523                                                sizeof(state)))
2524                                 return DRM_ERR(EFAULT);
2525
2526                         if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
2527                                 DRM_ERROR("radeon_emit_state2 failed\n");
2528                                 return DRM_ERR(EINVAL);
2529                         }
2530
2531                         laststate = prim.stateidx;
2532                 }
2533
2534                 tclprim.start = prim.start;
2535                 tclprim.finish = prim.finish;
2536                 tclprim.prim = prim.prim;
2537                 tclprim.vc_format = prim.vc_format;
2538
2539                 if (prim.prim & RADEON_PRIM_WALK_IND) {
2540                         tclprim.offset = prim.numverts * 64;
2541                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2542
2543                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
2544                 } else {
2545                         tclprim.numverts = prim.numverts;
2546                         tclprim.offset = 0;     /* not used */
2547
2548                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
2549                 }
2550
2551                 if (sarea_priv->nbox == 1)
2552                         sarea_priv->nbox = 0;
2553         }
2554
2555         if (vertex.discard) {
2556                 radeon_cp_discard_buffer(dev, buf);
2557         }
2558
2559         COMMIT_RING();
2560         return 0;
2561 }
2562
2563 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
2564                                drm_file_t * filp_priv,
2565                                drm_radeon_cmd_header_t header,
2566                                drm_radeon_kcmd_buffer_t *cmdbuf)
2567 {
2568         int id = (int)header.packet.packet_id;
2569         int sz, reg;
2570         int *data = (int *)cmdbuf->buf;
2571         RING_LOCALS;
2572
2573         if (id >= RADEON_MAX_STATE_PACKETS)
2574                 return DRM_ERR(EINVAL);
2575
2576         sz = packet[id].len;
2577         reg = packet[id].start;
2578
2579         if (sz * sizeof(int) > cmdbuf->bufsz) {
2580                 DRM_ERROR("Packet size provided larger than data provided\n");
2581                 return DRM_ERR(EINVAL);
2582         }
2583
2584         if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
2585                 DRM_ERROR("Packet verification failed\n");
2586                 return DRM_ERR(EINVAL);
2587         }
2588
2589         BEGIN_RING(sz + 1);
2590         OUT_RING(CP_PACKET0(reg, (sz - 1)));
2591         OUT_RING_TABLE(data, sz);
2592         ADVANCE_RING();
2593
2594         cmdbuf->buf += sz * sizeof(int);
2595         cmdbuf->bufsz -= sz * sizeof(int);
2596         return 0;
2597 }
2598
2599 static __inline__ int radeon_emit_scalars(drm_radeon_private_t * dev_priv,
2600                                           drm_radeon_cmd_header_t header,
2601                                           drm_radeon_kcmd_buffer_t * cmdbuf)
2602 {
2603         int sz = header.scalars.count;
2604         int start = header.scalars.offset;
2605         int stride = header.scalars.stride;
2606         RING_LOCALS;
2607
2608         BEGIN_RING(3 + sz);
2609         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2610         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2611         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2612         OUT_RING_TABLE(cmdbuf->buf, sz);
2613         ADVANCE_RING();
2614         cmdbuf->buf += sz * sizeof(int);
2615         cmdbuf->bufsz -= sz * sizeof(int);
2616         return 0;
2617 }
2618
2619 /* God this is ugly
2620  */
2621 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t * dev_priv,
2622                                            drm_radeon_cmd_header_t header,
2623                                            drm_radeon_kcmd_buffer_t * cmdbuf)
2624 {
2625         int sz = header.scalars.count;
2626         int start = ((unsigned int)header.scalars.offset) + 0x100;
2627         int stride = header.scalars.stride;
2628         RING_LOCALS;
2629
2630         BEGIN_RING(3 + sz);
2631         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
2632         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2633         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
2634         OUT_RING_TABLE(cmdbuf->buf, sz);
2635         ADVANCE_RING();
2636         cmdbuf->buf += sz * sizeof(int);
2637         cmdbuf->bufsz -= sz * sizeof(int);
2638         return 0;
2639 }
2640
2641 static __inline__ int radeon_emit_vectors(drm_radeon_private_t * dev_priv,
2642                                           drm_radeon_cmd_header_t header,
2643                                           drm_radeon_kcmd_buffer_t * cmdbuf)
2644 {
2645         int sz = header.vectors.count;
2646         int start = header.vectors.offset;
2647         int stride = header.vectors.stride;
2648         RING_LOCALS;
2649
2650         BEGIN_RING(3 + sz);
2651         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
2652         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2653         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
2654         OUT_RING_TABLE(cmdbuf->buf, sz);
2655         ADVANCE_RING();
2656
2657         cmdbuf->buf += sz * sizeof(int);
2658         cmdbuf->bufsz -= sz * sizeof(int);
2659         return 0;
2660 }
2661
2662 static int radeon_emit_packet3(drm_device_t * dev,
2663                                drm_file_t * filp_priv,
2664                                drm_radeon_kcmd_buffer_t *cmdbuf)
2665 {
2666         drm_radeon_private_t *dev_priv = dev->dev_private;
2667         unsigned int cmdsz;
2668         int ret;
2669         RING_LOCALS;
2670
2671         DRM_DEBUG("\n");
2672
2673         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2674                                                   cmdbuf, &cmdsz))) {
2675                 DRM_ERROR("Packet verification failed\n");
2676                 return ret;
2677         }
2678
2679         BEGIN_RING(cmdsz);
2680         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2681         ADVANCE_RING();
2682
2683         cmdbuf->buf += cmdsz * 4;
2684         cmdbuf->bufsz -= cmdsz * 4;
2685         return 0;
2686 }
2687
2688 static int radeon_emit_packet3_cliprect(drm_device_t * dev,
2689                                         drm_file_t * filp_priv,
2690                                         drm_radeon_kcmd_buffer_t *cmdbuf,
2691                                         int orig_nbox)
2692 {
2693         drm_radeon_private_t *dev_priv = dev->dev_private;
2694         drm_clip_rect_t box;
2695         unsigned int cmdsz;
2696         int ret;
2697         drm_clip_rect_t __user *boxes = cmdbuf->boxes;
2698         int i = 0;
2699         RING_LOCALS;
2700
2701         DRM_DEBUG("\n");
2702
2703         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
2704                                                   cmdbuf, &cmdsz))) {
2705                 DRM_ERROR("Packet verification failed\n");
2706                 return ret;
2707         }
2708
2709         if (!orig_nbox)
2710                 goto out;
2711
2712         do {
2713                 if (i < cmdbuf->nbox) {
2714                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
2715                                 return DRM_ERR(EFAULT);
2716                         /* FIXME The second and subsequent times round
2717                          * this loop, send a WAIT_UNTIL_3D_IDLE before
2718                          * calling emit_clip_rect(). This fixes a
2719                          * lockup on fast machines when sending
2720                          * several cliprects with a cmdbuf, as when
2721                          * waving a 2D window over a 3D
2722                          * window. Something in the commands from user
2723                          * space seems to hang the card when they're
2724                          * sent several times in a row. That would be
2725                          * the correct place to fix it but this works
2726                          * around it until I can figure that out - Tim
2727                          * Smith */
2728                         if (i) {
2729                                 BEGIN_RING(2);
2730                                 RADEON_WAIT_UNTIL_3D_IDLE();
2731                                 ADVANCE_RING();
2732                         }
2733                         radeon_emit_clip_rect(dev_priv, &box);
2734                 }
2735
2736                 BEGIN_RING(cmdsz);
2737                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
2738                 ADVANCE_RING();
2739
2740         } while (++i < cmdbuf->nbox);
2741         if (cmdbuf->nbox == 1)
2742                 cmdbuf->nbox = 0;
2743
2744       out:
2745         cmdbuf->buf += cmdsz * 4;
2746         cmdbuf->bufsz -= cmdsz * 4;
2747         return 0;
2748 }
2749
2750 static int radeon_emit_wait(drm_device_t * dev, int flags)
2751 {
2752         drm_radeon_private_t *dev_priv = dev->dev_private;
2753         RING_LOCALS;
2754
2755         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2756         switch (flags) {
2757         case RADEON_WAIT_2D:
2758                 BEGIN_RING(2);
2759                 RADEON_WAIT_UNTIL_2D_IDLE();
2760                 ADVANCE_RING();
2761                 break;
2762         case RADEON_WAIT_3D:
2763                 BEGIN_RING(2);
2764                 RADEON_WAIT_UNTIL_3D_IDLE();
2765                 ADVANCE_RING();
2766                 break;
2767         case RADEON_WAIT_2D | RADEON_WAIT_3D:
2768                 BEGIN_RING(2);
2769                 RADEON_WAIT_UNTIL_IDLE();
2770                 ADVANCE_RING();
2771                 break;
2772         default:
2773                 return DRM_ERR(EINVAL);
2774         }
2775
2776         return 0;
2777 }
2778
2779 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
2780 {
2781         DRM_DEVICE;
2782         drm_radeon_private_t *dev_priv = dev->dev_private;
2783         drm_file_t *filp_priv;
2784         drm_device_dma_t *dma = dev->dma;
2785         drm_buf_t *buf = NULL;
2786         int idx;
2787         drm_radeon_kcmd_buffer_t cmdbuf;
2788         drm_radeon_cmd_header_t header;
2789         int orig_nbox, orig_bufsz;
2790         char *kbuf = NULL;
2791
2792         LOCK_TEST_WITH_RETURN(dev, filp);
2793
2794         if (!dev_priv) {
2795                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2796                 return DRM_ERR(EINVAL);
2797         }
2798
2799         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
2800
2801         DRM_COPY_FROM_USER_IOCTL(cmdbuf,
2802                                  (drm_radeon_cmd_buffer_t __user *) data,
2803                                  sizeof(cmdbuf));
2804
2805         RING_SPACE_TEST_WITH_RETURN(dev_priv);
2806         VB_AGE_TEST_WITH_RETURN(dev_priv);
2807
2808         if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
2809                 return DRM_ERR(EINVAL);
2810         }
2811
2812         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2813          * races between checking values and using those values in other code,
2814          * and simply to avoid a lot of function calls to copy in data.
2815          */
2816         orig_bufsz = cmdbuf.bufsz;
2817         if (orig_bufsz != 0) {
2818                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2819                 if (kbuf == NULL)
2820                         return DRM_ERR(ENOMEM);
2821                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf, cmdbuf.bufsz)) {
2822                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2823                         return DRM_ERR(EFAULT);
2824                 }
2825                 cmdbuf.buf = kbuf;
2826         }
2827
2828         orig_nbox = cmdbuf.nbox;
2829
2830         if (dev_priv->microcode_version == UCODE_R300) {
2831                 int temp;
2832                 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2833
2834                 if (orig_bufsz != 0)
2835                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2836
2837                 return temp;
2838         }
2839
2840         /* microcode_version != r300 */
2841         while (cmdbuf.bufsz >= sizeof(header)) {
2842
2843                 header.i = *(int *)cmdbuf.buf;
2844                 cmdbuf.buf += sizeof(header);
2845                 cmdbuf.bufsz -= sizeof(header);
2846
2847                 switch (header.header.cmd_type) {
2848                 case RADEON_CMD_PACKET:
2849                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2850                         if (radeon_emit_packets
2851                             (dev_priv, filp_priv, header, &cmdbuf)) {
2852                                 DRM_ERROR("radeon_emit_packets failed\n");
2853                                 goto err;
2854                         }
2855                         break;
2856
2857                 case RADEON_CMD_SCALARS:
2858                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2859                         if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
2860                                 DRM_ERROR("radeon_emit_scalars failed\n");
2861                                 goto err;
2862                         }
2863                         break;
2864
2865                 case RADEON_CMD_VECTORS:
2866                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2867                         if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
2868                                 DRM_ERROR("radeon_emit_vectors failed\n");
2869                                 goto err;
2870                         }
2871                         break;
2872
2873                 case RADEON_CMD_DMA_DISCARD:
2874                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2875                         idx = header.dma.buf_idx;
2876                         if (idx < 0 || idx >= dma->buf_count) {
2877                                 DRM_ERROR("buffer index %d (of %d max)\n",
2878                                           idx, dma->buf_count - 1);
2879                                 goto err;
2880                         }
2881
2882                         buf = dma->buflist[idx];
2883                         if (buf->filp != filp || buf->pending) {
2884                                 DRM_ERROR("bad buffer %p %p %d\n",
2885                                           buf->filp, filp, buf->pending);
2886                                 goto err;
2887                         }
2888
2889                         radeon_cp_discard_buffer(dev, buf);
2890                         break;
2891
2892                 case RADEON_CMD_PACKET3:
2893                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2894                         if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
2895                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2896                                 goto err;
2897                         }
2898                         break;
2899
2900                 case RADEON_CMD_PACKET3_CLIP:
2901                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2902                         if (radeon_emit_packet3_cliprect
2903                             (dev, filp_priv, &cmdbuf, orig_nbox)) {
2904                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2905                                 goto err;
2906                         }
2907                         break;
2908
2909                 case RADEON_CMD_SCALARS2:
2910                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2911                         if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
2912                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2913                                 goto err;
2914                         }
2915                         break;
2916
2917                 case RADEON_CMD_WAIT:
2918                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2919                         if (radeon_emit_wait(dev, header.wait.flags)) {
2920                                 DRM_ERROR("radeon_emit_wait failed\n");
2921                                 goto err;
2922                         }
2923                         break;
2924                 default:
2925                         DRM_ERROR("bad cmd_type %d at %p\n",
2926                                   header.header.cmd_type,
2927                                   cmdbuf.buf - sizeof(header));
2928                         goto err;
2929                 }
2930         }
2931
2932         if (orig_bufsz != 0)
2933                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2934
2935         DRM_DEBUG("DONE\n");
2936         COMMIT_RING();
2937         return 0;
2938
2939       err:
2940         if (orig_bufsz != 0)
2941                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2942         return DRM_ERR(EINVAL);
2943 }
2944
2945 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
2946 {
2947         DRM_DEVICE;
2948         drm_radeon_private_t *dev_priv = dev->dev_private;
2949         drm_radeon_getparam_t param;
2950         int value;
2951
2952         if (!dev_priv) {
2953                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
2954                 return DRM_ERR(EINVAL);
2955         }
2956
2957         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
2958                                  sizeof(param));
2959
2960         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
2961
2962         switch (param.param) {
2963         case RADEON_PARAM_GART_BUFFER_OFFSET:
2964                 value = dev_priv->gart_buffers_offset;
2965                 break;
2966         case RADEON_PARAM_LAST_FRAME:
2967                 dev_priv->stats.last_frame_reads++;
2968                 value = GET_SCRATCH(0);
2969                 break;
2970         case RADEON_PARAM_LAST_DISPATCH:
2971                 value = GET_SCRATCH(1);
2972                 break;
2973         case RADEON_PARAM_LAST_CLEAR:
2974                 dev_priv->stats.last_clear_reads++;
2975                 value = GET_SCRATCH(2);
2976                 break;
2977         case RADEON_PARAM_IRQ_NR:
2978                 value = dev->irq;
2979                 break;
2980         case RADEON_PARAM_GART_BASE:
2981                 value = dev_priv->gart_vm_start;
2982                 break;
2983         case RADEON_PARAM_REGISTER_HANDLE:
2984                 value = dev_priv->mmio_offset;
2985                 break;
2986         case RADEON_PARAM_STATUS_HANDLE:
2987                 value = dev_priv->ring_rptr_offset;
2988                 break;
2989 #if BITS_PER_LONG == 32
2990                 /*
2991                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2992                  * pointer which can't fit into an int-sized variable.  According to
2993                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2994                  * not supporting it shouldn't be a problem.  If the same functionality
2995                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
2996                  * so backwards-compatibility for the embedded platforms can be
2997                  * maintained.  --davidm 4-Feb-2004.
2998                  */
2999         case RADEON_PARAM_SAREA_HANDLE:
3000                 /* The lock is the first dword in the sarea. */
3001                 value = (long)dev->lock.hw_lock;
3002                 break;
3003 #endif
3004         case RADEON_PARAM_GART_TEX_HANDLE:
3005                 value = dev_priv->gart_textures_offset;
3006                 break;
3007         default:
3008                 return DRM_ERR(EINVAL);
3009         }
3010
3011         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
3012                 DRM_ERROR("copy_to_user\n");
3013                 return DRM_ERR(EFAULT);
3014         }
3015
3016         return 0;
3017 }
3018
3019 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
3020 {
3021         DRM_DEVICE;
3022         drm_radeon_private_t *dev_priv = dev->dev_private;
3023         drm_file_t *filp_priv;
3024         drm_radeon_setparam_t sp;
3025         struct drm_radeon_driver_file_fields *radeon_priv;
3026
3027         if (!dev_priv) {
3028                 DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
3029                 return DRM_ERR(EINVAL);
3030         }
3031
3032         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
3033
3034         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
3035                                  sizeof(sp));
3036
3037         switch (sp.param) {
3038         case RADEON_SETPARAM_FB_LOCATION:
3039                 radeon_priv = filp_priv->driver_priv;
3040                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3041                 break;
3042         case RADEON_SETPARAM_SWITCH_TILING:
3043                 if (sp.value == 0) {
3044                         DRM_DEBUG("color tiling disabled\n");
3045                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3046                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3047                         dev_priv->sarea_priv->tiling_enabled = 0;
3048                 } else if (sp.value == 1) {
3049                         DRM_DEBUG("color tiling enabled\n");
3050                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3051                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3052                         dev_priv->sarea_priv->tiling_enabled = 1;
3053                 }
3054                 break;
3055         case RADEON_SETPARAM_PCIGART_LOCATION:
3056                 dev_priv->pcigart_offset = sp.value;
3057                 break;
3058         default:
3059                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
3060                 return DRM_ERR(EINVAL);
3061         }
3062
3063         return 0;
3064 }
3065
3066 /* When a client dies:
3067  *    - Check for and clean up flipped page state
3068  *    - Free any alloced GART memory.
3069  *
3070  * DRM infrastructure takes care of reclaiming dma buffers.
3071  */
3072 void radeon_driver_prerelease(drm_device_t * dev, DRMFILE filp)
3073 {
3074         if (dev->dev_private) {
3075                 drm_radeon_private_t *dev_priv = dev->dev_private;
3076                 if (dev_priv->page_flipping) {
3077                         radeon_do_cleanup_pageflip(dev);
3078                 }
3079                 radeon_mem_release(filp, dev_priv->gart_heap);
3080                 radeon_mem_release(filp, dev_priv->fb_heap);
3081                 radeon_surfaces_release(filp, dev_priv);
3082         }
3083 }
3084
3085 void radeon_driver_pretakedown(drm_device_t * dev)
3086 {
3087         radeon_do_release(dev);
3088 }
3089
3090 int radeon_driver_open_helper(drm_device_t * dev, drm_file_t * filp_priv)
3091 {
3092         drm_radeon_private_t *dev_priv = dev->dev_private;
3093         struct drm_radeon_driver_file_fields *radeon_priv;
3094
3095         radeon_priv =
3096             (struct drm_radeon_driver_file_fields *)
3097             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3098
3099         if (!radeon_priv)
3100                 return -ENOMEM;
3101
3102         filp_priv->driver_priv = radeon_priv;
3103         if (dev_priv)
3104                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3105         else
3106                 radeon_priv->radeon_fb_delta = 0;
3107         return 0;
3108 }
3109
3110 void radeon_driver_free_filp_priv(drm_device_t * dev, drm_file_t * filp_priv)
3111 {
3112         struct drm_radeon_driver_file_fields *radeon_priv =
3113             filp_priv->driver_priv;
3114
3115         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3116 }
3117
3118 drm_ioctl_desc_t radeon_ioctls[] = {
3119         [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, 1, 1},
3120         [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, 1, 1},
3121         [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, 1, 1},
3122         [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, 1, 1},
3123         [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, 1, 0},
3124         [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, 1, 0},
3125         [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, 1, 0},
3126         [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, 1, 0},
3127         [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, 1, 0},
3128         [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, 1, 0},
3129         [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, 1, 0},
3130         [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, 1, 0},
3131         [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, 1, 0},
3132         [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, 1, 0},
3133         [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, 1, 1},
3134         [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, 1, 0},
3135         [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, 1, 0},
3136         [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, 1, 0},
3137         [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, 1, 0},
3138         [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, 1, 0},
3139         [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, 1, 0},
3140         [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, 1, 1},
3141         [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, 1, 0},
3142         [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, 1, 0},
3143         [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, 1, 0},
3144         [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, 1, 0},
3145         [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, 1, 0}
3146 };
3147
3148 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);