/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
/* ================================================================
 * Helper functions for client state checking and fixup
 */
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    drm_file_t * filp_priv,
						    u32 *offset)
{
	u32 off = *offset;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */

	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if ((off >= dev_priv->fb_location &&
	     off < (dev_priv->fb_location + dev_priv->fb_size)) ||
	    (off >= dev_priv->gart_vm_start &&
	     off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = filp_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > (dev_priv->fb_location + dev_priv->fb_size))
		off = off - (dev_priv->fb_location + dev_priv->fb_size) +
		    dev_priv->gart_vm_start;

	/* Now recheck and fail if out of bounds */
	if ((off >= dev_priv->fb_location &&
	     off < (dev_priv->fb_location + dev_priv->fb_size)) ||
	    (off >= dev_priv->gart_vm_start &&
	     off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
		DRM_DEBUG("offset fixed up to 0x%x\n", off);
		*offset = off;
		return 0;
	}
	return DRM_ERR(EINVAL);
}
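/* Worked example of the fixup above (all values hypothetical): with
 * fb_location = 0x10000000, fb_size = 0x08000000 and gart_vm_start =
 * 0x20000000, a legacy client passing the zero-based offset 0x00100000
 * misses both ranges, has radeon_fb_delta (0x10000000 here) added, and
 * the resulting 0x10100000 then passes the framebuffer bounds recheck.
 */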
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;
	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;
	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;
	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}
	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t *filp_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}
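/* Packet3 sizing example for the check above: a header of 0xC0021000
 * is type 3 (top bits 0xC0000000) with a count field of 2, so *cmdsz
 * becomes 2 + 2 = 4 dwords, i.e. 16 bytes that must fit in cmdbuf->bufsz.
 */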
/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("box: x1=%d y1=%d x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
/* Emit 1.1 state
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     drm_file_t * filp_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}
/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
	 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
	 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
};
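/* Note: this table is indexed by the RADEON_EMIT_* / R200_EMIT_* packet
 * ids, so each entry's position must stay in sync with the ids handled
 * in radeon_check_and_fixup_packets() above; reordering either one
 * silently breaks state emission.
 */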
/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;
	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}
	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
						       2) : (dev_priv->
							     depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}
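	/* For reference: the hierz clearmask used in the block above,
	 * (0xff << 22) | (0xff << 6) | 0x003f003f, evaluates to 0x3fff3fff,
	 * i.e. bits 0-13 and 16-29 set, with the top two bits of each
	 * 16-bit half clear.
	 */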
	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0x0;
		tempRE_CNTL = 0x0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(7);

		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}
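/* Example: RADEON_PRIM_TYPE_TRI_LIST with nr = 7 is rejected above
 * (7 % 3 != 0), while nr = 6 (two triangles) passes.
 */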
typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
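/* The do/while above re-emits the full vertex packet once per cliprect,
 * since the hardware scissors to a single rectangle at a time; with
 * nbox == 0 the packet is still emitted exactly once.
 */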
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
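/* Padding example for the indirect dispatch above: with start = 0 and
 * end = 10, dwords = (10 + 3) / 4 = 3, which is odd, so one
 * RADEON_CP_PACKET2 nop is appended and 4 dwords are handed to the CP.
 */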
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);
}
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}
		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return DRM_ERR(EFAULT); \
		} \
	} while(0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						       (int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
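		/* Layout note (our reading of the code above, not documented
		 * anywhere): the tex_width == 32 path interleaves four
		 * 16-byte source runs per pair of rows at destination byte
		 * offsets 0, 32, 16 and 48, matching the micro-tile order
		 * the blitter would otherwise apply itself.
		 */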
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();

	return 0;
}
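/* Sizing note: assuming RADEON_BUFFER_SIZE is 64 KiB, a 512x512 ARGB8888
 * upload (blit_width = 2048 bytes) is split by the loop above into
 * passes of height = 65536 / 2048 = 32 rows, 16 passes in total.
 */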
static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}
/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}
static void radeon_surfaces_release(DRMFILE filp,
				    drm_radeon_private_t * dev_priv)
{
	int i;
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		if (dev_priv->virt_surfaces[i].filp == filp)
			free_surface(filp, dev_priv,
				     dev_priv->virt_surfaces[i].lower);
	}
}
/* ================================================================
 * IOCTL functions
 */
static int radeon_surface_alloc(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t alloc;

	DRM_COPY_FROM_USER_IOCTL(alloc,
				 (drm_radeon_surface_alloc_t __user *) data,
				 sizeof(alloc));

	if (alloc_surface(&alloc, dev_priv, filp) == -1)
		return DRM_ERR(EINVAL);
	else
		return 0;
}

static int radeon_surface_free(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t memfree;

	DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
				 sizeof(memfree));

	if (free_surface(filp, dev_priv, memfree.address))
		return DRM_ERR(EINVAL);
	else
		return 0;
}
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
/* Not sure why this isn't set all the time:
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
/* Called whenever a client dies, from drm_release.
 * NOTE: Lock isn't necessarily held when this is called!
 */
static int radeon_do_cleanup_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	if (dev_priv->current_page != 0)
		radeon_cp_dispatch_flip(dev);

	dev_priv->page_flipping = 0;
	return 0;
}
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
2090 static int radeon_cp_swap(DRM_IOCTL_ARGS)
2093 drm_radeon_private_t *dev_priv = dev->dev_private;
2094 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2097 LOCK_TEST_WITH_RETURN(dev, filp);
2099 RING_SPACE_TEST_WITH_RETURN(dev_priv);
2101 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2102 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2104 radeon_cp_dispatch_swap(dev);
2105 dev_priv->sarea_priv->ctx_owner = 0;
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
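/* A minimal sketch of the matching userland call (hypothetical client
 * code, ignoring the libdrm wrappers): the client names a DMA buffer
 * index it owns and a primitive type accepted by the range check above:
 *
 *	drm_radeon_vertex_t v;
 *	v.prim = RADEON_PRIM_TYPE_TRI_LIST;
 *	v.idx = buffer_index;
 *	v.count = nr_vertices;
 *	v.discard = 1;		// hand the buffer back when done
 *	ioctl(fd, DRM_IOCTL_RADEON_VERTEX, &v);
 */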
static int radeon_cp_indices(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indices_t elts;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
				 sizeof(elts));

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);

	if (elts.idx < 0 || elts.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts.idx);
		return DRM_ERR(EINVAL);
	}

	count = (elts.end - elts.start) / sizeof(u16);
	elts.start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts.start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
		return DRM_ERR(EINVAL);
	}
	if (elts.start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	buf->used = elts.end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, filp_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return DRM_ERR(EINVAL);
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts.start;
	prim.finish = elts.end;
	prim.prim = elts.prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, buf, &prim);
	if (elts.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
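/* Layout assumed by the checks above: the client places a packet header
 * in the buffer at elts.start - RADEON_INDEX_PRIM_OFFSET, immediately
 * followed by the u16 indices.  After the offset subtraction, elts.start
 * must remain 8-byte aligned and must not overlap the already-consumed
 * portion of the buffer (buf->used).
 */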
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
				 sizeof(tex));

	if (tex.image == NULL) {
		DRM_ERROR("null texture image!\n");
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex.image,
			       sizeof(image)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

	COMMIT_RING();
	return ret;
}

static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));

	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}
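/* The stipple pattern is a fixed 32x32 bitmap: one u32 per row, 32 rows,
 * copied wholesale from userland before being handed to
 * radeon_cp_dispatch_stipple().
 */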
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);
	RADEON_WAIT_UNTIL_3D_IDLE();
	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
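/* Unlike the older vertex ioctl, vertex2 carries an array of primitives,
 * each naming a state block by index; state is re-emitted only when
 * stateidx changes from one primitive to the next (the laststate
 * tracking above), so runs of prims sharing state pay for it once.
 */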
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
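/* The packet[] table (indexed by packet_id) supplies the register start
 * and dword count for each state packet a client may emit;
 * radeon_check_and_fixup_packets() then sanitizes any embedded offsets
 * before the payload is copied into the ring.
 */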
static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

/* Identical to radeon_emit_scalars, except the index is biased by
 * 0x100: the 8-bit offset field in the command header can't reach the
 * upper scalar range directly.
 */
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
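/* For packet3 commands, radeon_check_and_fixup_packet3() both validates
 * the packet and reports its size; cmdsz is in dwords, which is why the
 * buffer pointer advances by cmdsz * 4 bytes here and in the cliprect
 * variant below.
 */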
static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
static int radeon_emit_wait(drm_device_t * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	return 0;
}

static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
				       cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
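/* Userland side, roughly (hypothetical client code, ignoring the libdrm
 * wrappers): the client packs a stream of drm_radeon_cmd_header_t-led
 * commands into one buffer and submits it in a single ioctl:
 *
 *	drm_radeon_cmd_buffer_t cb;
 *	cb.buf = stream;	// headers + payload dwords
 *	cb.bufsz = stream_len;	// bytes; at most 64KB (checked above)
 *	cb.nbox = nbox;		// cliprects for PACKET3_CLIP commands
 *	cb.boxes = boxes;
 *	ioctl(fd, DRM_IOCTL_RADEON_CMDBUF, &cb);
 */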
static int radeon_cp_getparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
				 sizeof(param));

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param.param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(2);
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio->offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
		/*
		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
		 * pointer which can't fit into an int-sized variable.  According to
		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
		 * not supporting it shouldn't be a problem.  If the same functionality
		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
		 * so backwards-compatibility for the embedded platforms can be
		 * maintained.  --davidm 4-Feb-2004.
		 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;

	case RADEON_PARAM_CARD_TYPE:
		if (dev_priv->flags & CHIP_IS_PCIE)
			value = RADEON_CARD_PCIE;
		else if (dev_priv->flags & CHIP_IS_AGP)
			value = RADEON_CARD_AGP;
		else
			value = RADEON_CARD_PCI;
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return DRM_ERR(EFAULT);
	}

	return 0;
}
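/* A typical query from userland looks roughly like this (hypothetical
 * client code):
 *
 *	drm_radeon_getparam_t gp;
 *	int value;
 *	gp.param = RADEON_PARAM_GART_BUFFER_OFFSET;
 *	gp.value = &value;
 *	ioctl(fd, DRM_IOCTL_RADEON_GETPARAM, &gp);
 *
 * Values are copied out as plain ints, which is exactly why
 * RADEON_PARAM_SAREA_HANDLE cannot work on 64-bit platforms (see the
 * comment in the switch above).
 */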
static int radeon_cp_setparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_setparam_t sp;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
				 sizeof(sp));

	switch (sp.param) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = filp_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp.value == 0) {
			DRM_DEBUG("color tiling disabled\n");
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 0;
		} else if (sp.value == 1) {
			DRM_DEBUG("color tiling enabled\n");
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	case RADEON_SETPARAM_PCIGART_LOCATION:
		dev_priv->pcigart_offset = sp.value;
		break;
	case RADEON_SETPARAM_NEW_MEMMAP:
		dev_priv->new_memmap = sp.value;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", sp.param);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
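/* Note: RADEON_SETPARAM_FB_LOCATION refines the default chosen in
 * radeon_driver_open() below; the per-file radeon_fb_delta it records
 * is the correction later applied when rebasing client-supplied
 * offsets into the card address space.
 */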
/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any allocated GART memory.
 *    - Free any allocated radeon surfaces.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
{
	if (dev->dev_private) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		if (dev_priv->page_flipping) {
			radeon_do_cleanup_pageflip(dev);
		}
		radeon_mem_release(filp, dev_priv->gart_heap);
		radeon_mem_release(filp, dev_priv->fb_heap);
		radeon_surfaces_release(filp, dev_priv);
	}
}

void radeon_driver_lastclose(drm_device_t * dev)
{
	radeon_do_release(dev);
}

int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	radeon_priv =
	    (struct drm_radeon_driver_file_fields *)
	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);

	if (!radeon_priv)
		return -ENOMEM;

	filp_priv->driver_priv = radeon_priv;

	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    filp_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);