/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
/* ================================================================
 * Helper functions for client state checking and fixup
 */

static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    drm_file_t * filp_priv,
						    u32 * offset)
{
	u32 off = *offset;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */

	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if ((off >= dev_priv->fb_location &&
	     off < (dev_priv->fb_location + dev_priv->fb_size)) ||
	    (off >= dev_priv->gart_vm_start &&
	     off < (dev_priv->gart_vm_start + dev_priv->gart_size)))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = filp_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > (dev_priv->fb_location + dev_priv->fb_size))
		off = off - (dev_priv->fb_location + dev_priv->fb_size) +
		    dev_priv->gart_vm_start;

	/* Now recheck and fail if out of bounds */
	if ((off >= dev_priv->fb_location &&
	     off < (dev_priv->fb_location + dev_priv->fb_size)) ||
	    (off >= dev_priv->gart_vm_start &&
	     off < (dev_priv->gart_vm_start + dev_priv->gart_size))) {
		DRM_DEBUG("offset fixed up to 0x%x\n", off);
		*offset = off;
		return 0;
	}
	return DRM_ERR(EINVAL);
}
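/* Worked example of the fixup above (hypothetical layout, for
 * illustration only): with fb_location = 0x08000000, fb_size =
 * 0x02000000, gart_vm_start = 0x0a000000 and gart_size = 0x01000000,
 * a legacy client passing the zero-based offset 0x00100000 misses both
 * ranges, so radeon_fb_delta is added, giving 0x08100000, which then
 * passes the recheck.  A sane client passing 0x08100000 directly
 * returns on the first range test.
 */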
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case R200_EMIT_VAP_CTL:{
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
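/* Illustration of the index arithmetic used above: for
 * RADEON_EMIT_PP_MISC the payload begins at register RADEON_PP_MISC,
 * so the depth offset lives at dword index
 * (RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4 of data[] -- the
 * register address delta divided by the 4-byte register stride.
 */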
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t *filp_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}
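/* PACKET3 header decode, worked through with an illustrative value:
 * a header of 0xc0012d00 has bits 31:30 = 11b (RADEON_CP_PACKET3) and
 * a count field of 1 in bits 29:16 (RADEON_CP_PACKET_COUNT_MASK), so
 * *cmdsz = 2 + 1 = 3: the header dword plus count + 1 payload dwords.
 */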
/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
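/* Note the conversion above from the DRM exclusive bottom-right
 * convention to the hardware's inclusive one: a clip rect x1=0, y1=0,
 * x2=640, y2=480 emits (0 << 16) | 0 to RE_TOP_LEFT and
 * (479 << 16) | 639 to RE_WIDTH_HEIGHT.
 */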
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     drm_file_t * filp_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}
/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
	 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
	 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
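/* Illustrative use of the table above (the 1.3 cmdbuffer parser that
 * consumes it is not part of this excerpt): a state packet carries an
 * id that indexes packet[], where .start is the first register and
 * .len the number of dwords, e.g. id 0 describes 7 dwords starting at
 * RADEON_PP_MISC -- the same layout radeon_check_and_fixup_packets()
 * validates.
 */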
/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
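/* RGB565 packing above, worked through for r = 255, g = 0, b = 255
 * (the purple page-flip box below): (0xf8 << 8) | (0x00 << 3) |
 * (0xf8 >> 3) = 0xf81f, i.e. 5 bits red, 6 bits green, 5 bits blue.
 */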
static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}
	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
		    (dev_priv->depth_pitch / 2) : (dev_priv->depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}
	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
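/* Worked example of the fast-z tile arithmetic above (hypothetical
 * mode, for illustration): a 16-bit depth buffer with depth_pitch =
 * 2048 gives depthpixperline = 1024.  On the r100-with-HIERZ path, a
 * box with y1 = 8, x1 = 64 yields tileoffset = ((8 >> 3) * 1024 + 64)
 * >> 6 = 17, and each tile row advances by depthpixperline >> 6 = 16.
 */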
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(7);

		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
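/* The CRTC offset math above, worked through (hypothetical mode, for
 * illustration): color_fmt - 2 acts as a bytes-per-pixel factor for
 * the 16- and 32-bit formats used here (RGB565 = 4 -> 2, ARGB8888 =
 * 6 -> 4), so front_pitch = 2048, a 32-bit format and a frame origin
 * of (8, 16) give (16 * 2048 + 8 * 4) & ~7 = 32800 bytes, added to
 * the front or back buffer offset.
 */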
static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}
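/* Examples of the sanity check above: TRI_LIST with nr = 7 is
 * rejected (7 % 3 != 0), LINE with nr = 2 is accepted, and TRI_STRIP
 * requires at least 3 vertices.
 */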
typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
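/* Padding example for the rule above: start = 0, end = 12 gives
 * dwords = (12 + 3) / 4 = 3, which is odd, so one RADEON_CP_PACKET2
 * no-op is appended and 4 dwords are handed to the CP, keeping the
 * indirect buffer an even number of dwords as required.
 */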
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);
}
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;
1632 DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1635 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1636 tex->offset >> 10, tex->pitch, tex->format,
1637 image->x, image->y, image->width, image->height);
1639 /* Make a copy of some parameters in case we have to
1640 * update them for a multi-pass texture blit.
1642 height = image->height;
1643 data = (const u8 __user *)image->data;
1645 size = height * blit_width;
1647 if (size > RADEON_MAX_TEXTURE_SIZE) {
1648 height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1649 size = height * blit_width;
1650 } else if (size < 4 && size > 0) {
1652 } else if (size == 0) {
1656 buf = radeon_freelist_get(dev);
1658 radeon_do_cp_idle(dev_priv);
1659 buf = radeon_freelist_get(dev);
1662 DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1663 if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1664 return DRM_ERR(EFAULT);
1665 return DRM_ERR(EAGAIN);
1668 /* Dispatch the indirect buffer.
1671 (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1674 #define RADEON_COPY_MT(_buf, _data, _width) \
1676 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1677 DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1678 return DRM_ERR(EFAULT); \
1683 /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1684 however, we cannot use blitter directly for texture width < 64 bytes,
1685 since minimum tex pitch is 64 bytes and we need this to match
1686 the texture width, otherwise the blitter will tile it wrong.
1687 Thus, tiling manually in this case. Additionally, need to special
1688 case tex height = 1, since our actual image will have height 2
1689 and we need to ensure we don't read beyond the texture size
1691 if (tex->height == 1) {
1692 if (tex_width >= 64 || tex_width <= 16) {
1693 RADEON_COPY_MT(buffer, data,
1694 (int)(tex_width * sizeof(u32)));
1695 } else if (tex_width == 32) {
1696 RADEON_COPY_MT(buffer, data, 16);
1697 RADEON_COPY_MT(buffer + 8,
1700 } else if (tex_width >= 64 || tex_width == 16) {
1701 RADEON_COPY_MT(buffer, data,
1702 (int)(dwords * sizeof(u32)));
1703 } else if (tex_width < 16) {
1704 for (i = 0; i < tex->height; i++) {
1705 RADEON_COPY_MT(buffer, data, tex_width);
1709 } else if (tex_width == 32) {
1710 /* TODO: make sure this works when not fitting in one buffer
1711 (i.e. 32bytes x 2048...) */
1712 for (i = 0; i < tex->height; i += 2) {
1713 RADEON_COPY_MT(buffer, data, 16);
1715 RADEON_COPY_MT(buffer + 8, data, 16);
1717 RADEON_COPY_MT(buffer + 4, data, 16);
1719 RADEON_COPY_MT(buffer + 12, data, 16);
1725 if (tex_width >= 32) {
1726 /* Texture image width is larger than the minimum, so we
1727 * can upload it directly.
1729 RADEON_COPY_MT(buffer, data,
1730 (int)(dwords * sizeof(u32)));
1732 /* Texture image width is less than the minimum, so we
1733 * need to pad out each image scanline to the minimum
1736 for (i = 0; i < tex->height; i++) {
1737 RADEON_COPY_MT(buffer, data, tex_width);
1744 #undef RADEON_COPY_MT
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();

	return 0;
}
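/* Pitch/offset encoding used in the blit above, with illustrative
 * numbers: blit_width = 512 bytes gives spitch = 512 >> 6 = 8, since
 * GMC pitches are expressed in 64-byte units, and offsets are shifted
 * right by 10 because the pitch/offset dword addresses 1 KB units.
 */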
static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}
/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}
static void radeon_surfaces_release(DRMFILE filp,
				    drm_radeon_private_t * dev_priv)
{
	int i;
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		if (dev_priv->virt_surfaces[i].filp == filp)
			free_surface(filp, dev_priv,
				     dev_priv->virt_surfaces[i].lower);
	}
}
/* ================================================================
 * IOCTL functions
 */
static int radeon_surface_alloc(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t alloc;

	DRM_COPY_FROM_USER_IOCTL(alloc,
				 (drm_radeon_surface_alloc_t __user *) data,
				 sizeof(alloc));

	if (alloc_surface(&alloc, dev_priv, filp) == -1)
		return DRM_ERR(EINVAL);
	else
		return 0;
}

static int radeon_surface_free(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t memfree;

	DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
				 sizeof(memfree));

	if (free_surface(filp, dev_priv, memfree.address))
		return DRM_ERR(EINVAL);
	else
		return 0;
}
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
/* Not sure why this isn't set all the time:
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
/* Called whenever a client dies, from drm_release.
 * NOTE: Lock isn't necessarily held when this is called!
 */
static int radeon_do_cleanup_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	if (dev_priv->current_page != 0)
		radeon_cp_dispatch_flip(dev);

	dev_priv->page_flipping = 0;
	return 0;
}
/* Swapping and flipping are different operations and need different
 * ioctls.  They can & should be intermixed to support multiple 3d windows.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}

static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}

static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

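/* Illustrative userland sketch (not part of this driver): after
 * writing vertices into DMA buffer 'idx', a client would submit it
 * roughly as below; fd, idx and nr_verts are hypothetical placeholders.
 *
 *	drm_radeon_vertex_t v;
 *
 *	v.prim = RADEON_PRIM_TYPE_TRI_LIST;
 *	v.idx = idx;		// DMA buffer index
 *	v.count = nr_verts;	// vertices in the buffer
 *	v.discard = 1;		// hand the buffer back when dispatched
 *	drmCommandWrite(fd, DRM_RADEON_VERTEX, &v, sizeof(v));
 */
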
static int radeon_cp_indices(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indices_t elts;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
				 sizeof(elts));

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);

	if (elts.idx < 0 || elts.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts.idx);
		return DRM_ERR(EINVAL);
	}

	count = (elts.end - elts.start) / sizeof(u16);
	elts.start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts.start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
		return DRM_ERR(EINVAL);
	}
	if (elts.start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	buf->used = elts.end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, filp_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return DRM_ERR(EINVAL);
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts.start;
	prim.finish = elts.end;
	prim.prim = elts.prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, buf, &prim);
	if (elts.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

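/* Descriptive summary of the checks above: the client leaves
 * RADEON_INDEX_PRIM_OFFSET bytes of headroom in the buffer for the
 * packet header, the adjusted start must stay 8-byte aligned, and the
 * span start..end holds 16-bit indices, hence
 * count = (end - start) / sizeof(u16).
 */
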
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
				 sizeof(tex));

	if (tex.image == NULL) {
		DRM_ERROR("null texture image!\n");
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex.image,
			       sizeof(image)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

	COMMIT_RING();
	return ret;
}

static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));

	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}

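/* Illustrative userland sketch (not part of this driver): the stipple
 * pattern copied in above is 32 words of 32 bits, i.e. a 32x32 bitmap;
 * fd and the pattern contents are hypothetical.
 *
 *	drm_radeon_stipple_t s;
 *	unsigned int pattern[32] = { 0xaaaaaaaa, 0x55555555,
 *				     ... 32 words total ... };
 *
 *	s.mask = pattern;
 *	drmCommandWrite(fd, DRM_RADEON_STIPPLE, &s, sizeof(s));
 */
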
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

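/* Because this path trusts the buffer contents, the ioctl table at the
 * end of this file restricts DRM_RADEON_INDIRECT to
 * DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY, while the verified paths (vertex,
 * indices, cmdbuf, ...) only require DRM_AUTH.
 */
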
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

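/* Descriptive note: a CP "type-0" packet as emitted above is one header
 * dword plus payload; CP_PACKET0(reg, n) names a register block of
 * n + 1 data dwords starting at 'reg' (the macro lives in radeon_drv.h).
 * Emitting 'sz' payload dwords therefore consumes sz * sizeof(int)
 * bytes of the client's command buffer, which is why buf/bufsz are
 * advanced by exactly that amount.
 */
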
static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

/* God this is ugly
 */
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

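/* radeon_emit_scalars2() exists because header.scalars.offset is only
 * eight bits wide: biasing the start index by 0x100 lets clients reach
 * scalar registers beyond offset 255 while reusing the same header
 * layout as radeon_emit_scalars().
 */
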
static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					    drm_radeon_cmd_header_t header,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	if (!sz)
		return 0;
	if (sz * 4 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_wait(drm_device_t * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	return 0;
}

static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
				       cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}

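/* Illustrative userland sketch (not part of this driver): the command
 * stream parsed above is a packed sequence of drm_radeon_cmd_header_t
 * headers, each followed by its payload.  A minimal stream holding a
 * single payload-less WAIT command could be built roughly as below;
 * fd is a hypothetical placeholder.
 *
 *	drm_radeon_cmd_header_t h;
 *	drm_radeon_cmd_buffer_t cb;
 *
 *	h.i = 0;
 *	h.header.cmd_type = RADEON_CMD_WAIT;
 *	h.wait.flags = RADEON_WAIT_3D;
 *
 *	cb.buf = (char *)&h;
 *	cb.bufsz = sizeof(h);
 *	cb.nbox = 0;
 *	cb.boxes = NULL;
 *	drmCommandWrite(fd, DRM_RADEON_CMDBUF, &cb, sizeof(cb));
 */
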
static int radeon_cp_getparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
				 sizeof(param));

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param.param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(2);
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio->offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
		/*
		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
		 * pointer which can't fit into an int-sized variable.  According to
		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
		 * not supporting it shouldn't be a problem.  If the same functionality
		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
		 * so backwards-compatibility for the embedded platforms can be
		 * maintained.  --davidm 4-Feb-2004.
		 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	case RADEON_PARAM_CARD_TYPE:
		if (dev_priv->flags & CHIP_IS_PCIE)
			value = RADEON_CARD_PCIE;
		else if (dev_priv->flags & CHIP_IS_AGP)
			value = RADEON_CARD_AGP;
		else
			value = RADEON_CARD_PCI;
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return DRM_ERR(EFAULT);
	}

	return 0;
}

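/* Illustrative userland sketch (not part of this driver): parameters
 * are read back through a pointer supplied by the client, so the
 * write-read form of the libdrm command helper is used; fd is a
 * hypothetical placeholder.
 *
 *	drm_radeon_getparam_t gp;
 *	int value;
 *
 *	gp.param = RADEON_PARAM_GART_BUFFER_OFFSET;
 *	gp.value = &value;
 *	drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
 */
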
static int radeon_cp_setparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_setparam_t sp;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
				 sizeof(sp));

	switch (sp.param) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = filp_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp.value == 0) {
			DRM_DEBUG("color tiling disabled\n");
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 0;
		} else if (sp.value == 1) {
			DRM_DEBUG("color tiling enabled\n");
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	case RADEON_SETPARAM_PCIGART_LOCATION:
		dev_priv->pcigart_offset = sp.value;
		break;
	case RADEON_SETPARAM_NEW_MEMMAP:
		dev_priv->new_memmap = sp.value;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", sp.param);
		return DRM_ERR(EINVAL);
	}

	return 0;
}

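/* Descriptive note: RADEON_SETPARAM_FB_LOCATION feeds the per-file
 * radeon_fb_delta that the offset checking helpers earlier in this
 * file add to client-supplied offsets when translating them into card
 * address space.
 */
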
/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any alloced GART memory.
 *    - Free any alloced radeon surfaces.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
{
	if (dev->dev_private) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		if (dev_priv->page_flipping) {
			radeon_do_cleanup_pageflip(dev);
		}
		radeon_mem_release(filp, dev_priv->gart_heap);
		radeon_mem_release(filp, dev_priv->fb_heap);
		radeon_surfaces_release(filp, dev_priv);
	}
}

void radeon_driver_lastclose(drm_device_t * dev)
{
	radeon_do_release(dev);
}

int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_DEBUG("\n");
	radeon_priv =
	    (struct drm_radeon_driver_file_fields *)
	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);

	if (!radeon_priv)
		return -ENOMEM;

	filp_priv->driver_priv = radeon_priv;

	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    filp_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}

drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);