/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
/* ================================================================
 * Helper functions for client state checking and fixup
 */

static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    drm_file_t * filp_priv,
						    u32 * offset)
{
	u32 off = *offset;
	u32 fb_start = dev_priv->fb_location;
	u32 fb_end = fb_start + dev_priv->fb_size - 1;
	u32 gart_start = dev_priv->gart_vm_start;
	u32 gart_end = gart_start + dev_priv->gart_size - 1;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */

	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if ((off >= fb_start && off <= fb_end) ||
	    (off >= gart_start && off <= gart_end))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = filp_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > fb_end)
		off = off - fb_end - 1 + gart_start;

	/* Now recheck and fail if out of bounds */
	if ((off >= fb_start && off <= fb_end) ||
	    (off >= gart_start && off <= gart_end)) {
		DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
		*offset = off;
		return 0;
	}
	return DRM_ERR(EINVAL);
}
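/* Illustrative sketch (compiled out, not part of the driver): the same
 * two-window bounds-and-fixup logic as above, reduced to plain integers.
 * The window bases and the zero-based fixup delta are hypothetical example
 * values, not anything the hardware mandates.
 */
#if 0
static int example_fixup_offset(u32 * offset)
{
	const u32 fb_start = 0xe0000000, fb_size = 0x08000000;	/* assumed */
	const u32 gart_start = 0xf0000000, gart_size = 0x02000000;	/* assumed */
	u32 off = *offset;

	/* already inside one of the two legal apertures? */
	if ((off >= fb_start && off < fb_start + fb_size) ||
	    (off >= gart_start && off < gart_start + gart_size))
		return 0;

	/* looks zero-based: shift it into the framebuffer window */
	if (off < fb_size + gart_size)
		off += fb_start;	/* stands in for radeon_fb_delta */

	/* past the framebuffer: reinterpret as a GART offset */
	if (off >= fb_start + fb_size)
		off = off - (fb_start + fb_size) + gart_start;

	if ((off >= fb_start && off < fb_start + fb_size) ||
	    (off >= gart_start && off < gart_start + gart_size)) {
		*offset = off;
		return 0;
	}
	return -1;		/* out of bounds, reject */
}
#endif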
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case R200_EMIT_VAP_CTL:{
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t *filp_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}
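/* Illustrative sketch (compiled out): how the type-3 header encodes the
 * packet length.  Bits 31:30 hold the packet type (11b for type 3) and
 * bits 29:16 hold the number of data dwords minus one, so the total size
 * including the header is that field plus two.
 */
#if 0
static unsigned int example_packet3_size(u32 header)
{
	/* RADEON_CP_PACKET_COUNT_MASK is 0x3fff0000 */
	return 2 + ((header & 0x3fff0000) >> 16);
}
/* e.g. CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5) has a count field of 5,
 * so example_packet3_size() reports 7 dwords in total. */
#endif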
/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
/* Emit 1.1 state
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     drm_file_t * filp_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}

/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
	 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
	 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};
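/* Illustrative sketch (compiled out): how a state packet ID is expanded
 * using the table above -- the ID indexes the table, and the start
 * register plus dword count come straight from the entry.  emit_dword()
 * is a hypothetical stand-in for the OUT_RING machinery.
 */
#if 0
static void example_emit_packet(int id, const u32 *data)
{
	int reg = packet[id].start;	/* first register of the range */
	int sz = packet[id].len;	/* number of dwords to write */

	/* CP_PACKET0(reg, n) writes n + 1 consecutive registers */
	emit_dword(CP_PACKET0(reg, sz - 1));
	while (sz--)
		emit_dword(*data++);
}
#endif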
/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
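/* Illustrative sketch (compiled out): the RGB565 packing used above.
 * 255,0,255 (the purple flip box below) packs to 0xf81f.
 */
#if 0
static unsigned int example_pack_rgb565(int r, int g, int b)
{
	return ((r & 0xf8) << 8)	/* top 5 bits of red   -> 15:11 */
	    | ((g & 0xfc) << 3)		/* top 6 bits of green -> 10:5  */
	    | ((b & 0xf8) >> 3);	/* top 5 bits of blue  ->  4:0  */
}
#endif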
static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
						       2) : (dev_priv->
							     depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
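			/* Worked example: (0xff << 22) | (0xff << 6) | 0x003f003f
			 * = 0x3fc00000 | 0x00003fc0 | 0x003f003f = 0x3fff3fff,
			 * i.e. the low 14 bits of each 16-bit half set.
			 */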
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & RADEON_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(9);

		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;

	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;

	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;

	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;

	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;

	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;

	default:
		return 1;
	}
}

typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;
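/* Illustrative sketch (compiled out): what bad_prim_vertex_nr() accepts.
 * The vertex count must match the primitive's arity or the prim is dropped.
 */
#if 0
static void example_prim_checks(void)
{
	/* returns 0 (valid): two whole triangles */
	bad_prim_vertex_nr(RADEON_PRIM_TYPE_TRI_LIST, 6);
	/* returns nonzero (invalid): 4 is not a multiple of 3 */
	bad_prim_vertex_nr(RADEON_PRIM_TYPE_TRI_LIST, 4);
	/* returns 0 (valid): one line needs an even, nonzero count */
	bad_prim_vertex_nr(RADEON_PRIM_TYPE_LINE, 2);
	/* returns nonzero (invalid): odd vertex count for lines */
	bad_prim_vertex_nr(RADEON_PRIM_TYPE_LINE, 3);
}
#endif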
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}

static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
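/* Illustrative sketch (compiled out): the even-dword padding rule used
 * above.  A 3-dword range grows to 4 by appending a type-2 NOP packet
 * before the indirect buffer is fired.
 */
#if 0
static int example_pad_indirect(u32 *data, int dwords)
{
	if (dwords & 1)		/* odd length: the CP requires even */
		data[dwords++] = 0x80000000;	/* RADEON_CP_PACKET2 (NOP) */
	return dwords;
}
#endif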
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);
}
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}

		buf = radeon_freelist_get(dev);
		if (0 && !buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

#define RADEON_COPY_MT(_buf, _data, _width) \
	do { \
		if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
			DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
			return DRM_ERR(EFAULT); \
		} \
	} while (0)

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						       (int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
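/* Illustrative sketch (compiled out): why spitch = blit_width >> 6 above.
 * The blitter's pitch field counts 64-byte groups, so a 256-pixel-wide
 * ARGB8888 upload (blit_width = 1024 bytes) needs spitch = 16.  The
 * packed dword layout mirrors the OUT_RING above.
 */
#if 0
static u32 example_src_pitch_offset(u32 blit_width_bytes, u32 offset_bytes)
{
	u32 spitch = blit_width_bytes >> 6;	/* pitch in 64-byte units */
	return (spitch << 22) | (offset_bytes >> 10);	/* pitch | 1KB offset */
}
#endif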
static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}
/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
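/* Worked example for the "stretch" path above (values hypothetical):
 * suppose hardware surface 0 covers [0x00100000, 0x001fffff] with
 * refcount 1 and matching flags.  A new request for
 * [0x00200000, 0x002fffff] hits the "extend after" case: refcount
 * becomes 2 and the single hardware surface now spans both ranges,
 * so no extra surface register set is consumed.
 */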
static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}

static void radeon_surfaces_release(DRMFILE filp,
				    drm_radeon_private_t * dev_priv)
{
	int i;
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		if (dev_priv->virt_surfaces[i].filp == filp)
			free_surface(filp, dev_priv,
				     dev_priv->virt_surfaces[i].lower);
	}
}
/* ================================================================
 * IOCTL functions
 */
static int radeon_surface_alloc(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t alloc;

	DRM_COPY_FROM_USER_IOCTL(alloc,
				 (drm_radeon_surface_alloc_t __user *) data,
				 sizeof(alloc));

	if (alloc_surface(&alloc, dev_priv, filp) == -1)
		return DRM_ERR(EINVAL);
	else
		return 0;
}

static int radeon_surface_free(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t memfree;

	DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
				 sizeof(memfree));

	if (free_surface(filp, dev_priv, memfree.address))
		return DRM_ERR(EINVAL);
	else
		return 0;
}

static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
/* Not sure why this isn't set all the time:
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}

/* Called whenever a client dies, from drm_release.
 * NOTE: Lock isn't necessarily held when this is called!
 */
static int radeon_do_cleanup_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	if (dev_priv->current_page != 0)
		radeon_cp_dispatch_flip(dev);

	dev_priv->page_flipping = 0;
	return 0;
}

/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}
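
/* Queue a back-to-front blit for every current cliprect and give up
 * ownership of the 3D context state.
 */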
static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
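
/* Dispatch a vertex buffer as a single primitive, re-emitting any
 * dirty client state first.
 */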
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, filp_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return DRM_ERR(EINVAL);
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, buf, &prim);
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
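
/* Dispatch an indexed primitive whose index data follows a header
 * inside the DMA buffer; the start offset must stay 8-byte aligned
 * and past the already-used portion of the buffer.
 */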
static int radeon_cp_indices(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indices_t elts;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
				 sizeof(elts));

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);

	if (elts.idx < 0 || elts.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts.idx);
		return DRM_ERR(EINVAL);
	}

	count = (elts.end - elts.start) / sizeof(u16);
	elts.start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts.start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
		return DRM_ERR(EINVAL);
	}
	if (elts.start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	buf->used = elts.end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, filp_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return DRM_ERR(EINVAL);
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts.start;
	prim.finish = elts.end;
	prim.prim = elts.prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, buf, &prim);
	if (elts.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
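
/* Upload a texture image into card memory; the image descriptor is
 * copied from userspace and the actual blit is done by
 * radeon_cp_dispatch_texture().
 */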
static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
				 sizeof(tex));

	if (tex.image == NULL) {
		DRM_ERROR("null texture image!\n");
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex.image,
			       sizeof(image)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

	COMMIT_RING();
	return ret;
}
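
/* Load the 32x32 polygon stipple pattern supplied by the client. */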
static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));

	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}
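
/* Submit an unverified indirect buffer.  The ioctl table below
 * restricts this path to privileged (X server) clients.
 */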
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
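
/* Second-generation vertex dispatch: one buffer carries an array of
 * primitives, each referencing a saved state block that is re-emitted
 * whenever the state index changes between primitives.
 */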
static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}
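
/* Command-buffer emit helpers.  Each consumes one pre-decoded header
 * plus its payload from cmdbuf and advances the buf/bufsz cursor past
 * the data it used.
 */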
static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

/* God this is ugly
 */
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					    drm_radeon_cmd_header_t header,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	if (!sz)
		return 0;
	if (sz * 4 > cmdbuf->bufsz)
		return DRM_ERR(EINVAL);

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
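
/* PACKET3 commands carry their own length; verify the packet (and fix
 * up any offsets it contains) before copying it to the ring.
 */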
static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_packet3_cliprect(drm_device_t *dev,
					drm_file_t *filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_wait(drm_device_t * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	return 0;
}
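
/* Main command-buffer ioctl: copy the whole buffer into kernel memory
 * once to avoid check-then-use races, then decode and emit each packet
 * in turn.  R300-class chips take the separate r300_do_cp_cmdbuf() path.
 */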
static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
				       cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
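
/* Report driver state back to the client; most parameters are simple
 * reads of dev_priv fields or the scratch registers.
 */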
static int radeon_cp_getparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
				 sizeof(param));

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param.param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(2);
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio->offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
		/*
		 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
		 * pointer which can't fit into an int-sized variable.  According to
		 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
		 * not supporting it shouldn't be a problem.  If the same functionality
		 * is needed on 64-bit platforms, a new ioctl() would have to be added,
		 * so backwards-compatibility for the embedded platforms can be
		 * maintained.  --davidm 4-Feb-2004.
		 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	case RADEON_PARAM_SCRATCH_OFFSET:
		if (!dev_priv->writeback_works)
			return DRM_ERR(EINVAL);
		value = RADEON_SCRATCH_REG_OFFSET;
		break;
	case RADEON_PARAM_CARD_TYPE:
		if (dev_priv->flags & RADEON_IS_PCIE)
			value = RADEON_CARD_PCIE;
		else if (dev_priv->flags & RADEON_IS_AGP)
			value = RADEON_CARD_AGP;
		else
			value = RADEON_CARD_PCI;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", param.param);
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return DRM_ERR(EFAULT);
	}

	return 0;
}
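
/* Accept configuration values from the client: per-file framebuffer
 * delta, color tiling, PCI GART location and the new memory map flag.
 */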
static int radeon_cp_setparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_setparam_t sp;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
				 sizeof(sp));

	switch (sp.param) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = filp_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp.value == 0) {
			DRM_DEBUG("color tiling disabled\n");
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 0;
		} else if (sp.value == 1) {
			DRM_DEBUG("color tiling enabled\n");
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	case RADEON_SETPARAM_PCIGART_LOCATION:
		dev_priv->pcigart_offset = sp.value;
		break;
	case RADEON_SETPARAM_NEW_MEMMAP:
		dev_priv->new_memmap = sp.value;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", sp.param);
		return DRM_ERR(EINVAL);
	}

	return 0;
}

/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any alloced GART memory.
 *    - Free any alloced radeon surfaces.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_preclose(drm_device_t * dev, DRMFILE filp)
{
	if (dev->dev_private) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		if (dev_priv->page_flipping) {
			radeon_do_cleanup_pageflip(dev);
		}
		radeon_mem_release(filp, dev_priv->gart_heap);
		radeon_mem_release(filp, dev_priv->fb_heap);
		radeon_surfaces_release(filp, dev_priv);
	}
}

void radeon_driver_lastclose(drm_device_t * dev)
{
	radeon_do_release(dev);
}

int radeon_driver_open(drm_device_t * dev, drm_file_t * filp_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_DEBUG("\n");
	radeon_priv =
	    (struct drm_radeon_driver_file_fields *)
	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);

	if (!radeon_priv)
		return -ENOMEM;

	filp_priv->driver_priv = radeon_priv;

	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_postclose(drm_device_t * dev, drm_file_t * filp_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    filp_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}
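
/* Ioctl dispatch table.  Entries flagged DRM_MASTER|DRM_ROOT_ONLY are
 * reserved for the privileged X server; the rest only require DRM
 * authentication.
 */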
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);