/* radeon_state.c -- State support for Radeon -*- linux-c -*-
 *
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
/* ================================================================
 * Helper functions for client state checking and fixup
 */

static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    drm_file_t * filp_priv,
						    u32 * offset)
{
	u32 off = *offset;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Offsets already inside the framebuffer/GART aperture are
	 * accepted as-is.
	 */
	if (off >= dev_priv->fb_location &&
	    off < (dev_priv->gart_vm_start + dev_priv->gart_size))
		return 0;

	/* Otherwise, try relocating by the client's framebuffer delta. */
	radeon_priv = filp_priv->driver_priv;
	off += radeon_priv->radeon_fb_delta;

	DRM_DEBUG("offset fixed up to 0x%x\n", off);

	if (off < dev_priv->fb_location ||
	    off >= (dev_priv->gart_vm_start + dev_priv->gart_size))
		return DRM_ERR(EINVAL);

	*offset = off;

	return 0;
}
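/* Example (illustrative, not from the original source): with
 * fb_location = 0xe0000000 and a client whose radeon_fb_delta is
 * 0xe0000000, a client-relative offset of 0x100000 fails the first
 * range test, is fixed up to 0xe0100000, and passes the second.
 */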
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[(RADEON_RB3D_DEPTHOFFSET
							 - RADEON_PP_MISC) /
							4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[(RADEON_RB3D_COLOROFFSET
							 - RADEON_PP_CNTL) /
							4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &data[(RADEON_PP_TXOFFSET_0
							 - RADEON_PP_TXFILTER_0) /
							4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return DRM_ERR(EINVAL);
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset
				    (dev_priv, filp_priv, &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;
	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return DRM_ERR(EINVAL);
	}

	return 0;
}
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     drm_file_t * filp_priv,
						     drm_radeon_kcmd_buffer_t *cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return DRM_ERR(EINVAL);
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return DRM_ERR(EINVAL);
	}

	/* Check client state and fix it up if necessary */
	if (cmd[0] & 0x8000) {	/* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, filp_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return DRM_ERR(EINVAL);
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
	}

	return 0;
}
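/* Worked example (illustrative): the swap blit later in this file emits
 * CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5), i.e. a header whose count
 * field (bits 29:16) is 5, so *cmdsz = 2 + 5 = 7 dwords: one header
 * plus six payload dwords.  The pitch/offset dwords vetted above pack
 * a 1024-byte-aligned address into the low bits as (offset >> 10),
 * which is why the fixup shifts by 10 in each direction.
 */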
/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
					     drm_clip_rect_t * box)
{
	RING_LOCALS;

	DRM_DEBUG("box: x1=%d y1=%d x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}
/* Emit 1.1 state
 */
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
			     drm_file_t * filp_priv,
			     drm_radeon_context_regs_t * ctx,
			     drm_radeon_texture_regs_t * tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return DRM_ERR(EINVAL);
		}

		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return DRM_ERR(EINVAL);
		}
		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return DRM_ERR(EINVAL);
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}
/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
			      drm_file_t * filp_priv,
			      drm_radeon_state_t * state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, filp_priv, &state->context,
				 state->tex, state->dirty);
}
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
	 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
	 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
};
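/* Illustrative sketch (not in the original file): the cmdbuf parser
 * looks a client-supplied id up in this table before emitting the
 * packet, roughly:
 *
 *	if (id >= RADEON_MAX_STATE_PACKETS)
 *		return DRM_ERR(EINVAL);		(unknown packet id)
 *	sz = packet[id].len;			(payload dwords)
 *	reg = packet[id].start;			(first register)
 *
 * radeon_check_and_fixup_packets() above then vets any memory offsets
 * the payload carries before the dwords reach the ring.
 */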
/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box(drm_radeon_private_t * dev_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);
	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}
static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}
/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear(drm_device_t * dev,
				     drm_radeon_clear_t * clear,
				     drm_radeon_clear_rect_t * depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	/* When page flipped, front and back are swapped from the
	 * client's point of view, so swap the clear flags too.
	 */
	if (dev_priv->page_flipping && dev_priv->current_page == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;
		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}
	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
						       2) : (dev_priv->
							     depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!? */
			clearmask = 0x0;
		}
		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();
		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}
		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}
	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	if ((dev_priv->microcode_version == UCODE_R200) &&
	    (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;
		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}
		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;
		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;
		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_swap(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(7);

		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}
	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_flip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *) dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("%s: page=%d pfCurrentPage=%d\n",
		  __FUNCTION__,
		  dev_priv->current_page, dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
	    1 - dev_priv->current_page;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}
static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}
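/* Example (illustrative): a RADEON_PRIM_TYPE_TRI_LIST with nr = 5 is
 * rejected (5 % 3 != 0), while a TRI_STRIP with nr = 3 passes
 * (nr < 3 is false).  A nonzero return means "bad".
 */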
typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;
static void radeon_cp_dispatch_vertex(drm_device_t * dev,
				      drm_buf_t * buf,
				      drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}
static void radeon_cp_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING(2);
	RADEON_DISPATCH_AGE(buf_priv->age);
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}

static void radeon_cp_dispatch_indirect(drm_device_t * dev,
					drm_buf_t * buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}
static void radeon_cp_dispatch_indices(drm_device_t * dev,
				       drm_buf_t * elt_buf,
				       drm_radeon_tcl_prim_t * prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);
}
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
static int radeon_cp_dispatch_texture(DRMFILE filp,
				      drm_device_t * dev,
				      drm_radeon_texture_t * tex,
				      drm_radeon_tex_image_t * image)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
		DRM_ERROR("Invalid destination offset\n");
		return DRM_ERR(EINVAL);
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch (tex->format) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR("invalid texture format %d\n", tex->format);
		return DRM_ERR(EINVAL);
	}
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);
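	/* Note (illustrative): the blitter source pitch programmed below is
	 * in units of 64 bytes, hence the >> 6; e.g. a 64-pixel-wide
	 * ARGB8888 upload has blit_width = 256 bytes and spitch = 4.
	 */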
	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	} else
		microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);

	do {
		DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			  tex->offset >> 10, tex->pitch, tex->format,
			  image->x, image->y, image->width, image->height);

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if (size > RADEON_MAX_TEXTURE_SIZE) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if (size < 4 && size > 0) {
			size = 4;
		} else if (size == 0) {
			return 0;
		}
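		/* Worked example (illustrative, assuming the usual 64 KB
		 * RADEON_BUFFER_SIZE): a 1024x1024 ARGB8888 upload has
		 * blit_width = 4096 bytes, so each pass copies
		 * height = 65536 / 4096 = 16 scanlines and this do/while
		 * loop runs 64 times, advancing image->data and
		 * image->height between passes.
		 */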
		buf = radeon_freelist_get(dev);
		if (!buf) {
			radeon_do_cp_idle(dev_priv);
			buf = radeon_freelist_get(dev);
		}
		if (!buf) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer =
		    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					if (DRM_COPY_FROM_USER(buffer, data,
							       tex_width *
							       sizeof(u32))) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
				} else if (tex_width == 32) {
					if (DRM_COPY_FROM_USER
					    (buffer, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					if (DRM_COPY_FROM_USER
					    (buffer + 8, data + 16, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER
					    (buffer, data, tex_width)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					if (DRM_COPY_FROM_USER
					    (buffer, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER
					    (buffer + 8, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER
					    (buffer + 4, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER
					    (buffer + 12, data, 16)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER
					    (buffer, data, tex_width)) {
						DRM_ERROR
						    ("EFAULT on pad, %d bytes\n",
						     tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 8;
					data += tex_width;
				}
			}
		}
		buf->filp = filp;
		buf->used = size;

		offset = dev_priv->gart_buffers_offset + buf->offset;

		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);
		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();

	return 0;
}

static void radeon_cp_dispatch_stipple(drm_device_t * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t * dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}

/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t * new,
			 drm_radeon_private_t * dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;
	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;

	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}

static void radeon_surfaces_release(DRMFILE filp,
				    drm_radeon_private_t * dev_priv)
{
	int i;
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		if (dev_priv->virt_surfaces[i].filp == filp)
			free_surface(filp, dev_priv,
				     dev_priv->virt_surfaces[i].lower);
	}
}
/* ================================================================
 * IOCTL functions
 */
static int radeon_surface_alloc(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t alloc;

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(alloc,
				 (drm_radeon_surface_alloc_t __user *) data,
				 sizeof(alloc));

	if (alloc_surface(&alloc, dev_priv, filp) == -1)
		return DRM_ERR(EINVAL);
	else
		return 0;
}

static int radeon_surface_free(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t memfree;

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(memfree,
				 (drm_radeon_surface_free_t __user *) data,
				 sizeof(memfree));

	if (free_surface(filp, dev_priv, memfree.address))
		return DRM_ERR(EINVAL);
	else
		return 0;
}
static int radeon_cp_clear(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
				 sizeof(clear));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear(dev, &clear, depth_boxes);

	COMMIT_RING();
	return 0;
}
/* Not sure why this isn't set all the time:
 */
static int radeon_do_init_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}

/* Called whenever a client dies, from drm_release.
 * NOTE: Lock isn't necessarily held when this is called!
 */
static int radeon_do_cleanup_pageflip(drm_device_t * dev)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	if (dev_priv->current_page != 0)
		radeon_cp_dispatch_flip(dev);

	dev_priv->page_flipping = 0;
	return 0;
}
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
static int radeon_cp_flip(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev);

	radeon_cp_dispatch_flip(dev);

	COMMIT_RING();
	return 0;
}

static int radeon_cp_swap(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, filp);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap(dev);
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
static int radeon_cp_vertex(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}
2194 /* Build up a prim_t record:
2197 buf->used = vertex.count; /* not used? */
2199 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
2200 if (radeon_emit_state(dev_priv, filp_priv,
2201 &sarea_priv->context_state,
2202 sarea_priv->tex_state,
2203 sarea_priv->dirty)) {
2204 DRM_ERROR("radeon_emit_state failed\n");
2205 return DRM_ERR(EINVAL);
2208 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2209 RADEON_UPLOAD_TEX1IMAGES |
2210 RADEON_UPLOAD_TEX2IMAGES |
2211 RADEON_REQUIRE_QUIESCENCE);
2215 prim.finish = vertex.count; /* unused */
2216 prim.prim = vertex.prim;
2217 prim.numverts = vertex.count;
2218 prim.vc_format = dev_priv->sarea_priv->vc_format;
2220 radeon_cp_dispatch_vertex(dev, buf, &prim);
2223 if (vertex.discard) {
2224 radeon_cp_discard_buffer(dev, buf);
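/* For indexed primitives the index data is embedded in the DMA buffer
 * itself; elts.start is rebased by RADEON_INDEX_PRIM_OFFSET so the
 * dispatcher has room to build the packet header in place, and the
 * alignment and "no header" checks below enforce that layout.
 */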
static int radeon_cp_indices(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indices_t elts;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
				 sizeof(elts));

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);

	if (elts.idx < 0 || elts.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}
	if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts.prim);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts.idx);
		return DRM_ERR(EINVAL);
	}

	count = (elts.end - elts.start) / sizeof(u16);
	elts.start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts.start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
		return DRM_ERR(EINVAL);
	}
	if (elts.start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	buf->used = elts.end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, filp_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return DRM_ERR(EINVAL);
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts.start;
	prim.finish = elts.end;
	prim.prim = elts.prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, buf, &prim);
	if (elts.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_texture(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
				 sizeof(tex));

	if (tex.image == NULL) {
		DRM_ERROR("null texture image!\n");
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex.image,
			       sizeof(image)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);

	COMMIT_RING();
	return ret;
}

static int radeon_cp_stipple(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, filp);

	DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
				 sizeof(stipple));

	if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}

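/* Unlike the typed ioctls above, an indirect buffer is raw CP command
 * data that the kernel does not verify, which is why the ioctl table
 * at the end of this file restricts DRM_RADEON_INDIRECT to the
 * privileged X server.
 */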
static int radeon_cp_indirect(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(indirect,
				 (drm_radeon_indirect_t __user *) data,
				 sizeof(indirect));

	DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
		  indirect.idx, indirect.start, indirect.end, indirect.discard);

	if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}
	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect.idx);
		return DRM_ERR(EINVAL);
	}

	if (indirect.start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect.start, buf->used);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING(2);
	RADEON_WAIT_UNTIL_3D_IDLE();
	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
	if (indirect.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
				 sizeof(vertex));

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex.idx, vertex.discard);

	if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex.idx, dma->buf_count - 1);
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex.idx];

	if (buf->filp != filp) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->filp);
		return DRM_ERR(EINVAL);
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex.idx);
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
			return DRM_ERR(EFAULT);

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex.state[prim.stateidx],
					       sizeof(state)))
				return DRM_ERR(EFAULT);

			if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return DRM_ERR(EINVAL);
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer(dev, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       drm_file_t * filp_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return DRM_ERR(EINVAL);
	}

	if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return DRM_ERR(EINVAL);
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

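/* The scalar/vector emitters below share one pattern: write the start
 * index and stride to the INDX register, then stream the payload into
 * the matching DATA port.  CP_PACKET0_TABLE keeps every payload dword
 * aimed at the same data register instead of auto-incrementing across
 * the register file.
 */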
static __inline__ int radeon_emit_scalars(drm_radeon_private_t * dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t * cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

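/* Identical to radeon_emit_scalars() except for the 0x100 bias on the
 * start offset: the offset field in the command header is only 8 bits
 * wide, so a second command type is used to reach the upper scalar
 * range.
 */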
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t * dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t * cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_vectors(drm_radeon_private_t * dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t * cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING(5 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static int radeon_emit_packet3(drm_device_t * dev,
			       drm_file_t * filp_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_packet3_cliprect(drm_device_t * dev,
					drm_file_t * filp_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_wait(drm_device_t * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	return 0;
}

static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, filp);

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(cmdbuf,
				 (drm_radeon_cmd_buffer_t __user *) data,
				 sizeof(cmdbuf));

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf, cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;
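
	/* From here the stream is parsed: r300-class chips take the
	 * separate r300_do_cp_cmdbuf() path; everything else is a
	 * sequence of 32-bit headers, each optionally followed by
	 * payload dwords (the union layout is drm_radeon_cmd_header_t
	 * in radeon_drm.h).  Roughly:
	 *
	 *	| header | payload ... | header | payload ... |
	 *
	 * e.g. a RADEON_CMD_PACKET header names an entry in the
	 * packet[] table and is followed by that entry's register
	 * values, while RADEON_CMD_WAIT carries its flags inside the
	 * header itself.
	 */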

	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf.bufsz >= sizeof(header)) {

		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, filp_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->filp != filp || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, filp_priv, &cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}

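/* For reference, userspace reaches the getparam ioctl through the
 * libdrm command wrappers.  An illustrative (not canonical) query of
 * the GART buffer offset, assuming fd is an open handle on the DRM
 * device, looks roughly like:
 *
 *	drm_radeon_getparam_t gp;
 *	int value;
 *
 *	gp.param = RADEON_PARAM_GART_BUFFER_OFFSET;
 *	gp.value = &value;
 *	drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
 */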
static int radeon_cp_getparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
				 sizeof(param));

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param.param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(2);
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio_offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
		/*
		 * This ioctl() doesn't work on 64-bit platforms because
		 * hw_lock is a pointer which can't fit into an int-sized
		 * variable.  According to Michel Dänzer, the ioctl() is
		 * only used on embedded platforms, so not supporting it
		 * shouldn't be a problem.  If the same functionality is
		 * needed on 64-bit platforms, a new ioctl() would have to
		 * be added, so backwards-compatibility for the embedded
		 * platforms can be maintained.  --davidm 4-Feb-2004.
		 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return DRM_ERR(EFAULT);
	}

	return 0;
}

static int radeon_cp_setparam(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_setparam_t sp;
	struct drm_radeon_driver_file_fields *radeon_priv;

	if (!dev_priv) {
		DRM_ERROR("%s called with no initialization\n", __FUNCTION__);
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);

	DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
				 sizeof(sp));

	switch (sp.param) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = filp_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp.value == 0) {
			DRM_DEBUG("color tiling disabled\n");
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 0;
		} else if (sp.value == 1) {
			DRM_DEBUG("color tiling enabled\n");
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	case RADEON_SETPARAM_PCIGART_LOCATION:
		dev_priv->pcigart_offset = sp.value;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", sp.param);
		return DRM_ERR(EINVAL);
	}

	return 0;
}

/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any allocated GART memory.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_prerelease(drm_device_t * dev, DRMFILE filp)
{
	if (dev->dev_private) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		if (dev_priv->page_flipping) {
			radeon_do_cleanup_pageflip(dev);
		}
		radeon_mem_release(filp, dev_priv->gart_heap);
		radeon_mem_release(filp, dev_priv->fb_heap);
		radeon_surfaces_release(filp, dev_priv);
	}
}

void radeon_driver_pretakedown(drm_device_t * dev)
{
	radeon_do_release(dev);
}

int radeon_driver_open_helper(drm_device_t * dev, drm_file_t * filp_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	radeon_priv =
	    (struct drm_radeon_driver_file_fields *)
	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);

	if (!radeon_priv)
		return -ENOMEM;

	filp_priv->driver_priv = radeon_priv;
	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_free_filp_priv(drm_device_t * dev, drm_file_t * filp_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    filp_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}

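/* The two flags after each handler are this DRM version's auth_needed
 * and root_only bits: every entry requires an authenticated client,
 * and the init/teardown and indirect-buffer paths are additionally
 * restricted to the root/master process.
 */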
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, 1, 1},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, 1, 0},
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, 1, 0}
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);