/* radeon_state.c -- State support for Radeon -*- linux-c -*-
 *
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
/* ================================================================
 * Helper functions for client state checking and fixup
 */

static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
						     drm_file_t *filp_priv,
						     u32 *offset ) {
	u32 off = *offset;
	struct drm_radeon_driver_file_fields *radeon_priv;

	if ( off >= dev_priv->fb_location &&
	     off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
		return 0;

	radeon_priv = filp_priv->driver_priv;
	off += radeon_priv->radeon_fb_delta;

	DRM_DEBUG( "offset fixed up to 0x%x\n", off );

	if ( off < dev_priv->fb_location ||
	     off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
		return DRM_ERR( EINVAL );

	*offset = off;

	return 0;
}
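/* Editor's illustration (values hypothetical, not from the driver): with
 * fb_location = 0x08000000, gart_vm_start + gart_size = 0x10000000 and
 * radeon_fb_delta = 0x08000000, a client offset of 0x00200000 fails the
 * first range test, is rebased to 0x08200000, passes the second test and
 * is written back through *offset.  Anything still outside the range
 * after fixup is rejected with EINVAL.
 */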
static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
						      drm_file_t *filp_priv,
						      int id,
						      u32 *data ) {
	switch ( id ) {

	case RADEON_EMIT_PP_MISC:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_RB3D_DEPTHOFFSET
							    - RADEON_PP_MISC ) / 4] ) ) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_RB3D_COLOROFFSET
							    - RADEON_PP_CNTL ) / 4] ) ) {
			DRM_ERROR( "Invalid colour buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[0] ) ) {
			DRM_ERROR( "Invalid R200 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_PP_TXOFFSET_0
							    - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
			DRM_ERROR( "Invalid R100 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5: {
		int i;
		for ( i = 0; i < 5; i++ ) {
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
							    &data[i] ) ) {
				DRM_ERROR( "Invalid R200 cubic texture offset\n" );
				return DRM_ERR( EINVAL );
			}
		}
		break;
	}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2: {
		int i;
		for ( i = 0; i < 5; i++ ) {
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
							    &data[i] ) ) {
				DRM_ERROR( "Invalid R100 cubic texture offset\n" );
				return DRM_ERR( EINVAL );
			}
		}
		break;
	}
	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR( "Unknown state packet ID %d\n", id );
		return DRM_ERR( EINVAL );
	}

	return 0;
}
static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
						      drm_file_t *filp_priv,
						      drm_radeon_cmd_buffer_t *cmdbuf,
						      unsigned int *cmdsz ) {
	u32 *cmd = (u32 *) cmdbuf->buf;

	*cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );

	if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
		DRM_ERROR( "Not a type 3 packet\n" );
		return DRM_ERR( EINVAL );
	}

	if ( 4 * *cmdsz > cmdbuf->bufsz ) {
		DRM_ERROR( "Packet size larger than size of data provided\n" );
		return DRM_ERR( EINVAL );
	}

	/* Check client state and fix it up if necessary */
	if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
			offset = cmd[2] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid first packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10;
		}

		if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
		     ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
			offset = cmd[3] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid second packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10;
		}
	}

	return 0;
}
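/* Editor's worked example (hypothetical header value): 0xC0013400 has bits
 * 31:30 = 3, i.e. RADEON_CP_PACKET3, opcode byte 0x34 and a count field of
 * 1 in bits 29:16, so *cmdsz = 2 + 1 = 3 dwords: the header plus two
 * parameters.  The 4 * *cmdsz test above then checks that those 12 bytes
 * are really present in cmdbuf->bufsz.
 */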
/* ================================================================
 * CP hardware state programming functions
 */

static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
					      drm_clip_rect_t *box )
{
	RING_LOCALS;

	DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		   box->x1, box->y1, box->x2, box->y2 );

	BEGIN_RING( 4 );
	OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
	OUT_RING( (box->y1 << 16) | box->x1 );
	OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
	OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
	ADVANCE_RING();
}
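/* Editor's note: drm_clip_rect_t stores an exclusive bottom-right corner,
 * while RADEON_RE_WIDTH_HEIGHT appears to take an inclusive one -- hence
 * the (x2 - 1, y2 - 1) adjustment above.
 */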
/* Emit 1.1 state
 */
static int radeon_emit_state( drm_radeon_private_t *dev_priv,
			      drm_file_t *filp_priv,
			      drm_radeon_context_regs_t *ctx,
			      drm_radeon_texture_regs_t *tex,
			      unsigned int dirty )
{
	RING_LOCALS;
	DRM_DEBUG( "dirty=0x%08x\n", dirty );

	if ( dirty & RADEON_UPLOAD_CONTEXT ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &ctx->rb3d_depthoffset ) ) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}

		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &ctx->rb3d_coloroffset ) ) {
			DRM_ERROR( "Invalid colour buffer offset\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 14 );
		OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
		OUT_RING( ctx->pp_misc );
		OUT_RING( ctx->pp_fog_color );
		OUT_RING( ctx->re_solid_color );
		OUT_RING( ctx->rb3d_blendcntl );
		OUT_RING( ctx->rb3d_depthoffset );
		OUT_RING( ctx->rb3d_depthpitch );
		OUT_RING( ctx->rb3d_zstencilcntl );
		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
		OUT_RING( ctx->pp_cntl );
		OUT_RING( ctx->rb3d_cntl );
		OUT_RING( ctx->rb3d_coloroffset );
		OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
		OUT_RING( ctx->rb3d_colorpitch );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_VERTFMT ) {
		BEGIN_RING( 2 );
		OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
		OUT_RING( ctx->se_coord_fmt );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_LINE ) {
		BEGIN_RING( 5 );
		OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
		OUT_RING( ctx->re_line_pattern );
		OUT_RING( ctx->re_line_state );
		OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
		OUT_RING( ctx->se_line_width );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
		BEGIN_RING( 5 );
		OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
		OUT_RING( ctx->pp_lum_matrix );
		OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
		OUT_RING( ctx->pp_rot_matrix_0 );
		OUT_RING( ctx->pp_rot_matrix_1 );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_MASKS ) {
		BEGIN_RING( 4 );
		OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
		OUT_RING( ctx->rb3d_stencilrefmask );
		OUT_RING( ctx->rb3d_ropcntl );
		OUT_RING( ctx->rb3d_planemask );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
		BEGIN_RING( 7 );
		OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
		OUT_RING( ctx->se_vport_xscale );
		OUT_RING( ctx->se_vport_xoffset );
		OUT_RING( ctx->se_vport_yscale );
		OUT_RING( ctx->se_vport_yoffset );
		OUT_RING( ctx->se_vport_zscale );
		OUT_RING( ctx->se_vport_zoffset );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_SETUP ) {
		BEGIN_RING( 4 );
		OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
		OUT_RING( ctx->se_cntl );
		OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
		OUT_RING( ctx->se_cntl_status );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_MISC ) {
		BEGIN_RING( 2 );
		OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
		OUT_RING( ctx->re_misc );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX0 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[0].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 0\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
		OUT_RING( tex[0].pp_txfilter );
		OUT_RING( tex[0].pp_txformat );
		OUT_RING( tex[0].pp_txoffset );
		OUT_RING( tex[0].pp_txcblend );
		OUT_RING( tex[0].pp_txablend );
		OUT_RING( tex[0].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
		OUT_RING( tex[0].pp_border_color );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX1 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[1].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 1\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
		OUT_RING( tex[1].pp_txfilter );
		OUT_RING( tex[1].pp_txformat );
		OUT_RING( tex[1].pp_txoffset );
		OUT_RING( tex[1].pp_txcblend );
		OUT_RING( tex[1].pp_txablend );
		OUT_RING( tex[1].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
		OUT_RING( tex[1].pp_border_color );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX2 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[2].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 2\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
		OUT_RING( tex[2].pp_txfilter );
		OUT_RING( tex[2].pp_txformat );
		OUT_RING( tex[2].pp_txoffset );
		OUT_RING( tex[2].pp_txcblend );
		OUT_RING( tex[2].pp_txablend );
		OUT_RING( tex[2].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
		OUT_RING( tex[2].pp_border_color );
		ADVANCE_RING();
	}

	return 0;
}
/* Emit 1.2 state
 */
static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
			       drm_file_t *filp_priv,
			       drm_radeon_state_t *state )
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING( 3 );
		OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
		OUT_RING( state->context2.se_zbias_factor );
		OUT_RING( state->context2.se_zbias_constant );
		ADVANCE_RING();
	}

	return radeon_emit_state( dev_priv, filp_priv, &state->context,
				  state->tex, state->dirty );
}
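/* Editor's note: the 1.2 path above only adds the two zbias registers and
 * then delegates to the 1.1 emit, so the 1.2 state is a strict superset of
 * the 1.1 state.
 */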
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{ RADEON_PP_MISC, 7, "RADEON_PP_MISC" },
	{ RADEON_PP_CNTL, 3, "RADEON_PP_CNTL" },
	{ RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH" },
	{ RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN" },
	{ RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH" },
	{ RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX" },
	{ RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0" },
	{ RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK" },
	{ RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE" },
	{ RADEON_SE_CNTL, 2, "RADEON_SE_CNTL" },
	{ RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS" },
	{ RADEON_RE_MISC, 1, "RADEON_RE_MISC" },
	{ RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0" },
	{ RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0" },
	{ RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1" },
	{ RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1" },
	{ RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2" },
	{ RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2" },
	{ RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR" },
	{ RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT" },
	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17, "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
	{ R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
	{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
	{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
	{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
	{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
	{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
	{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
	{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
	{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
	{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
	{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
	{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
	{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
	{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
	{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
	{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
	{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
	{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
	{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
	{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
	{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
	{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
	{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
	{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
	{ R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
	{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
	{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
	{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
	{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
	{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
	{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },
	{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },
	{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },
	{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },
	{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },
	{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },
	{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },
	{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },
	{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },
	{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
	{ R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
	{ R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
	{ R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
	{ R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
	{ R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
	{ R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
	{ R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
	{ R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
	{ R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
	{ R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
	{ R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
	{ R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
	{ RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
	{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
	{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
	{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
	{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
	{ RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0" },
	{ RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0" },
	{ RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1" },
	{ RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0" },
	{ RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2" },
	{ RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0" },
	{ R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF" },
};
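/* Editor's sketch (illustration only, not the dispatch code itself): a
 * consumer of this table is expected to bounds-check the client-supplied
 * packet id and then emit packet[id].len registers starting at register
 * packet[id].start, after the payload has passed
 * radeon_check_and_fixup_packets(), e.g.:
 *
 *	if (id >= RADEON_MAX_STATE_PACKETS)
 *		return DRM_ERR(EINVAL);
 *	sz  = packet[id].len;
 *	reg = packet[id].start;
 */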
/* ================================================================
 * Performance monitoring functions
 */

static void radeon_clear_box( drm_radeon_private_t *dev_priv,
			      int x, int y, int w, int h,
			      int r, int g, int b )
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch ( dev_priv->color_fmt ) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) |
			 ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING( 4 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
	OUT_RING( 0xffffffff );
	ADVANCE_RING();

	BEGIN_RING( 6 );
	OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
	OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		  RADEON_GMC_BRUSH_SOLID_COLOR |
		  (dev_priv->color_fmt << 8) |
		  RADEON_GMC_SRC_DATATYPE_COLOR |
		  RADEON_ROP3_P |
		  RADEON_GMC_CLR_CMP_CNTL_DIS );

	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
		OUT_RING( dev_priv->front_pitch_offset );
	} else {
		OUT_RING( dev_priv->back_pitch_offset );
	}

	OUT_RING( color );

	OUT_RING( (x << 16) | y );
	OUT_RING( (w << 16) | h );

	ADVANCE_RING();
}
static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_FLIP )
		radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );

	/* Red box if we have to wait for idle at any point
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE )
		radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD )
		radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) )
		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box( dev_priv, 4, 16,
				  dev_priv->stats.requested_bufs, 4,
				  196, 128, 128 );
	}

	memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
}
/* ================================================================
 * CP command dispatch functions
 */

static void radeon_cp_dispatch_clear( drm_device_t *dev,
				      drm_radeon_clear_t *clear,
				      drm_radeon_clear_rect_t *depth_boxes )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "flags = 0x%x\n", flags );

	dev_priv->stats.clears++;

	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
		if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
	}

	if ( flags & (RADEON_FRONT | RADEON_BACK) ) {

		BEGIN_RING( 4 );

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
		OUT_RING( clear->color_mask );

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
				   x, y, w, h, flags );

			if ( flags & RADEON_FRONT ) {
				BEGIN_RING( 6 );

				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->front_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}

			if ( flags & RADEON_BACK ) {
				BEGIN_RING( 6 );

				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->back_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}
		}
	}
	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline = dev_priv->depth_fmt == RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
			(dev_priv->depth_pitch / 2) : (dev_priv->depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
			((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			*/
			clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
		}
		else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
				| 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
				| 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			*/
			clearmask = 0x0;
		}

		BEGIN_RING( 8 );
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
			      tempRB3D_DEPTHCLEARVALUE );
		/* what offset is this exactly ? */
		OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ) && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					OUT_RING( tileoffset * 8 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
			else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING( tileoffset * 16 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 1 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			}
			else { /* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					OUT_RING( tileoffset * 128 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
		/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
		/* FIXME : the mask supposedly contains low-res z values. So can't set
		   just to the max (0xff? or actually 0x3fff?), need to take z clear
		   value into account? */
		{
			BEGIN_RING( 4 );
			OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
			OUT_RING( 0x0 ); /* First tile */
			OUT_RING( 0x3cc0 );
			OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f );
			ADVANCE_RING();
		}
	}
	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	if ((dev_priv->microcode_version == UCODE_R200) &&
	    (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
				   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
			SE_VTE_CNTL__VTX_XY_FMT_MASK |
			SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
			SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
			SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if ( flags & RADEON_STENCIL ) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING( 26 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
		OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
		OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
			      tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
			      tempRB3D_STENCILREFMASK );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
		OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
		OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
		OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
		OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
		OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
		OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL,
			      tempRE_AUX_SCISSOR_CNTL );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			BEGIN_RING( 14 );
			OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			ADVANCE_RING();
		}
	}
	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if ( flags & RADEON_DEPTH ) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if ( flags & RADEON_STENCIL ) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING( 13 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
		OUT_RING( 0x00000000 );
		OUT_RING( rb3d_cntl );

		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
			      rb3d_stencilrefmask );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK,
			      0x00000000 );
		OUT_RING_REG( RADEON_SE_CNTL,
			      depth_clear->se_cntl );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			BEGIN_RING( 15 );

			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
			OUT_RING( RADEON_VTX_Z_PRESENT |
				  RADEON_VTX_PKCOLOR_PRESENT );
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   RADEON_MAOS_ENABLE |
				   RADEON_VTX_FMT_RADEON_MODE |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );

			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			ADVANCE_RING();
		}
	}
	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING( 4 );

	RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_swap( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "\n" );

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes( dev_priv );

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for ( i = 0 ; i < nbox ; i++ ) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
			   x, y, w, h );

		BEGIN_RING( 7 );

		OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
		OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			  RADEON_GMC_BRUSH_NONE |
			  (dev_priv->color_fmt << 8) |
			  RADEON_GMC_SRC_DATATYPE_COLOR |
			  RADEON_ROP3_S |
			  RADEON_DP_SRC_SOURCE_MEMORY |
			  RADEON_GMC_CLR_CMP_CNTL_DIS |
			  RADEON_GMC_WR_MSK_DIS );

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING( dev_priv->back_pitch_offset );
			OUT_RING( dev_priv->front_pitch_offset );
		}
		else {
			OUT_RING( dev_priv->front_pitch_offset );
			OUT_RING( dev_priv->back_pitch_offset );
		}

		OUT_RING( (x << 16) | y );
		OUT_RING( (x << 16) | y );
		OUT_RING( (w << 16) | h );

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING( 4 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
static void radeon_cp_dispatch_flip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
		   ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n",
		   __FUNCTION__,
		   dev_priv->current_page,
		   dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes( dev_priv );
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING( 6 );

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
					      + sarea->frame.x
					      * ( dev_priv->color_fmt - 2 ) ) & ~7 )
					  + offset );
	OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
					   + offset );

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
					      1 - dev_priv->current_page;

	BEGIN_RING( 2 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );

	ADVANCE_RING();
}
static int bad_prim_vertex_nr( int primitive, int nr )
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}

typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;
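/* Editor's note: for RADEON_PRIM_WALK_LIST vertex buffers, start/numverts
 * select a vertex range inside the buffer and finish is unused; for
 * RADEON_PRIM_WALK_IND index buffers, start..finish bound the index packet
 * (the indices begin RADEON_INDEX_PRIM_OFFSET bytes in) and offset locates
 * the separate vertex array.
 */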
static void radeon_cp_dispatch_vertex( drm_device_t *dev,
				       drm_buf_t *buf,
				       drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
		DRM_ERROR( "bad prim %x numverts %d\n",
			   prim->prim, prim->numverts );
		return;
	}

	do {
		/* Emit the next cliprect */
		if ( i < nbox ) {
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING( 5 );

		OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
		OUT_RING( offset );
		OUT_RING( numverts );
		OUT_RING( prim->vc_format );
		OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
			  RADEON_COLOR_ORDER_RGBA |
			  RADEON_VTX_FMT_RADEON_MODE |
			  (numverts << RADEON_NUM_VERTICES_SHIFT) );

		ADVANCE_RING();

		i++;
	} while ( i < nbox );
}
static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING( 2 );
	RADEON_DISPATCH_AGE( buf_priv->age );
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
static void radeon_cp_dispatch_indirect( drm_device_t *dev,
					 drm_buf_t *buf,
					 int start, int end )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
		   buf->idx, start, end );

	if ( start != end ) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if ( dwords & 1 ) {
			u32 *data = (u32 *)
				((char *)dev->agp_buffer_map->handle
				 + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING( 3 );

		OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
		OUT_RING( offset );
		OUT_RING( dwords );

		ADVANCE_RING();
	}
}
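/* Editor's note: RADEON_CP_PACKET2 is a type-2 no-op, so appending a single
 * dword of it is a safe way to round an odd-length indirect buffer up to
 * the even dword count the CP requires.
 */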
static void radeon_cp_dispatch_indices( drm_device_t *dev,
					drm_buf_t *elt_buf,
					drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->offset,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, count )) {
		DRM_ERROR( "bad prim %x count %d\n",
			   prim->prim, count );
		return;
	}

	if ( start >= prim->finish ||
	     (prim->start & 0x7) ) {
		DRM_ERROR( "buffer prim %d\n", prim->prim );
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *)((char *)dev->agp_buffer_map->handle +
		       elt_buf->offset + prim->start);

	data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT) );

	do {
		if ( i < nbox )
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

		radeon_cp_dispatch_indirect( dev, elt_buf,
					     prim->start,
					     prim->finish );

		i++;
	} while ( i < nbox );
}
#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

static int radeon_cp_dispatch_texture( DRMFILE filp,
				       drm_device_t *dev,
				       drm_radeon_texture_t *tex,
				       drm_radeon_tex_image_t *image )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width, spitch;
	u32 height;
	int i;
	u32 texpitch, microtile;
	u32 offset;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
		DRM_ERROR( "Invalid destination offset\n" );
		return DRM_ERR( EINVAL );
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch ( tex->format ) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR( "invalid texture format %d\n", tex->format );
		return DRM_ERR(EINVAL);
	}
	spitch = blit_width >> 6;
	if (spitch == 0 && image->height > 1)
		return DRM_ERR(EINVAL);

	texpitch = tex->pitch;
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	}
	else microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );

	do {
		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			   tex->offset >> 10, tex->pitch, tex->format,
			   image->x, image->y, image->width, image->height );

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if ( size < 4 && size > 0 ) {
			size = 4;
		} else if ( size == 0 ) {
			return 0;
		}

		buf = radeon_freelist_get( dev );
		if ( 0 && !buf ) {
			radeon_do_cp_idle( dev_priv );
			buf = radeon_freelist_get( dev );
		}
		if ( !buf ) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}

		/* Dispatch the indirect buffer.
		 */
		buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;

		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					if (DRM_COPY_FROM_USER(buffer, data,
							       tex_width * sizeof(u32))) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
				} else if (tex_width == 32) {
					if (DRM_COPY_FROM_USER(buffer, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					if (DRM_COPY_FROM_USER(buffer + 8, data + 16, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					if (DRM_COPY_FROM_USER(buffer, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 8, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 4, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 12, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					buffer += 16;
				}
			}
		}
		else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0 ; i < tex->height ; i++) {
					if (DRM_COPY_FROM_USER(buffer, data, tex_width )) {
						DRM_ERROR("EFAULT on pad, %d bytes\n", tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 8;
					data += tex_width;
				}
			}
		}

		buf->filp = filp;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS |
			 RADEON_GMC_WR_MSK_DIS );
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | (tex->offset >> 10));
		OUT_RING(0);
		OUT_RING((image->x << 16) | image->y);
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();

		radeon_cp_discard_buffer(dev, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();

	return 0;
}
static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "\n" );

	BEGIN_RING( 35 );

	OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
	OUT_RING( 0x00000000 );

	OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
	for ( i = 0 ; i < 32 ; i++ ) {
		OUT_RING( stipple[i] );
	}

	ADVANCE_RING();
}
static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
		     dev_priv->surfaces[surf_index].upper);
}
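/* Editor's note: the per-surface registers are laid out at a 16-byte
 * stride, so RADEON_SURFACE0_INFO + 16 * surf_index addresses surface
 * surf_index's INFO register; the LOWER/UPPER bound registers follow the
 * same pattern.
 */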
/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
	    ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (( (new_lower >= dev_priv->surfaces[i].lower) &&
		       (new_lower < dev_priv->surfaces[i].upper) ) ||
		     ( (new_lower < dev_priv->surfaces[i].lower) &&
		       (new_upper > dev_priv->surfaces[i].lower) )) ) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2*RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
{
	struct radeon_virt_surface *s;
	int i;

	/* find the virtual surface */
	for (i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				if (dev_priv->surfaces[s->surface_index].lower == s->lower)
					dev_priv->surfaces[s->surface_index].lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].upper == s->upper)
					dev_priv->surfaces[s->surface_index].upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].refcount == 0)
					dev_priv->surfaces[s->surface_index].flags = 0;
				s->filp = NULL;
				radeon_apply_surface_regs(s->surface_index, dev_priv);
				return 0;
			}
		}
	}
	return 1;
}

static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
{
	int i;
	for ( i = 0; i < 2*RADEON_MAX_SURFACES; i++)
	{
		if (dev_priv->virt_surfaces[i].filp == filp)
			free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
	}
}
/* ================================================================
 * IOCTL functions
 */

static int radeon_surface_alloc(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t alloc;

	if (!dev_priv) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data,
				 sizeof(alloc));

	if (alloc_surface(&alloc, dev_priv, filp) == -1)
		return DRM_ERR(EINVAL);
	else
		return 0;
}

static int radeon_surface_free(DRM_IOCTL_ARGS)
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t memfree;

	if (!dev_priv) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *)data,
				 sizeof(memfree) );

	if (free_surface(filp, dev_priv, memfree.address))
		return DRM_ERR(EINVAL);
	else
		return 0;
}
static int radeon_cp_clear( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t __user *)data,
				  sizeof(clear) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
				 sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear( dev, &clear, depth_boxes );

	COMMIT_RING();
	return 0;
}
/* Not sure why this isn't set all the time:
 */
static int radeon_do_init_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG( "\n" );

	BEGIN_RING( 6 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
/* Called whenever a client dies, from drm_release.
 * NOTE:  Lock isn't necessarily held when this is called!
 */
static int radeon_do_cleanup_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	if (dev_priv->current_page != 0)
		radeon_cp_dispatch_flip( dev );

	dev_priv->page_flipping = 0;
	return 0;
}
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
static int radeon_cp_flip( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip( dev );

	radeon_cp_dispatch_flip( dev );

	COMMIT_RING();
	return 0;
}

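/* Illustrative only: the practical difference between the two ioctls, on the
 * assumption (the dispatch helpers live elsewhere) that dispatch_flip toggles
 * the CRTC scanout base between two pre-set pages while dispatch_swap blits
 * the back buffer to the front through the current cliprects.  A client
 * would use them roughly like this:
 *
 *	// full-screen, page-flipped drawable
 *	drmCommandNone(fd, DRM_RADEON_FLIP);
 *
 *	// windowed drawable, copy-swap honouring cliprects
 *	drmCommandNone(fd, DRM_RADEON_SWAP);
 */
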
static int radeon_cp_swap( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap( dev );
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}

static int radeon_cp_vertex( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t __user *)data,
				  sizeof(vertex) );

	DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
		   DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard );

	if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   vertex.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}
	if ( vertex.prim < 0 ||
	     vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
		DRM_ERROR( "buffer prim %d\n", vertex.prim );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[vertex.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if ( vertex.count ) {
		buf->used = vertex.count;	/* not used? */

		if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
			if ( radeon_emit_state( dev_priv, filp_priv,
						&sarea_priv->context_state,
						sarea_priv->tex_state,
						sarea_priv->dirty ) ) {
				DRM_ERROR( "radeon_emit_state failed\n" );
				return DRM_ERR( EINVAL );
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count;	/* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex( dev, buf, &prim );
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_indices( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indices_t elts;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t __user *)data,
				  sizeof(elts) );

	DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
		   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard );

	if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   elts.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}
	if ( elts.prim < 0 ||
	     elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
		DRM_ERROR( "buffer prim %d\n", elts.prim );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[elts.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", elts.idx );
		return DRM_ERR(EINVAL);
	}

	count = (elts.end - elts.start) / sizeof(u16);
	elts.start -= RADEON_INDEX_PRIM_OFFSET;

	if ( elts.start & 0x7 ) {
		DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
		return DRM_ERR(EINVAL);
	}
	if ( elts.start < buf->used ) {
		DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
		return DRM_ERR(EINVAL);
	}

	buf->used = elts.end;

	if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
		if ( radeon_emit_state( dev_priv, filp_priv,
					&sarea_priv->context_state,
					sarea_priv->tex_state,
					sarea_priv->dirty ) ) {
			DRM_ERROR( "radeon_emit_state failed\n" );
			return DRM_ERR( EINVAL );
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts.start;
	prim.finish = elts.end;
	prim.prim = elts.prim;
	prim.offset = 0;			/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = dev_priv->sarea_priv->vc_format;

	radeon_cp_dispatch_indices( dev, buf, &prim );
	if ( elts.discard ) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}

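/* Worked example for the index arithmetic above, assuming
 * RADEON_INDEX_PRIM_OFFSET is the size in bytes of the indexed-primitive
 * header userspace writes ahead of the u16 index data: with
 * elts.start = 128 and elts.end = 192, count = (192 - 128) / 2 = 32
 * indices, and the dispatch start is rewound by the header size so the CP
 * consumes the header first.  The check that follows then insists the
 * rewound start is 8-byte (qword) aligned and does not overlap data
 * already queued in the buffer.
 */
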
static int radeon_cp_texture( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t __user *)data, sizeof(tex) );

	if ( tex.image == NULL ) {
		DRM_ERROR( "null texture image!\n" );
		return DRM_ERR(EINVAL);
	}

	if ( DRM_COPY_FROM_USER( &image,
				 (drm_radeon_tex_image_t __user *)tex.image,
				 sizeof(image) ) )
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );

	COMMIT_RING();
	return ret;
}

static int radeon_cp_stipple( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t __user *)data,
				  sizeof(stipple) );

	if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	radeon_cp_dispatch_stipple( dev, mask );

	COMMIT_RING();
	return 0;
}

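/* Illustrative only: the mask copied in above is a 32x32 monochrome stipple
 * pattern, one u32 per row (128 bytes total).  A userspace sketch filling in
 * a 50% checkerboard might look like:
 *
 *	unsigned int pattern[32];
 *	drm_radeon_stipple_t stipple;
 *	int i;
 *
 *	for (i = 0; i < 32; i++)
 *		pattern[i] = (i & 1) ? 0xaaaaaaaa : 0x55555555;
 *	stipple.mask = pattern;
 *	drmCommandWrite(fd, DRM_RADEON_STIPPLE, &stipple, sizeof(stipple));
 */
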
static int radeon_cp_indirect( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t __user *)data,
				  sizeof(indirect) );

	DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
		   indirect.idx, indirect.start,
		   indirect.end, indirect.discard );

	if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   indirect.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
		return DRM_ERR(EINVAL);
	}

	if ( indirect.start < buf->used ) {
		DRM_ERROR( "reusing indirect: start=0x%x actual=0x%x\n",
			   indirect.start, buf->used );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING( 2 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
	if (indirect.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}

static int radeon_cp_vertex2( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t __user *)data,
				  sizeof(vertex) );

	DRM_DEBUG( "pid=%d index=%d discard=%d\n",
		   DRM_CURRENTPID, vertex.idx, vertex.discard );

	if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   vertex.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[vertex.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}

	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if ( DRM_COPY_FROM_USER( &prim, &vertex.prim[i], sizeof(prim) ) )
			return DRM_ERR(EFAULT);

		if ( prim.stateidx != laststate ) {
			drm_radeon_state_t state;

			if ( DRM_COPY_FROM_USER( &state,
						 &vertex.state[prim.stateidx],
						 sizeof(state) ) )
				return DRM_ERR(EFAULT);

			if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) {
				DRM_ERROR( "radeon_emit_state2 failed\n" );
				return DRM_ERR( EINVAL );
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if ( prim.prim & RADEON_PRIM_WALK_IND ) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices( dev, buf, &tclprim );
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex( dev, buf, &tclprim );
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if ( vertex.discard ) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}

static int radeon_emit_packets(
	drm_radeon_private_t *dev_priv,
	drm_file_t *filp_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR( "Packet size provided larger than data provided\n" );
		return DRM_ERR(EINVAL);
	}

	if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return DRM_ERR( EINVAL );
	}

	BEGIN_RING( sz+1 );
	OUT_RING( CP_PACKET0( reg, (sz-1) ) );
	OUT_RING_TABLE( data, sz );
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

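/* For reference (a sketch, not a definition from this file): CP_PACKET0() is
 * believed to build a Radeon CP type-0 packet header, roughly
 *
 *	header = RADEON_CP_PACKET0 | ((count) << 16) | ((reg) >> 2);
 *
 * i.e. a write of count+1 consecutive dwords starting at dword-addressed
 * register `reg`, so the OUT_RING_TABLE() above streams the sz verified
 * register values immediately after the header.
 */
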
static __inline__ int radeon_emit_scalars(
	drm_radeon_private_t *dev_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING( 3+sz );
	OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
	OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
	OUT_RING_TABLE( cmdbuf->buf, sz );
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

/* The scalars command header carries the offset in a single byte, so this
 * variant exists to reach scalar state above offset 0xff:
 */
static __inline__ int radeon_emit_scalars2(
	drm_radeon_private_t *dev_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING( 3+sz );
	OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
	OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
	OUT_RING_TABLE( cmdbuf->buf, sz );
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static __inline__ int radeon_emit_vectors(
	drm_radeon_private_t *dev_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING( 3+sz );
	OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
	OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
	OUT_RING_TABLE( cmdbuf->buf, sz );
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

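/* Both emitters above follow the same indexed-upload idiom: write the INDX
 * register once to latch the start offset and auto-increment stride, then
 * stream the payload through the DATA register.  CP_PACKET0_TABLE() is
 * assumed to be the type-0 packet variant with the one-register-write bit
 * set, so every dword of the table lands on the same data register rather
 * than on consecutive registers.
 */
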
static int radeon_emit_packet3( drm_device_t *dev,
				drm_file_t *filp_priv,
				drm_radeon_cmd_buffer_t *cmdbuf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	BEGIN_RING( cmdsz );
	OUT_RING_TABLE( cmdbuf->buf, cmdsz );
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_packet3_cliprect( drm_device_t *dev,
					 drm_file_t *filp_priv,
					 drm_radeon_cmd_buffer_t *cmdbuf,
					 int orig_nbox )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if ( i < cmdbuf->nbox ) {
			if (DRM_COPY_FROM_USER( &box, &boxes[i], sizeof(box) ))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if ( i ) {
				BEGIN_RING( 2 );
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect( dev_priv, &box );
		}

		BEGIN_RING( cmdsz );
		OUT_RING_TABLE( cmdbuf->buf, cmdsz );
		ADVANCE_RING();

	} while ( ++i < cmdbuf->nbox );
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_wait( drm_device_t *dev, int flags )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING( 2 );
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING( 2 );
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D|RADEON_WAIT_3D:
		BEGIN_RING( 2 );
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	return 0;
}

static int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_cmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t __user *)data,
				  sizeof(cmdbuf) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	if (cmdbuf.bufsz > 64*1024 || cmdbuf.bufsz < 0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	if(dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp=r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);

		if (orig_bufsz != 0)
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

		return temp;
	}

	/* microcode_version != r300 */
	while ( cmdbuf.bufsz >= sizeof(header) ) {

		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if ( idx < 0 || idx >= dma->buf_count ) {
				DRM_ERROR( "buffer index %d (of %d max)\n",
					   idx, dma->buf_count - 1 );
				goto err;
			}

			buf = dma->buflist[idx];
			if ( buf->filp != filp || buf->pending ) {
				DRM_ERROR( "bad buffer %p %p %d\n",
					   buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer( dev, buf );
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait( dev, header.wait.flags )) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

err:
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}

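/* Illustrative only: the stream parsed by the loop above is a packed sequence
 * of drm_radeon_cmd_header_t words, each optionally followed by payload
 * dwords.  A userspace sketch submitting a single WAIT command through the
 * cmdbuf ioctl might look like:
 *
 *	drm_radeon_cmd_header_t h;
 *	drm_radeon_cmd_buffer_t cb;
 *
 *	memset(&h, 0, sizeof(h));
 *	h.wait.cmd_type = RADEON_CMD_WAIT;
 *	h.wait.flags = RADEON_WAIT_3D;
 *
 *	cb.buf = (char *)&h;
 *	cb.bufsz = sizeof(h);
 *	cb.nbox = 0;
 *	cb.boxes = NULL;
 *	drmCommandWrite(fd, DRM_RADEON_CMDBUF, &cb, sizeof(cb));
 */
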
static int radeon_cp_getparam( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t __user *)data,
				  sizeof(param) );

	DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );

	switch( param.param ) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH( 0 );
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH( 1 );
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH( 2 );
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio_offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
	/*
	 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
	 * pointer which can't fit into an int-sized variable.  According to
	 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
	 * not supporting it shouldn't be a problem.  If the same functionality
	 * is needed on 64-bit platforms, a new ioctl() would have to be added,
	 * so backwards-compatibility for the embedded platforms can be
	 * maintained.  --davidm 4-Feb-2004.
	 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	if ( DRM_COPY_TO_USER( param.value, &value, sizeof(int) ) ) {
		DRM_ERROR( "copy_to_user\n" );
		return DRM_ERR(EFAULT);
	}

	return 0;
}

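/* Illustrative only: userspace queries these parameters with a write/read
 * round trip, pointing param.value at a local int.  For example, fetching
 * the interrupt number via libdrm:
 *
 *	drm_radeon_getparam_t gp;
 *	int irq;
 *
 *	gp.param = RADEON_PARAM_IRQ_NR;
 *	gp.value = &irq;
 *	if (drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp)) == 0)
 *		// irq now holds the value copied out above
 */
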
static int radeon_cp_setparam( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_setparam_t sp;
	struct drm_radeon_driver_file_fields *radeon_priv;

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR( EINVAL );
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t __user * )data,
				  sizeof( sp ) );

	switch( sp.param ) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = filp_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp.value == 0) {
			DRM_DEBUG( "color tiling disabled\n" );
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 0;
		}
		else if (sp.value == 1) {
			DRM_DEBUG( "color tiling enabled\n" );
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	default:
		DRM_DEBUG( "Invalid parameter %d\n", sp.param );
		return DRM_ERR( EINVAL );
	}

	return 0;
}

/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any alloced GART memory.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp)
{
	if ( dev->dev_private ) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		if ( dev_priv->page_flipping ) {
			radeon_do_cleanup_pageflip( dev );
		}
		radeon_mem_release( filp, dev_priv->gart_heap );
		radeon_mem_release( filp, dev_priv->fb_heap );
		radeon_surfaces_release(filp, dev_priv);
	}
}

void radeon_driver_pretakedown(drm_device_t *dev)
{
	radeon_do_release(dev);
}

int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	radeon_priv = (struct drm_radeon_driver_file_fields *)
		drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);

	if (!radeon_priv)
		return -ENOMEM;

	filp_priv->driver_priv = radeon_priv;
	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv = filp_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}

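/* Believed layout of each entry below for this DRM version (an assumption
 * about drm_ioctl_desc_t, which is not defined in this file):
 * { handler, auth_needed, root_only }.  So e.g. CP_INIT and INDIRECT require
 * an authenticated, root-privileged client, while SWAP or VERTEX require
 * authentication only.
 */
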
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)]    = { radeon_cp_init,       1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)]   = { radeon_cp_start,      1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)]    = { radeon_cp_stop,       1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)]   = { radeon_cp_reset,      1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)]    = { radeon_cp_idle,       1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)]  = { radeon_cp_resume,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_RESET)]      = { radeon_engine_reset,  1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = { radeon_fullscreen,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)]       = { radeon_cp_swap,       1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)]      = { radeon_cp_clear,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)]     = { radeon_cp_vertex,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)]    = { radeon_cp_indices,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)]    = { radeon_cp_texture,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)]    = { radeon_cp_stipple,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)]   = { radeon_cp_indirect,   1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)]    = { radeon_cp_vertex2,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)]     = { radeon_cp_cmdbuf,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)]   = { radeon_cp_getparam,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)]       = { radeon_cp_flip,       1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)]      = { radeon_mem_alloc,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FREE)]       = { radeon_mem_free,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)]  = { radeon_mem_init_heap, 1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)]   = { radeon_irq_emit,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)]   = { radeon_irq_wait,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)]   = { radeon_cp_setparam,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = { radeon_surface_alloc, 1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)]  = { radeon_surface_free,  1, 0 }
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);