/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"
 
/* ================================================================
 * Helper functions for client state checking and fixup
 */
 
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
                                                    struct drm_file * file_priv,

        u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
        struct drm_radeon_driver_file_fields *radeon_priv;

        /* Hrm ... the story of the offset ... So this function converts
         * the various ideas of what userland clients might have for an
         * offset in the card address space into an offset into the card
         * address space :) So with a sane client, it should just keep
         * the value intact and just do some boundary checking. However,
         * not all clients are sane. Some older clients pass us 0 based
         * offsets relative to the start of the framebuffer and some may
         * assume the AGP aperture is appended to the framebuffer, so we
         * try to detect those cases and fix them up.
         *
         * Note: It might be a good idea here to make sure the offset lands
         * in some "allowed" area to protect things like the PCIE GART...
         */

        /* First, the best case, the offset already lands in either the
         * framebuffer or the GART mapped space
         */
        if (radeon_check_offset(dev_priv, off))

        /* Ok, that didn't happen... now check if we have a zero based
         * offset that fits in the framebuffer + gart space, apply the
         * magic offset we get from SETPARAM or calculated from fb_location
         */
        if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
                radeon_priv = file_priv->driver_priv;
                off += radeon_priv->radeon_fb_delta;

        /* Finally, assume we aimed at a GART offset if beyond the fb */
                off = off - fb_end - 1 + dev_priv->gart_vm_start;

        /* Now recheck and fail if out of bounds */
        if (radeon_check_offset(dev_priv, off)) {
                DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
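/* Sanity-check one type-0 state packet by its RADEON_EMIT_* (or R200_EMIT_*)
 * id: packets that carry memory addresses get their embedded offsets run
 * through radeon_check_and_fixup_offset() so userspace cannot point the
 * engine at arbitrary memory; packets without offsets are passed through.
 */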
 
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
                                                     struct drm_file *file_priv,

        case RADEON_EMIT_PP_MISC:
                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
                        DRM_ERROR("Invalid depth buffer offset\n");

        case RADEON_EMIT_PP_CNTL:
                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
                        DRM_ERROR("Invalid colour buffer offset\n");

        case R200_EMIT_PP_TXOFFSET_0:
        case R200_EMIT_PP_TXOFFSET_1:
        case R200_EMIT_PP_TXOFFSET_2:
        case R200_EMIT_PP_TXOFFSET_3:
        case R200_EMIT_PP_TXOFFSET_4:
        case R200_EMIT_PP_TXOFFSET_5:
                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                        DRM_ERROR("Invalid R200 texture offset\n");

        case RADEON_EMIT_PP_TXFILTER_0:
        case RADEON_EMIT_PP_TXFILTER_1:
        case RADEON_EMIT_PP_TXFILTER_2:
                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
                        DRM_ERROR("Invalid R100 texture offset\n");

        case R200_EMIT_PP_CUBIC_OFFSETS_0:
        case R200_EMIT_PP_CUBIC_OFFSETS_1:
        case R200_EMIT_PP_CUBIC_OFFSETS_2:
        case R200_EMIT_PP_CUBIC_OFFSETS_3:
        case R200_EMIT_PP_CUBIC_OFFSETS_4:
        case R200_EMIT_PP_CUBIC_OFFSETS_5:{
                        for (i = 0; i < 5; i++) {
                                if (radeon_check_and_fixup_offset(dev_priv,
                                            ("Invalid R200 cubic texture offset\n");

        case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
        case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
        case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
                        for (i = 0; i < 5; i++) {
                                if (radeon_check_and_fixup_offset(dev_priv,
                                            ("Invalid R100 cubic texture offset\n");

        case R200_EMIT_VAP_CTL:{
                        OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);

        case RADEON_EMIT_RB3D_COLORPITCH:
        case RADEON_EMIT_RE_LINE_PATTERN:
        case RADEON_EMIT_SE_LINE_WIDTH:
        case RADEON_EMIT_PP_LUM_MATRIX:
        case RADEON_EMIT_PP_ROT_MATRIX_0:
        case RADEON_EMIT_RB3D_STENCILREFMASK:
        case RADEON_EMIT_SE_VPORT_XSCALE:
        case RADEON_EMIT_SE_CNTL:
        case RADEON_EMIT_SE_CNTL_STATUS:
        case RADEON_EMIT_RE_MISC:
        case RADEON_EMIT_PP_BORDER_COLOR_0:
        case RADEON_EMIT_PP_BORDER_COLOR_1:
        case RADEON_EMIT_PP_BORDER_COLOR_2:
        case RADEON_EMIT_SE_ZBIAS_FACTOR:
        case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
        case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
        case R200_EMIT_PP_TXCBLEND_0:
        case R200_EMIT_PP_TXCBLEND_1:
        case R200_EMIT_PP_TXCBLEND_2:
        case R200_EMIT_PP_TXCBLEND_3:
        case R200_EMIT_PP_TXCBLEND_4:
        case R200_EMIT_PP_TXCBLEND_5:
        case R200_EMIT_PP_TXCBLEND_6:
        case R200_EMIT_PP_TXCBLEND_7:
        case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
        case R200_EMIT_TFACTOR_0:
        case R200_EMIT_VTX_FMT_0:
        case R200_EMIT_MATRIX_SELECT_0:
        case R200_EMIT_TEX_PROC_CTL_2:
        case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
        case R200_EMIT_PP_TXFILTER_0:
        case R200_EMIT_PP_TXFILTER_1:
        case R200_EMIT_PP_TXFILTER_2:
        case R200_EMIT_PP_TXFILTER_3:
        case R200_EMIT_PP_TXFILTER_4:
        case R200_EMIT_PP_TXFILTER_5:
        case R200_EMIT_VTE_CNTL:
        case R200_EMIT_OUTPUT_VTX_COMP_SEL:
        case R200_EMIT_PP_TAM_DEBUG3:
        case R200_EMIT_PP_CNTL_X:
        case R200_EMIT_RB3D_DEPTHXY_OFFSET:
        case R200_EMIT_RE_AUX_SCISSOR_CNTL:
        case R200_EMIT_RE_SCISSOR_TL_0:
        case R200_EMIT_RE_SCISSOR_TL_1:
        case R200_EMIT_RE_SCISSOR_TL_2:
        case R200_EMIT_SE_VAP_CNTL_STATUS:
        case R200_EMIT_SE_VTX_STATE_CNTL:
        case R200_EMIT_RE_POINTSIZE:
        case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
        case R200_EMIT_PP_CUBIC_FACES_0:
        case R200_EMIT_PP_CUBIC_FACES_1:
        case R200_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_CUBIC_FACES_3:
        case R200_EMIT_PP_CUBIC_FACES_4:
        case R200_EMIT_PP_CUBIC_FACES_5:
        case RADEON_EMIT_PP_TEX_SIZE_0:
        case RADEON_EMIT_PP_TEX_SIZE_1:
        case RADEON_EMIT_PP_TEX_SIZE_2:
        case R200_EMIT_RB3D_BLENDCOLOR:
        case R200_EMIT_TCL_POINT_SPRITE_CNTL:
        case RADEON_EMIT_PP_CUBIC_FACES_0:
        case RADEON_EMIT_PP_CUBIC_FACES_1:
        case RADEON_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_TRI_PERF_CNTL:
        case R200_EMIT_PP_AFS_0:
        case R200_EMIT_PP_AFS_1:
        case R200_EMIT_ATF_TFACTOR:
        case R200_EMIT_PP_TXCTLALL_0:
        case R200_EMIT_PP_TXCTLALL_1:
        case R200_EMIT_PP_TXCTLALL_2:
        case R200_EMIT_PP_TXCTLALL_3:
        case R200_EMIT_PP_TXCTLALL_4:
        case R200_EMIT_PP_TXCTLALL_5:
        case R200_EMIT_VAP_PVS_CNTL:
                /* These packets don't contain memory offsets */

                DRM_ERROR("Unknown state packet ID %d\n", id);
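/* Validate one type-3 (PACKET3) command: check the packet header, make sure
 * the advertised size fits inside the supplied command buffer, and fix up any
 * buffer offsets the packet references before it reaches the CP ring.
 */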
 
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
                                                     struct drm_file *file_priv,
                                                     drm_radeon_kcmd_buffer_t *

        u32 *cmd = (u32 *) cmdbuf->buf;

        *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

        if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
                DRM_ERROR("Not a type 3 packet\n");

        if (4 * *cmdsz > cmdbuf->bufsz) {
                DRM_ERROR("Packet size larger than size of data provided\n");

        switch (cmd[0] & 0xff00) {
        /* XXX Are there old drivers needing other packets? */

        case RADEON_3D_DRAW_IMMD:
        case RADEON_3D_DRAW_VBUF:
        case RADEON_3D_DRAW_INDX:
        case RADEON_WAIT_FOR_IDLE:
        case RADEON_3D_CLEAR_ZMASK:
/*      case RADEON_CP_NEXT_CHAR:
        case RADEON_CP_PLY_NEXTSCAN:
        case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
                /* these packets are safe */

        case RADEON_CP_3D_DRAW_IMMD_2:
        case RADEON_CP_3D_DRAW_VBUF_2:
        case RADEON_CP_3D_DRAW_INDX_2:
        case RADEON_3D_CLEAR_HIZ:
                /* safe but r200 only */
                if (dev_priv->microcode_version != UCODE_R200) {
                        DRM_ERROR("Invalid 3d packet for r100-class chip\n");

        case RADEON_3D_LOAD_VBPNTR:
                count = (cmd[0] >> 16) & 0x3fff;

                if (count > 18) { /* 12 arrays max */
                        DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",

                /* carefully check packet contents */
                narrays = cmd[1] & ~0xc000;
                while ((k < narrays) && (i < (count + 2))) {
                        i++;            /* skip attribute field */
                        if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                                    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
                        /* have one more to process, they come in pairs */
                        if (radeon_check_and_fixup_offset(dev_priv,
                                    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",

                /* do the counts match what we expect ? */
                if ((k != narrays) || (i != (count + 2))) {
                            ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
                              k, i, narrays, count + 1);

        case RADEON_3D_RNDR_GEN_INDX_PRIM:
                if (dev_priv->microcode_version != UCODE_R100) {
                        DRM_ERROR("Invalid 3d packet for r200-class chip\n");
                if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
                        DRM_ERROR("Invalid rndr_gen_indx offset\n");

        case RADEON_CP_INDX_BUFFER:
                if (dev_priv->microcode_version != UCODE_R200) {
                        DRM_ERROR("Invalid 3d packet for r100-class chip\n");
                if ((cmd[1] & 0x8000ffff) != 0x80000810) {
                        DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
                if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
                        DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);

        case RADEON_CNTL_HOSTDATA_BLT:
        case RADEON_CNTL_PAINT_MULTI:
        case RADEON_CNTL_BITBLT_MULTI:
                /* MSB of opcode: next DWORD GUI_CNTL */
                if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
                              | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[2] << 10;
                        if (radeon_check_and_fixup_offset
                            (dev_priv, file_priv, &offset)) {
                                DRM_ERROR("Invalid first packet offset\n");
                        cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;

                if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
                    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
                        offset = cmd[3] << 10;
                        if (radeon_check_and_fixup_offset
                            (dev_priv, file_priv, &offset)) {
                                DRM_ERROR("Invalid second packet offset\n");
                        cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;

                DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
 
/* ================================================================
 * CP hardware state programming functions
 */
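/* Emit the scissor rectangle for a single cliprect.  The -1 applied to the
 * bottom-right coordinates suggests the hardware wants an inclusive corner
 * while drm_clip_rect is exclusive (an inference from the code below, not
 * from documentation).
 */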
 
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
                                             struct drm_clip_rect * box)

        DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
                  box->x1, box->y1, box->x2, box->y2);

        OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
        OUT_RING((box->y1 << 16) | box->x1);
        OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
        OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
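/* Emit the state blocks selected by the 'dirty' bitmask (context, vertex
 * format, line, bumpmap, masks, viewport, setup, misc and the three texture
 * units), validating the colour, depth and texture offsets on the way.
 */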
 
static int radeon_emit_state(drm_radeon_private_t * dev_priv,
                             struct drm_file *file_priv,
                             drm_radeon_context_regs_t * ctx,
                             drm_radeon_texture_regs_t * tex,

        DRM_DEBUG("dirty=0x%08x\n", dirty);

        if (dirty & RADEON_UPLOAD_CONTEXT) {
                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                                                  &ctx->rb3d_depthoffset)) {
                        DRM_ERROR("Invalid depth buffer offset\n");

                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                                                  &ctx->rb3d_coloroffset)) {
                        DRM_ERROR("Invalid colour buffer offset\n");

                OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
                OUT_RING(ctx->pp_misc);
                OUT_RING(ctx->pp_fog_color);
                OUT_RING(ctx->re_solid_color);
                OUT_RING(ctx->rb3d_blendcntl);
                OUT_RING(ctx->rb3d_depthoffset);
                OUT_RING(ctx->rb3d_depthpitch);
                OUT_RING(ctx->rb3d_zstencilcntl);
                OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
                OUT_RING(ctx->pp_cntl);
                OUT_RING(ctx->rb3d_cntl);
                OUT_RING(ctx->rb3d_coloroffset);
                OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
                OUT_RING(ctx->rb3d_colorpitch);

        if (dirty & RADEON_UPLOAD_VERTFMT) {
                OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
                OUT_RING(ctx->se_coord_fmt);

        if (dirty & RADEON_UPLOAD_LINE) {
                OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
                OUT_RING(ctx->re_line_pattern);
                OUT_RING(ctx->re_line_state);
                OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
                OUT_RING(ctx->se_line_width);

        if (dirty & RADEON_UPLOAD_BUMPMAP) {
                OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
                OUT_RING(ctx->pp_lum_matrix);
                OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
                OUT_RING(ctx->pp_rot_matrix_0);
                OUT_RING(ctx->pp_rot_matrix_1);

        if (dirty & RADEON_UPLOAD_MASKS) {
                OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
                OUT_RING(ctx->rb3d_stencilrefmask);
                OUT_RING(ctx->rb3d_ropcntl);
                OUT_RING(ctx->rb3d_planemask);

        if (dirty & RADEON_UPLOAD_VIEWPORT) {
                OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
                OUT_RING(ctx->se_vport_xscale);
                OUT_RING(ctx->se_vport_xoffset);
                OUT_RING(ctx->se_vport_yscale);
                OUT_RING(ctx->se_vport_yoffset);
                OUT_RING(ctx->se_vport_zscale);
                OUT_RING(ctx->se_vport_zoffset);

        if (dirty & RADEON_UPLOAD_SETUP) {
                OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
                OUT_RING(ctx->se_cntl);
                OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
                OUT_RING(ctx->se_cntl_status);

        if (dirty & RADEON_UPLOAD_MISC) {
                OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
                OUT_RING(ctx->re_misc);

        if (dirty & RADEON_UPLOAD_TEX0) {
                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                                                  &tex[0].pp_txoffset)) {
                        DRM_ERROR("Invalid texture offset for unit 0\n");

                OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
                OUT_RING(tex[0].pp_txfilter);
                OUT_RING(tex[0].pp_txformat);
                OUT_RING(tex[0].pp_txoffset);
                OUT_RING(tex[0].pp_txcblend);
                OUT_RING(tex[0].pp_txablend);
                OUT_RING(tex[0].pp_tfactor);
                OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
                OUT_RING(tex[0].pp_border_color);

        if (dirty & RADEON_UPLOAD_TEX1) {
                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                                                  &tex[1].pp_txoffset)) {
                        DRM_ERROR("Invalid texture offset for unit 1\n");

                OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
                OUT_RING(tex[1].pp_txfilter);
                OUT_RING(tex[1].pp_txformat);
                OUT_RING(tex[1].pp_txoffset);
                OUT_RING(tex[1].pp_txcblend);
                OUT_RING(tex[1].pp_txablend);
                OUT_RING(tex[1].pp_tfactor);
                OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
                OUT_RING(tex[1].pp_border_color);

        if (dirty & RADEON_UPLOAD_TEX2) {
                if (radeon_check_and_fixup_offset(dev_priv, file_priv,
                                                  &tex[2].pp_txoffset)) {
                        DRM_ERROR("Invalid texture offset for unit 2\n");

                OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
                OUT_RING(tex[2].pp_txfilter);
                OUT_RING(tex[2].pp_txformat);
                OUT_RING(tex[2].pp_txoffset);
                OUT_RING(tex[2].pp_txcblend);
                OUT_RING(tex[2].pp_txablend);
                OUT_RING(tex[2].pp_tfactor);
                OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
                OUT_RING(tex[2].pp_border_color);
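/* Newer state path: emit the additional "context2" registers (currently just
 * the z-bias pair) and then hand the rest off to radeon_emit_state().
 */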
 
static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
                              struct drm_file *file_priv,
                              drm_radeon_state_t * state)

        if (state->dirty & RADEON_UPLOAD_ZBIAS) {
                OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
                OUT_RING(state->context2.se_zbias_factor);
                OUT_RING(state->context2.se_zbias_constant);

        return radeon_emit_state(dev_priv, file_priv, &state->context,
                                 state->tex, state->dirty);
 
/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
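/* Table of the type-0 state packets a 1.3 command buffer may emit, indexed by
 * the RADEON_EMIT_* id: starting register, number of registers, and a
 * printable name for diagnostics.
 */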
 
} packet[RADEON_MAX_STATE_PACKETS] = {
        {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
        {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
        {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
        {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
        {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
        {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
        {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
        {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
        {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
        {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
        {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
        {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
        {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
        {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
        {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
        {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
        {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
        {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
        {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
        {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
        {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
                    "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
        {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
        {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
        {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
        {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
        {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
        {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
        {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
        {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
        {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
        {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
        {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
        {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
        {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
        {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
        {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
        {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
        {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
        {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
        {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
        {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
        {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
        {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
        {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
        {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
        {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
        {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
        {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
        {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
        {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
         "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
        {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
        {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
        {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
        {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
        {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
        {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
        {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
        {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
        {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
        {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
        {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
                    "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
        {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
        {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
        {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
        {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
        {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
        {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
        {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
        {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
        {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
        {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
        {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
        {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
        {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
        {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
        {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
        {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
        {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
        {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
        {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
        {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
        {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
        {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
        {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
        {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
        {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
        {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
        {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
        {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
        {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
        {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
        {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
        {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
        {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
        {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
 
/* ================================================================
 * Performance monitoring functions
 */
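/* Fill a small w x h rectangle at (x, y), relative to the first cliprect,
 * with a solid colour; used below to draw the performance monitoring boxes
 * into the front or back buffer depending on the current page flip state.
 */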
 
static void radeon_clear_box(drm_radeon_private_t * dev_priv,
                             struct drm_radeon_master_private *master_priv,
                             int x, int y, int w, int h, int r, int g, int b)

        x += master_priv->sarea_priv->boxes[0].x1;
        y += master_priv->sarea_priv->boxes[0].y1;

        switch (dev_priv->color_fmt) {
        case RADEON_COLOR_FORMAT_RGB565:
                color = (((r & 0xf8) << 8) |
                         ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
        case RADEON_COLOR_FORMAT_ARGB8888:
                color = (((0xff) << 24) | (r << 16) | (g << 8) | b);

        RADEON_WAIT_UNTIL_3D_IDLE();
        OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
        OUT_RING(0xffffffff);

        OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
        OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                 RADEON_GMC_BRUSH_SOLID_COLOR |
                 (dev_priv->color_fmt << 8) |
                 RADEON_GMC_SRC_DATATYPE_COLOR |
                 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

        if (master_priv->sarea_priv->pfCurrentPage == 1) {
                OUT_RING(dev_priv->front_pitch_offset);
                OUT_RING(dev_priv->back_pitch_offset);

        OUT_RING((x << 16) | y);
        OUT_RING((w << 16) | h);
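/* Overlay a row of small colour-coded boxes that encode per-frame statistics
 * (idle waits, page flips, texture uploads, buffer usage), then reset the
 * counters for the next frame.
 */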
 
static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv,
                                        struct drm_radeon_master_private *master_priv)

        /* Collapse various things into a wait flag -- trying to
         * guess if userspace slept -- better just to have them tell us.
         */
        if (dev_priv->stats.last_frame_reads > 1 ||
            dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
                dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

        if (dev_priv->stats.freelist_loops) {
                dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;

        /* Purple box for page flipping */
        if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
                radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);

        /* Red box if we have to wait for idle at any point */
        if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
                radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);

        /* Blue box: lost context? */

        /* Yellow box for texture swaps */
        if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
                radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);

        /* Green box if hardware never idles (as far as we can tell) */
        if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
                radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);

        /* Draw bars indicating number of buffers allocated
         * (not a great measure, easily confused)
         */
        if (dev_priv->stats.requested_bufs) {
                if (dev_priv->stats.requested_bufs > 100)
                        dev_priv->stats.requested_bufs = 100;

                radeon_clear_box(dev_priv, master_priv, 4, 16,
                                 dev_priv->stats.requested_bufs, 4,

        memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
 
/* ================================================================
 * CP command dispatch functions
 */
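/* Clear the colour, depth and/or stencil buffers for every cliprect.  Colour
 * buffers are cleared with 2D solid fills; depth/stencil either goes through
 * the fast Z / hierarchical Z path or is cleared by rendering a quad into
 * just those buffers.
 */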
 
static void radeon_cp_dispatch_clear(struct drm_device * dev,
                                     struct drm_master *master,
                                     drm_radeon_clear_t * clear,
                                     drm_radeon_clear_rect_t * depth_boxes)

        drm_radeon_private_t *dev_priv = dev->dev_private;
        struct drm_radeon_master_private *master_priv = master->driver_priv;
        drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
        drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
        int nbox = sarea_priv->nbox;
        struct drm_clip_rect *pbox = sarea_priv->boxes;
        unsigned int flags = clear->flags;
        u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;

        DRM_DEBUG("flags = 0x%x\n", flags);

        dev_priv->stats.clears++;

        if (sarea_priv->pfCurrentPage == 1) {
                unsigned int tmp = flags;

                flags &= ~(RADEON_FRONT | RADEON_BACK);
                if (tmp & RADEON_FRONT)
                        flags |= RADEON_BACK;
                if (tmp & RADEON_BACK)
                        flags |= RADEON_FRONT;

        if (flags & (RADEON_FRONT | RADEON_BACK)) {

                /* Ensure the 3D stream is idle before doing a
                 * 2D fill to clear the front or back buffer.
                 */
                RADEON_WAIT_UNTIL_3D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
                OUT_RING(clear->color_mask);

                /* Make sure we restore the 3D state next time.
                 */
                sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {
                        int w = pbox[i].x2 - x;
                        int h = pbox[i].y2 - y;

                        DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",

                        if (flags & RADEON_FRONT) {
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->front_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);

                        if (flags & RADEON_BACK) {
                                         (RADEON_CNTL_PAINT_MULTI, 4));
                                OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                                         RADEON_GMC_BRUSH_SOLID_COLOR |
                                         RADEON_GMC_SRC_DATATYPE_COLOR |
                                         RADEON_GMC_CLR_CMP_CNTL_DIS);

                                OUT_RING(dev_priv->back_pitch_offset);
                                OUT_RING(clear->clear_color);

                                OUT_RING((x << 16) | y);
                                OUT_RING((w << 16) | h);
 
        /* no docs available, based on reverse engineering by Stephane Marchesin */
        if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
            && (flags & RADEON_CLEAR_FASTZ)) {

                int depthpixperline =
                    dev_priv->depth_fmt ==
                    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /

                u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
                    ((clear->depth_mask & 0xff) << 24);

                /* Make sure we restore the 3D state next time.
                 * we haven't touched any "normal" state - still need this?
                 */
                sarea_priv->ctx_owner = 0;

                if ((dev_priv->flags & RADEON_HAS_HIERZ)
                    && (flags & RADEON_USE_HIERZ)) {
                        /* FIXME : reverse engineer that for Rx00 cards */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                        /* pattern seems to work for r100, though get slight
                           rendering errors with glxgears. If hierz is not enabled for r100,
                           only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
                           other ones are ignored, and the same clear mask can be used. That's
                           very different behaviour than R200 which needs different clear mask
                           and different number of tiles to clear if hierz is enabled or not !?!
                         */
                        clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
                        /* clear mask : chooses the clearing pattern.
                           rv250: could be used to clear only parts of macrotiles
                           (but that would get really complicated...)?
                           bit 0 and 1 (either or both of them ?!?!) are used to
                           not clear tile (or maybe one of the bits indicates if the tile is
                           compressed or not), bit 2 and 3 to not clear tile 1,...,.
                           Pattern is as follows:
                           | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
                           bits -------------------------------------------------
                           | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
                           rv100: clearmask covers 2x8 4x1 tiles, but one clear still
                           covers 256 pixels ?!?
                         */
 
                RADEON_WAIT_UNTIL_2D_IDLE();
                OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
                             tempRB3D_DEPTHCLEARVALUE);
                /* what offset is this exactly ? */
                OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
                /* need ctlstat, otherwise get some strange black flickering */
                OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
                             RADEON_RB3D_ZC_FLUSH_ALL);

                for (i = 0; i < nbox; i++) {
                        int tileoffset, nrtilesx, nrtilesy, j;
                        /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
                        if ((dev_priv->flags & RADEON_HAS_HIERZ)
                            && !(dev_priv->microcode_version == UCODE_R200)) {
                                /* FIXME : figure this out for r200 (when hierz is enabled). Or
                                   maybe r200 actually doesn't need to put the low-res z value into
                                   the tile cache like r100, but just needs to clear the hi-level z-buffer?
                                   Works for R100, both with hierz and without.
                                   R100 seems to operate on 2x1 8x8 tiles, but...
                                   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
                                   problematic with resolutions which are not 64 pix aligned? */
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        OUT_RING(tileoffset * 8);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        tileoffset += depthpixperline >> 6;
                        } else if (dev_priv->microcode_version == UCODE_R200) {
                                /* works for rv250. */
                                /* find first macro tile (8x2 4x4 z-pixels on rv250) */
                                    ((pbox[i].y1 >> 3) * depthpixperline +
                                    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
                                    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
                                for (j = 0; j <= nrtilesy; j++) {
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
 
                                        /* judging by the first tile offset needed, could possibly
                                           directly address/clear 4x4 tiles instead of 8x2 * 4x4
                                           macro tiles, though would still need clear mask for
                                           right/bottom if truly 4x4 granularity is desired ? */
 
                                        OUT_RING(tileoffset * 16);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 1);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        tileoffset += depthpixperline >> 5;
                        } else {        /* rv 100 */
                                /* rv100 might not need 64 pix alignment, who knows */
                                /* offsets are, hmm, weird */
                                    ((pbox[i].y1 >> 4) * depthpixperline +
                                    ((pbox[i].x2 & ~63) -
                                     (pbox[i].x1 & ~63)) >> 4;
                                    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
                                for (j = 0; j <= nrtilesy; j++) {
                                                 (RADEON_3D_CLEAR_ZMASK, 2));
                                        OUT_RING(tileoffset * 128);
                                        /* the number of tiles to clear */
                                        OUT_RING(nrtilesx + 4);
                                        /* clear mask : chooses the clearing pattern. */
                                        OUT_RING(clearmask);
                                        tileoffset += depthpixperline >> 6;

                /* TODO don't always clear all hi-level z tiles */
                if ((dev_priv->flags & RADEON_HAS_HIERZ)
                    && (dev_priv->microcode_version == UCODE_R200)
                    && (flags & RADEON_USE_HIERZ))
                        /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
                        /* FIXME : the mask supposedly contains low-res z values. So can't set
                           just to the max (0xff? or actually 0x3fff?), need to take z clear
                           value into account? */
                        OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
                        OUT_RING(0x0);  /* First tile */
                        OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);

        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
        else if ((dev_priv->microcode_version == UCODE_R200) &&
                (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                int tempRB3D_ZSTENCILCNTL;
                int tempRB3D_STENCILREFMASK;
                int tempRB3D_PLANEMASK;
                int tempSE_VTE_CNTL;
                int tempSE_VTX_FMT_0;
                int tempSE_VTX_FMT_1;
                int tempSE_VAP_CNTL;
                int tempRE_AUX_SCISSOR_CNTL;

                tempRB3D_CNTL = depth_clear->rb3d_cntl;

                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;

                tempSE_CNTL = depth_clear->se_cntl;

                tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
                                           SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

                tempRB3D_PLANEMASK = 0x0;

                tempRE_AUX_SCISSOR_CNTL = 0x0;

                    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

                /* Vertex format (X, Y, Z, W) */
                    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
                    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
                tempSE_VTX_FMT_1 = 0x0;

                /*
                 * Depth buffer specific enables
                 */
                if (flags & RADEON_DEPTH) {
                        /* Enable depth buffer */
                        tempRB3D_CNTL |= RADEON_Z_ENABLE;
                        /* Disable depth buffer */
                        tempRB3D_CNTL &= ~RADEON_Z_ENABLE;

                /*
                 * Stencil buffer specific enables
                 */
                if (flags & RADEON_STENCIL) {
                        tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = clear->depth_mask;
                        tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
                        tempRB3D_STENCILREFMASK = 0x00000000;

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;

                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
                OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
                OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
                             tempRB3D_STENCILREFMASK);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
                OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
                OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
                OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
                OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
                OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
                OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);

                /* Make sure we restore the 3D state next time.
                 */
                sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);
                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
                        OUT_RING(0x3f800000);

        } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

                int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

                rb3d_cntl = depth_clear->rb3d_cntl;

                if (flags & RADEON_DEPTH) {
                        rb3d_cntl |= RADEON_Z_ENABLE;
                        rb3d_cntl &= ~RADEON_Z_ENABLE;

                if (flags & RADEON_STENCIL) {
                        rb3d_cntl |= RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
                        rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
                        rb3d_stencilrefmask = 0x00000000;

                if (flags & RADEON_USE_COMP_ZBUF) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
                            RADEON_Z_DECOMPRESSION_ENABLE;
                if (flags & RADEON_USE_HIERZ) {
                        tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;

                RADEON_WAIT_UNTIL_2D_IDLE();

                OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
                OUT_RING(0x00000000);
                OUT_RING(rb3d_cntl);

                OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
                OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
                OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
                OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);

                /* Make sure we restore the 3D state next time.
                 */
                sarea_priv->ctx_owner = 0;

                for (i = 0; i < nbox; i++) {

                        /* Funny that this should be required --
                         */
                        radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

                        OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
                        OUT_RING(RADEON_VTX_Z_PRESENT |
                                 RADEON_VTX_PKCOLOR_PRESENT);
                        OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
                                  RADEON_PRIM_WALK_RING |
                                  RADEON_MAOS_ENABLE |
                                  RADEON_VTX_FMT_RADEON_MODE |
                                  (3 << RADEON_NUM_VERTICES_SHIFT)));

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);

                        OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
                        OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);

        /* Increment the clear counter.  The client-side 3D driver must
         * wait on this value before performing the clear ioctl.  We
         * need this because the card's so damned fast...
         */
        sarea_priv->last_clear++;

        RADEON_CLEAR_AGE(sarea_priv->last_clear);
        RADEON_WAIT_UNTIL_IDLE();
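/* Copy the back buffer to the front buffer (source and destination swap when
 * the pages are flipped) for every cliprect, then bump the frame counter that
 * clients use to throttle their frame rate.
 */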
 
static void radeon_cp_dispatch_swap(struct drm_device *dev,
                                    struct drm_master *master)

        drm_radeon_private_t *dev_priv = dev->dev_private;
        struct drm_radeon_master_private *master_priv = master->driver_priv;
        drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
        int nbox = sarea_priv->nbox;
        struct drm_clip_rect *pbox = sarea_priv->boxes;

        /* Do some trivial performance monitoring...
         */
        if (dev_priv->do_boxes)
                radeon_cp_performance_boxes(dev_priv, master_priv);

        /* Wait for the 3D stream to idle before dispatching the bitblt.
         * This will prevent data corruption between the two streams.
         */
        RADEON_WAIT_UNTIL_3D_IDLE();

        for (i = 0; i < nbox; i++) {
                int w = pbox[i].x2 - x;
                int h = pbox[i].y2 - y;

                DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);

                OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
                OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
                         RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                         RADEON_GMC_BRUSH_NONE |
                         (dev_priv->color_fmt << 8) |
                         RADEON_GMC_SRC_DATATYPE_COLOR |
                         RADEON_DP_SRC_SOURCE_MEMORY |
                         RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

                /* Make this work even if front & back are flipped:
                 */
                OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
                if (sarea_priv->pfCurrentPage == 0) {
                        OUT_RING(dev_priv->back_pitch_offset);
                        OUT_RING(dev_priv->front_pitch_offset);
                        OUT_RING(dev_priv->front_pitch_offset);
                        OUT_RING(dev_priv->back_pitch_offset);

                OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
                OUT_RING((x << 16) | y);
                OUT_RING((x << 16) | y);
                OUT_RING((w << 16) | h);

        /* Increment the frame counter.  The client-side 3D driver must
         * throttle the framerate by waiting for this value before
         * performing the swapbuffer ioctl.
         */
        sarea_priv->last_frame++;

        RADEON_FRAME_AGE(sarea_priv->last_frame);
        RADEON_WAIT_UNTIL_2D_IDLE();
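/* Page flip: instead of copying, retarget CRTC_OFFSET (and CRTC2_OFFSET) at
 * the other colour buffer and toggle pfCurrentPage in the SAREA.
 */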
 
1422 void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
 
1424         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1425         struct drm_radeon_master_private *master_priv = master->driver_priv;
 
1426         struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
 
1427         int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
 
1428             ? dev_priv->front_offset : dev_priv->back_offset;
 
1430         DRM_DEBUG("pfCurrentPage=%d\n",
 
1431                   master_priv->sarea_priv->pfCurrentPage);
 
1433         /* Do some trivial performance monitoring...
 
1435         if (dev_priv->do_boxes) {
 
1436                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
 
1437                 radeon_cp_performance_boxes(dev_priv, master_priv);
 
1440         /* Update the frame offsets for both CRTCs
 
1444         RADEON_WAIT_UNTIL_3D_IDLE();
 
1445         OUT_RING_REG(RADEON_CRTC_OFFSET,
 
1446                      ((sarea->frame.y * dev_priv->front_pitch +
 
1447                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
 
1449         OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
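        /*
         * A note on the arithmetic in the CRTC_OFFSET write above (a sketch,
         * assuming the usual color_fmt values): dev_priv->color_fmt is
         * RADEON_COLOR_FORMAT_RGB565 (4) for 16 bpp or
         * RADEON_COLOR_FORMAT_ARGB8888 (6) for 32 bpp, so (color_fmt - 2)
         * works out to the bytes per pixel.  The expression is essentially
         * frame.y * pitch + frame.x * bpp, masked with ~7 to keep the offset
         * 8-byte aligned, added to the front or back buffer base picked above.
         */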
 
1454         /* Increment the frame counter.  The client-side 3D driver must
 
1455          * throttle the framerate by waiting for this value before
 
1456          * performing the swapbuffer ioctl.
 
1458         master_priv->sarea_priv->last_frame++;
 
1459         master_priv->sarea_priv->pfCurrentPage =
 
1460                 1 - master_priv->sarea_priv->pfCurrentPage;
 
1464         RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);
 
1469 static int bad_prim_vertex_nr(int primitive, int nr)
 
1471         switch (primitive & RADEON_PRIM_TYPE_MASK) {
 
1472         case RADEON_PRIM_TYPE_NONE:
 
1473         case RADEON_PRIM_TYPE_POINT:
 
1475         case RADEON_PRIM_TYPE_LINE:
 
1476                 return (nr & 1) || nr == 0;
 
1477         case RADEON_PRIM_TYPE_LINE_STRIP:
 
1479         case RADEON_PRIM_TYPE_TRI_LIST:
 
1480         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
 
1481         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
 
1482         case RADEON_PRIM_TYPE_RECT_LIST:
 
1483                 return nr % 3 || nr == 0;
 
1484         case RADEON_PRIM_TYPE_TRI_FAN:
 
1485         case RADEON_PRIM_TYPE_TRI_STRIP:
 
1492 typedef struct {
1493         unsigned int start;
1494         unsigned int finish;
1495         unsigned int prim;
 
1496         unsigned int numverts;
 
1497         unsigned int offset;
 
1498         unsigned int vc_format;
 
1499 } drm_radeon_tcl_prim_t;
 
1501 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
 
1502                                       struct drm_file *file_priv,
 
1503                                       struct drm_buf * buf,
 
1504                                       drm_radeon_tcl_prim_t * prim)
 
1506         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1507         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 
1508         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 
1509         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
 
1510         int numverts = (int)prim->numverts;
 
1511         int nbox = sarea_priv->nbox;
 
1515         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
 
1517                   prim->vc_format, prim->start, prim->finish, prim->numverts);
 
1519         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
 
1520                 DRM_ERROR("bad prim %x numverts %d\n",
 
1521                           prim->prim, prim->numverts);
 
1526                 /* Emit the next cliprect */
 
1528                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
 
1531                 /* Emit the vertex buffer rendering commands */
 
1534                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
 
1537                 OUT_RING(prim->vc_format);
 
1538                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
 
1539                          RADEON_COLOR_ORDER_RGBA |
 
1540                          RADEON_VTX_FMT_RADEON_MODE |
 
1541                          (numverts << RADEON_NUM_VERTICES_SHIFT));
 
1549 static void radeon_cp_discard_buffer(struct drm_device *dev, struct drm_master *master, struct drm_buf *buf)
 
1551         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1552         struct drm_radeon_master_private *master_priv = master->driver_priv;
 
1553         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
 
1556         buf_priv->age = ++master_priv->sarea_priv->last_dispatch;
 
1558         /* Emit the vertex buffer age */
 
1560         RADEON_DISPATCH_AGE(buf_priv->age);
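        /*
         * Sketch of how this age is consumed (an assumption based on the
         * scratch register usage elsewhere in this file, not a quote of the
         * freelist code): once the CP has processed the DISPATCH_AGE write,
         * GET_SCRATCH(1) is >= buf_priv->age and the buffer may be recycled,
         * roughly:
         *
         *	if (GET_SCRATCH(1) >= buf_priv->age)
         *		buf->pending = 0;	(safe to hand out again)
         */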
 
1567 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
 
1568                                         struct drm_buf * buf, int start, int end)
 
1570         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1572         DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
 
1575                 int offset = (dev_priv->gart_buffers_offset
 
1576                               + buf->offset + start);
 
1577                 int dwords = (end - start + 3) / sizeof(u32);
 
1579                 /* Indirect buffer data must be an even number of
 
1580                  * dwords, so if we've been given an odd number we must
 
1581                  * pad the data with a Type-2 CP packet.
 
1585                             ((char *)dev->agp_buffer_map->handle
 
1586                              + buf->offset + start);
 
1587                         data[dwords++] = RADEON_CP_PACKET2;
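                        /*
                         * Worked example of the rounding above: end - start =
                         * 9 bytes gives dwords = (9 + 3) / 4 = 3, which is
                         * odd, so one RADEON_CP_PACKET2 (a type-2 NOP) is
                         * appended and the CP is asked to fetch 4 dwords.
                         */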
 
1590                 /* Fire off the indirect buffer */
 
1593                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
 
1601 static void radeon_cp_dispatch_indices(struct drm_device *dev,
 
1602                                        struct drm_master *master,
 
1603                                        struct drm_buf * elt_buf,
 
1604                                        drm_radeon_tcl_prim_t * prim)
 
1606         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1607         struct drm_radeon_master_private *master_priv = master->driver_priv;
 
1608         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 
1609         int offset = dev_priv->gart_buffers_offset + prim->offset;
 
1613         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
 
1614         int count = (prim->finish - start) / sizeof(u16);
 
1615         int nbox = sarea_priv->nbox;
 
1617         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
 
1620                   prim->start, prim->finish, prim->offset, prim->numverts);
 
1622         if (bad_prim_vertex_nr(prim->prim, count)) {
 
1623                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
 
1627         if (start >= prim->finish || (prim->start & 0x7)) {
 
1628                 DRM_ERROR("buffer prim %d\n", prim->prim);
 
1632         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
 
1634         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
 
1635                         elt_buf->offset + prim->start);
 
1637         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
1638         data[1] = offset;
 
1639         data[2] = prim->numverts;
 
1640         data[3] = prim->vc_format;
 
1641         data[4] = (prim->prim |
 
1642                    RADEON_PRIM_WALK_IND |
 
1643                    RADEON_COLOR_ORDER_RGBA |
 
1644                    RADEON_VTX_FMT_RADEON_MODE |
 
1645                    (count << RADEON_NUM_VERTICES_SHIFT));
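        /*
         * The five dwords built above form the RNDR_GEN_INDX_PRIM prologue
         * that sits in the element buffer just ahead of the packed u16
         * indices: packet header, vertex buffer offset, vertex count, vertex
         * format, and the primitive/control word.
         */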
 
1649                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
 
1651                 radeon_cp_dispatch_indirect(dev, elt_buf,
 
1652                                             prim->start, prim->finish);
 
1659 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
 
1661 static int radeon_cp_dispatch_texture(struct drm_device * dev,
 
1662                                       struct drm_file *file_priv,
 
1663                                       drm_radeon_texture_t * tex,
 
1664                                       drm_radeon_tex_image_t * image)
 
1666         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1667         struct drm_buf *buf;
 
1670         const u8 __user *data;
 
1671         int size, dwords, tex_width, blit_width, spitch;
 
1674         u32 texpitch, microtile;
 
1675         u32 offset, byte_offset;
 
1678         if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
 
1679                 DRM_ERROR("Invalid destination offset\n");
 
1683         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
 
1685         /* Flush the pixel cache.  This ensures no pixel data gets mixed
 
1686          * up with the texture data from the host data blit, otherwise
 
1687          * part of the texture image may be corrupted.
 
1690         RADEON_FLUSH_CACHE();
 
1691         RADEON_WAIT_UNTIL_IDLE();
 
1694         /* The compiler won't optimize away a division by a variable,
 
1695          * even if the only legal values are powers of two.  Thus, we'll
 
1696          * use a shift instead.
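         *
         * Example: the blitter's PITCH_OFFSET word takes the pitch in 64-byte
         * units, so a 256-pixel-wide ARGB8888 upload (blit_width = 1024 bytes)
         * yields spitch = 1024 >> 6 = 16 below, rather than blit_width / 64.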
 
1698         switch (tex->format) {
 
1699         case RADEON_TXFORMAT_ARGB8888:
 
1700         case RADEON_TXFORMAT_RGBA8888:
 
1701                 format = RADEON_COLOR_FORMAT_ARGB8888;
 
1702                 tex_width = tex->width * 4;
 
1703                 blit_width = image->width * 4;
 
1705         case RADEON_TXFORMAT_AI88:
 
1706         case RADEON_TXFORMAT_ARGB1555:
 
1707         case RADEON_TXFORMAT_RGB565:
 
1708         case RADEON_TXFORMAT_ARGB4444:
 
1709         case RADEON_TXFORMAT_VYUY422:
 
1710         case RADEON_TXFORMAT_YVYU422:
 
1711                 format = RADEON_COLOR_FORMAT_RGB565;
 
1712                 tex_width = tex->width * 2;
 
1713                 blit_width = image->width * 2;
 
1715         case RADEON_TXFORMAT_I8:
 
1716         case RADEON_TXFORMAT_RGB332:
 
1717                 format = RADEON_COLOR_FORMAT_CI8;
 
1718                 tex_width = tex->width * 1;
 
1719                 blit_width = image->width * 1;
 
1722                 DRM_ERROR("invalid texture format %d\n", tex->format);
 
1725         spitch = blit_width >> 6;
 
1726         if (spitch == 0 && image->height > 1)
 
1729         texpitch = tex->pitch;
 
1730         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
 
1732                 if (tex_width < 64) {
 
1733                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
 
1734                         /* we got tiled coordinates, untile them */
 
1740         /* this might fail for zero-sized uploads - are those illegal? */
 
1741         if (!radeon_check_offset(dev_priv, tex->offset + image->height *
 
1743                 DRM_ERROR("Invalid final destination offset\n");
 
1747         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
 
1750                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
 
1751                           tex->offset >> 10, tex->pitch, tex->format,
 
1752                           image->x, image->y, image->width, image->height);
 
1754                 /* Make a copy of some parameters in case we have to
 
1755                  * update them for a multi-pass texture blit.
 
1757                 height = image->height;
 
1758                 data = (const u8 __user *)image->data;
 
1760                 size = height * blit_width;
 
1762                 if (size > RADEON_MAX_TEXTURE_SIZE) {
 
1763                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
 
1764                         size = height * blit_width;
 
1765                 } else if (size < 4 && size > 0) {
 
1767                 } else if (size == 0) {
 
1771                 buf = radeon_freelist_get(dev);
 
1773                         radeon_do_cp_idle(dev_priv);
 
1774                         buf = radeon_freelist_get(dev);
 
1777                         DRM_DEBUG("EAGAIN\n");
 
1778                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
 
1783                 /* Dispatch the indirect buffer.
 
1786                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
 
1789 #define RADEON_COPY_MT(_buf, _data, _width) \
 
1791                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
 
1792                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
 
1798                         /* Texture micro tiling is in use, so the minimum texture width is 16 bytes.
1799                            However, we cannot use the blitter directly for texture widths < 64 bytes,
1800                            since the minimum tex pitch is 64 bytes and we need it to match
1801                            the texture width, otherwise the blitter will tile it wrong.
1802                            Thus, we tile manually in this case.  Additionally, we need to special-case
1803                            tex height = 1, since our actual image will have height 2
1804                            and we need to ensure we don't read beyond the texture size
1805                            from user space. */
 
1806                         if (tex->height == 1) {
 
1807                                 if (tex_width >= 64 || tex_width <= 16) {
 
1808                                         RADEON_COPY_MT(buffer, data,
 
1809                                                 (int)(tex_width * sizeof(u32)));
 
1810                                 } else if (tex_width == 32) {
 
1811                                         RADEON_COPY_MT(buffer, data, 16);
 
1812                                         RADEON_COPY_MT(buffer + 8,
 
1815                         } else if (tex_width >= 64 || tex_width == 16) {
 
1816                                 RADEON_COPY_MT(buffer, data,
 
1817                                                (int)(dwords * sizeof(u32)));
 
1818                         } else if (tex_width < 16) {
 
1819                                 for (i = 0; i < tex->height; i++) {
 
1820                                         RADEON_COPY_MT(buffer, data, tex_width);
 
1824                         } else if (tex_width == 32) {
 
1825                                 /* TODO: make sure this works when not fitting in one buffer
 
1826                                    (i.e. 32bytes x 2048...) */
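                                /*
                                 * What the loop below does: each pass takes
                                 * two 32-byte source rows and stores them as
                                 * four 16-byte groups in the order
                                 * row0[0..15], row1[0..15], row0[16..31],
                                 * row1[16..31], which appears to be the
                                 * micro-tiled order the blitter expects for a
                                 * 32-byte-wide texture.
                                 */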
 
1827                                 for (i = 0; i < tex->height; i += 2) {
 
1828                                         RADEON_COPY_MT(buffer, data, 16);
1829                                         data += 16;
1830                                         RADEON_COPY_MT(buffer + 8, data, 16);
1831                                         data += 16;
1832                                         RADEON_COPY_MT(buffer + 4, data, 16);
1833                                         data += 16;
1834                                         RADEON_COPY_MT(buffer + 12, data, 16);
 
1840                         if (tex_width >= 32) {
 
1841                                 /* Texture image width is larger than the minimum, so we
 
1842                                  * can upload it directly.
 
1844                                 RADEON_COPY_MT(buffer, data,
 
1845                                                (int)(dwords * sizeof(u32)));
 
1847                                 /* Texture image width is less than the minimum, so we
 
1848                                  * need to pad out each image scanline to the minimum
 
1851                                 for (i = 0; i < tex->height; i++) {
 
1852                                         RADEON_COPY_MT(buffer, data, tex_width);
 
1859 #undef RADEON_COPY_MT
 
1860                 byte_offset = (image->y & ~2047) * blit_width;
 
1861                 buf->file_priv = file_priv;
 
1863                 offset = dev_priv->gart_buffers_offset + buf->offset;
 
1865                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
 
1866                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 
1867                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 
1868                          RADEON_GMC_BRUSH_NONE |
 
1870                          RADEON_GMC_SRC_DATATYPE_COLOR |
 
1872                          RADEON_DP_SRC_SOURCE_MEMORY |
 
1873                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
 
1874                 OUT_RING((spitch << 22) | (offset >> 10));
 
1875                 OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
 
1877                 OUT_RING((image->x << 16) | (image->y % 2048));
 
1878                 OUT_RING((image->width << 16) | height);
 
1879                 RADEON_WAIT_UNTIL_2D_IDLE();
 
1883                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
 
1885                 /* Update the input parameters for next time */
 
1887                 image->height -= height;
 
1888                 image->data = (const u8 __user *)image->data + size;
 
1889         } while (image->height > 0);
 
1891         /* Flush the pixel cache after the blit completes.  This ensures
 
1892          * the texture data is written out to memory before rendering
 
1896         RADEON_FLUSH_CACHE();
 
1897         RADEON_WAIT_UNTIL_2D_IDLE();
 
1904 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
 
1906         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1913         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
 
1914         OUT_RING(0x00000000);
 
1916         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
 
1917         for (i = 0; i < 32; i++) {
 
1918                 OUT_RING(stipple[i]);
 
1924 static void radeon_apply_surface_regs(int surf_index,
 
1925                                       drm_radeon_private_t *dev_priv)
 
1927         if (!dev_priv->mmio)
 
1930         radeon_do_cp_idle(dev_priv);
 
1932         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
 
1933                      dev_priv->surfaces[surf_index].flags);
 
1934         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
 
1935                      dev_priv->surfaces[surf_index].lower);
 
1936         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
 
1937                      dev_priv->surfaces[surf_index].upper);
 
1940 /* Allocates a virtual surface
 
1941  * doesn't always allocate a real surface, will stretch an existing
 
1942  * surface when possible.
 
1944  * Note that refcount can be at most 2, since during a free refcount=3
 
1945  * might mean we have to allocate a new surface which might not always
1946  * be available.
1947  * For example: we allocate three contiguous surfaces ABC. If B is
 
1948  * freed, we suddenly need two surfaces to store A and C, which might
 
1949  * not always be available.
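 *
 * The code below therefore has three outcomes: grow an existing surface with
 * identical flags downwards (the new range ends right where it starts), grow
 * one upwards (the new range starts right past its end), or claim a free
 * hardware surface slot; if none of these work it returns -1.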
 
1951 static int alloc_surface(drm_radeon_surface_alloc_t *new,
 
1952                          drm_radeon_private_t *dev_priv,
 
1953                          struct drm_file *file_priv)
 
1955         struct radeon_virt_surface *s;
 
1957         int virt_surface_index;
 
1958         uint32_t new_upper, new_lower;
 
1960         new_lower = new->address;
 
1961         new_upper = new_lower + new->size - 1;
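        /* The sanity check below requires the low RADEON_SURF_ADDRESS_FIXED_MASK
         * bits of new_lower to be all zeroes and those of new_upper to be all
         * ones, i.e. the surface must start on a (mask + 1)-byte boundary and
         * end exactly one byte short of the next one.
         */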
 
1964         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
 
1965             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
 
1966              RADEON_SURF_ADDRESS_FIXED_MASK)
 
1967             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
 
1970         /* make sure there is no overlap with existing surfaces */
 
1971         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
 
1972                 if ((dev_priv->surfaces[i].refcount != 0) &&
 
1973                     (((new_lower >= dev_priv->surfaces[i].lower) &&
 
1974                       (new_lower < dev_priv->surfaces[i].upper)) ||
 
1975                      ((new_lower < dev_priv->surfaces[i].lower) &&
 
1976                       (new_upper > dev_priv->surfaces[i].lower)))) {
 
1981         /* find a virtual surface */
 
1982         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
 
1983                 if (dev_priv->virt_surfaces[i].file_priv == 0)
 
1985         if (i == 2 * RADEON_MAX_SURFACES) {
 
1988         virt_surface_index = i;
 
1990         /* try to reuse an existing surface */
 
1991         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
 
1993                 if ((dev_priv->surfaces[i].refcount == 1) &&
 
1994                     (new->flags == dev_priv->surfaces[i].flags) &&
 
1995                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
 
1996                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
 
1997                         s->surface_index = i;
 
1998                         s->lower = new_lower;
 
1999                         s->upper = new_upper;
 
2000                         s->flags = new->flags;
 
2001                         s->file_priv = file_priv;
 
2002                         dev_priv->surfaces[i].refcount++;
 
2003                         dev_priv->surfaces[i].lower = s->lower;
 
2004                         radeon_apply_surface_regs(s->surface_index, dev_priv);
 
2005                         return virt_surface_index;
 
2009                 if ((dev_priv->surfaces[i].refcount == 1) &&
 
2010                     (new->flags == dev_priv->surfaces[i].flags) &&
 
2011                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
 
2012                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
 
2013                         s->surface_index = i;
 
2014                         s->lower = new_lower;
 
2015                         s->upper = new_upper;
 
2016                         s->flags = new->flags;
 
2017                         s->file_priv = file_priv;
 
2018                         dev_priv->surfaces[i].refcount++;
 
2019                         dev_priv->surfaces[i].upper = s->upper;
 
2020                         radeon_apply_surface_regs(s->surface_index, dev_priv);
 
2021                         return virt_surface_index;
 
2025         /* okay, we need a new one */
 
2026         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
 
2027                 if (dev_priv->surfaces[i].refcount == 0) {
 
2028                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
 
2029                         s->surface_index = i;
 
2030                         s->lower = new_lower;
 
2031                         s->upper = new_upper;
 
2032                         s->flags = new->flags;
 
2033                         s->file_priv = file_priv;
 
2034                         dev_priv->surfaces[i].refcount = 1;
 
2035                         dev_priv->surfaces[i].lower = s->lower;
 
2036                         dev_priv->surfaces[i].upper = s->upper;
 
2037                         dev_priv->surfaces[i].flags = s->flags;
 
2038                         radeon_apply_surface_regs(s->surface_index, dev_priv);
 
2039                         return virt_surface_index;
 
2043         /* we didn't find anything */
 
2047 static int free_surface(struct drm_file *file_priv,
 
2048                         drm_radeon_private_t * dev_priv,
 
2051         struct radeon_virt_surface *s;
 
2053         /* find the virtual surface */
 
2054         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
 
2055                 s = &(dev_priv->virt_surfaces[i]);
 
2057                         if ((lower == s->lower) && (file_priv == s->file_priv))
 
2059                                 if (dev_priv->surfaces[s->surface_index].
2060                                     lower == s->lower)
2061                                         dev_priv->surfaces[s->surface_index].
2062                                             lower = s->upper;
2063
2064                                 if (dev_priv->surfaces[s->surface_index].
2065                                     upper == s->upper)
2066                                         dev_priv->surfaces[s->surface_index].
2067                                             upper = s->lower;
2068
2069                                 dev_priv->surfaces[s->surface_index].refcount--;
2070                                 if (dev_priv->surfaces[s->surface_index].
2071                                     refcount == 0)
2072                                         dev_priv->surfaces[s->surface_index].
2073                                             flags = 0;
 
2074                                 s->file_priv = NULL;
 
2075                                 radeon_apply_surface_regs(s->surface_index,
 
2084 static void radeon_surfaces_release(struct drm_file *file_priv,
 
2085                                     drm_radeon_private_t * dev_priv)
 
2088         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
 
2089                 if (dev_priv->virt_surfaces[i].file_priv == file_priv)
 
2090                         free_surface(file_priv, dev_priv,
 
2091                                      dev_priv->virt_surfaces[i].lower);
 
2095 /* ================================================================
 
2098 static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2100         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2101         drm_radeon_surface_alloc_t *alloc = data;
 
2103         if (alloc_surface(alloc, dev_priv, file_priv) == -1)
 
2109 static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2111         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2112         drm_radeon_surface_free_t *memfree = data;
 
2114         if (free_surface(file_priv, dev_priv, memfree->address))
 
2120 static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2122         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2123         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 
2124         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 
2125         drm_radeon_clear_t *clear = data;
 
2126         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
 
2129         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2131         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2133         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 
2134                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
 
2136         if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
 
2137                                sarea_priv->nbox * sizeof(depth_boxes[0])))
 
2140         radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);
 
2146 /* Not sure why this isn't set all the time:
 
2148 static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
 
2150         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2151         struct drm_radeon_master_private *master_priv = master->driver_priv;
 
2157         RADEON_WAIT_UNTIL_3D_IDLE();
 
2158         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
 
2159         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
 
2160                  RADEON_CRTC_OFFSET_FLIP_CNTL);
 
2161         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
 
2162         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
 
2163                  RADEON_CRTC_OFFSET_FLIP_CNTL);
 
2166         dev_priv->page_flipping = 1;
 
2168         if (master_priv->sarea_priv->pfCurrentPage != 1)
 
2169                 master_priv->sarea_priv->pfCurrentPage = 0;
 
2174 /* Swapping and flipping are different operations, need different ioctls.
 
2175  * They can & should be intermixed to support multiple 3d windows.
 
2177 static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2179         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2182         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2184         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2186         if (!dev_priv->page_flipping)
 
2187                 radeon_do_init_pageflip(dev, file_priv->master);
 
2189         radeon_cp_dispatch_flip(dev, file_priv->master);
 
2195 static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2197         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2198         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 
2199         drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
 
2203         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2205         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2207         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 
2208                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
 
2210         radeon_cp_dispatch_swap(dev, file_priv->master);
 
2211         sarea_priv->ctx_owner = 0;
 
2217 static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2219         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2220         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 
2221         drm_radeon_sarea_t *sarea_priv;
 
2222         struct drm_device_dma *dma = dev->dma;
 
2223         struct drm_buf *buf;
 
2224         drm_radeon_vertex_t *vertex = data;
 
2225         drm_radeon_tcl_prim_t prim;
 
2227         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2229         sarea_priv = master_priv->sarea_priv;
 
2231         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
 
2232                   DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);
 
2234         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
 
2235                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2236                           vertex->idx, dma->buf_count - 1);
 
2239         if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
 
2240                 DRM_ERROR("buffer prim %d\n", vertex->prim);
 
2244         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2245         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2247         buf = dma->buflist[vertex->idx];
 
2249         if (buf->file_priv != file_priv) {
 
2250                 DRM_ERROR("process %d using buffer owned by %p\n",
 
2251                           DRM_CURRENTPID, buf->file_priv);
 
2255                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
 
2259         /* Build up a prim_t record:
 
2261         if (vertex->count) {
 
2262                 buf->used = vertex->count;      /* not used? */
 
2264                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
 
2265                         if (radeon_emit_state(dev_priv, file_priv,
 
2266                                               &sarea_priv->context_state,
 
2267                                               sarea_priv->tex_state,
 
2268                                               sarea_priv->dirty)) {
 
2269                                 DRM_ERROR("radeon_emit_state failed\n");
 
2273                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
 
2274                                                RADEON_UPLOAD_TEX1IMAGES |
 
2275                                                RADEON_UPLOAD_TEX2IMAGES |
 
2276                                                RADEON_REQUIRE_QUIESCENCE);
 
2280                 prim.finish = vertex->count;    /* unused */
 
2281                 prim.prim = vertex->prim;
 
2282                 prim.numverts = vertex->count;
 
2283                 prim.vc_format = sarea_priv->vc_format;
 
2285                 radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
 
2288         if (vertex->discard) {
 
2289                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
 
2296 static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2298         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2299         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 
2300         drm_radeon_sarea_t *sarea_priv;
 
2301         struct drm_device_dma *dma = dev->dma;
 
2302         struct drm_buf *buf;
 
2303         drm_radeon_indices_t *elts = data;
 
2304         drm_radeon_tcl_prim_t prim;
 
2307         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2309         sarea_priv = master_priv->sarea_priv;
 
2311         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
 
2312                   DRM_CURRENTPID, elts->idx, elts->start, elts->end,
 
2315         if (elts->idx < 0 || elts->idx >= dma->buf_count) {
 
2316                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2317                           elts->idx, dma->buf_count - 1);
 
2320         if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
 
2321                 DRM_ERROR("buffer prim %d\n", elts->prim);
 
2325         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2326         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2328         buf = dma->buflist[elts->idx];
 
2330         if (buf->file_priv != file_priv) {
 
2331                 DRM_ERROR("process %d using buffer owned by %p\n",
 
2332                           DRM_CURRENTPID, buf->file_priv);
 
2336                 DRM_ERROR("sending pending buffer %d\n", elts->idx);
 
2340         count = (elts->end - elts->start) / sizeof(u16);
 
2341         elts->start -= RADEON_INDEX_PRIM_OFFSET;
 
2343         if (elts->start & 0x7) {
 
2344                 DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
 
2347         if (elts->start < buf->used) {
 
2348                 DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
 
2352         buf->used = elts->end;
 
2354         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
 
2355                 if (radeon_emit_state(dev_priv, file_priv,
 
2356                                       &sarea_priv->context_state,
 
2357                                       sarea_priv->tex_state,
 
2358                                       sarea_priv->dirty)) {
 
2359                         DRM_ERROR("radeon_emit_state failed\n");
 
2363                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
 
2364                                        RADEON_UPLOAD_TEX1IMAGES |
 
2365                                        RADEON_UPLOAD_TEX2IMAGES |
 
2366                                        RADEON_REQUIRE_QUIESCENCE);
 
2369         /* Build up a prim_t record:
 
2371         prim.start = elts->start;
 
2372         prim.finish = elts->end;
 
2373         prim.prim = elts->prim;
 
2374         prim.offset = 0;        /* offset from start of dma buffers */
 
2375         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
 
2376         prim.vc_format = sarea_priv->vc_format;
 
2378         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
 
2379         if (elts->discard) {
 
2380                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
 
2387 static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2389         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2390         drm_radeon_texture_t *tex = data;
 
2391         drm_radeon_tex_image_t image;
 
2394         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2396         if (tex->image == NULL) {
 
2397                 DRM_ERROR("null texture image!\n");
 
2401         if (DRM_COPY_FROM_USER(&image,
 
2402                                (drm_radeon_tex_image_t __user *) tex->image,
 
2406         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2407         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2409         ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);
 
2414 static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2416         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2417         drm_radeon_stipple_t *stipple = data;
 
2420         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2422         if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
 
2425         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2427         radeon_cp_dispatch_stipple(dev, mask);
 
2433 static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2435         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2436         struct drm_device_dma *dma = dev->dma;
 
2437         struct drm_buf *buf;
 
2438         drm_radeon_indirect_t *indirect = data;
 
2441         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2443         DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
 
2444                   indirect->idx, indirect->start, indirect->end,
 
2447         if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
 
2448                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2449                           indirect->idx, dma->buf_count - 1);
 
2453         buf = dma->buflist[indirect->idx];
 
2455         if (buf->file_priv != file_priv) {
 
2456                 DRM_ERROR("process %d using buffer owned by %p\n",
 
2457                           DRM_CURRENTPID, buf->file_priv);
 
2461                 DRM_ERROR("sending pending buffer %d\n", indirect->idx);
 
2465         if (indirect->start < buf->used) {
 
2466                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
 
2467                           indirect->start, buf->used);
 
2471         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2472         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2474         buf->used = indirect->end;
 
2476         /* Wait for the 3D stream to idle before the indirect buffer
 
2477          * containing 2D acceleration commands is processed.
 
2481         RADEON_WAIT_UNTIL_3D_IDLE();
 
2485         /* Dispatch the indirect buffer full of commands from the
 
2486          * X server.  This is insecure and is thus only available to
 
2487          * privileged clients.
 
2489         radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
 
2490         if (indirect->discard) {
 
2491                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
 
2498 static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2500         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2501         struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
 
2502         drm_radeon_sarea_t *sarea_priv;
 
2503         struct drm_device_dma *dma = dev->dma;
 
2504         struct drm_buf *buf;
 
2505         drm_radeon_vertex2_t *vertex = data;
 
2507         unsigned char laststate;
 
2509         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2511         sarea_priv = master_priv->sarea_priv;
 
2513         DRM_DEBUG("pid=%d index=%d discard=%d\n",
 
2514                   DRM_CURRENTPID, vertex->idx, vertex->discard);
 
2516         if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
 
2517                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2518                           vertex->idx, dma->buf_count - 1);
 
2522         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2523         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2525         buf = dma->buflist[vertex->idx];
 
2527         if (buf->file_priv != file_priv) {
 
2528                 DRM_ERROR("process %d using buffer owned by %p\n",
 
2529                           DRM_CURRENTPID, buf->file_priv);
 
2534                 DRM_ERROR("sending pending buffer %d\n", vertex->idx);
 
2538         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 
2541         for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
 
2542                 drm_radeon_prim_t prim;
 
2543                 drm_radeon_tcl_prim_t tclprim;
 
2545                 if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
 
2548                 if (prim.stateidx != laststate) {
 
2549                         drm_radeon_state_t state;
 
2551                         if (DRM_COPY_FROM_USER(&state,
 
2552                                                &vertex->state[prim.stateidx],
 
2556                         if (radeon_emit_state2(dev_priv, file_priv, &state)) {
 
2557                                 DRM_ERROR("radeon_emit_state2 failed\n");
 
2561                         laststate = prim.stateidx;
 
2564                 tclprim.start = prim.start;
 
2565                 tclprim.finish = prim.finish;
 
2566                 tclprim.prim = prim.prim;
 
2567                 tclprim.vc_format = prim.vc_format;
 
2569                 if (prim.prim & RADEON_PRIM_WALK_IND) {
 
2570                         tclprim.offset = prim.numverts * 64;
 
2571                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
 
2573                         radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
 
2575                         tclprim.numverts = prim.numverts;
 
2576                         tclprim.offset = 0;     /* not used */
 
2578                         radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
 
2581                 if (sarea_priv->nbox == 1)
 
2582                         sarea_priv->nbox = 0;
 
2585         if (vertex->discard) {
 
2586                 radeon_cp_discard_buffer(dev, file_priv->master, buf);
 
2593 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
 
2594                                struct drm_file *file_priv,
 
2595                                drm_radeon_cmd_header_t header,
 
2596                                drm_radeon_kcmd_buffer_t *cmdbuf)
 
2598         int id = (int)header.packet.packet_id;
 
2600         int *data = (int *)cmdbuf->buf;
 
2603         if (id >= RADEON_MAX_STATE_PACKETS)
 
2606         sz = packet[id].len;
 
2607         reg = packet[id].start;
 
2609         if (sz * sizeof(int) > cmdbuf->bufsz) {
 
2610                 DRM_ERROR("Packet size provided larger than data provided\n");
 
2614         if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
 
2615                 DRM_ERROR("Packet verification failed\n");
 
2620         OUT_RING(CP_PACKET0(reg, (sz - 1)));
 
2621         OUT_RING_TABLE(data, sz);
 
2624         cmdbuf->buf += sz * sizeof(int);
 
2625         cmdbuf->bufsz -= sz * sizeof(int);
 
2629 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
 
2630                                           drm_radeon_cmd_header_t header,
 
2631                                           drm_radeon_kcmd_buffer_t *cmdbuf)
 
2633         int sz = header.scalars.count;
 
2634         int start = header.scalars.offset;
 
2635         int stride = header.scalars.stride;
 
2639         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
 
2640         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
 
2641         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
 
2642         OUT_RING_TABLE(cmdbuf->buf, sz);
 
2644         cmdbuf->buf += sz * sizeof(int);
 
2645         cmdbuf->bufsz -= sz * sizeof(int);
 
2651 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
 
2652                                            drm_radeon_cmd_header_t header,
 
2653                                            drm_radeon_kcmd_buffer_t *cmdbuf)
 
2655         int sz = header.scalars.count;
 
2656         int start = ((unsigned int)header.scalars.offset) + 0x100;
 
2657         int stride = header.scalars.stride;
 
2661         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
 
2662         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
 
2663         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
 
2664         OUT_RING_TABLE(cmdbuf->buf, sz);
 
2666         cmdbuf->buf += sz * sizeof(int);
 
2667         cmdbuf->bufsz -= sz * sizeof(int);
 
2671 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
 
2672                                           drm_radeon_cmd_header_t header,
 
2673                                           drm_radeon_kcmd_buffer_t *cmdbuf)
 
2675         int sz = header.vectors.count;
 
2676         int start = header.vectors.offset;
 
2677         int stride = header.vectors.stride;
 
2681         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
 
2682         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
 
2683         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
 
2684         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
 
2685         OUT_RING_TABLE(cmdbuf->buf, sz);
 
2688         cmdbuf->buf += sz * sizeof(int);
 
2689         cmdbuf->bufsz -= sz * sizeof(int);
 
2693 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
 
2694                                           drm_radeon_cmd_header_t header,
 
2695                                           drm_radeon_kcmd_buffer_t *cmdbuf)
 
2697         int sz = header.veclinear.count * 4;
 
2698         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
 
2703         if (sz * 4 > cmdbuf->bufsz)
 
2707         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
 
2708         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
 
2709         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
 
2710         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
 
2711         OUT_RING_TABLE(cmdbuf->buf, sz);
 
2714         cmdbuf->buf += sz * sizeof(int);
 
2715         cmdbuf->bufsz -= sz * sizeof(int);
 
2719 static int radeon_emit_packet3(struct drm_device * dev,
 
2720                                struct drm_file *file_priv,
 
2721                                drm_radeon_kcmd_buffer_t *cmdbuf)
 
2723         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2730         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
 
2732                 DRM_ERROR("Packet verification failed\n");
 
2737         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
 
2740         cmdbuf->buf += cmdsz * 4;
 
2741         cmdbuf->bufsz -= cmdsz * 4;
 
2745 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
 
2746                                         struct drm_file *file_priv,
 
2747                                         drm_radeon_kcmd_buffer_t *cmdbuf,
 
2750         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2751         struct drm_clip_rect box;
 
2754         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
 
2760         if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
 
2762                 DRM_ERROR("Packet verification failed\n");
 
2770                 if (i < cmdbuf->nbox) {
 
2771                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
 
2773                         /* FIXME The second and subsequent times round
 
2774                          * this loop, send a WAIT_UNTIL_3D_IDLE before
 
2775                          * calling emit_clip_rect(). This fixes a
 
2776                          * lockup on fast machines when sending
 
2777                          * several cliprects with a cmdbuf, as when
 
2778                          * waving a 2D window over a 3D
 
2779                          * window. Something in the commands from user
 
2780                          * space seems to hang the card when they're
 
2781                          * sent several times in a row. That would be
 
2782                          * the correct place to fix it but this works
 
2783                          * around it until I can figure that out - Tim
 
2787                                 RADEON_WAIT_UNTIL_3D_IDLE();
 
2790                         radeon_emit_clip_rect(dev_priv, &box);
 
2794                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
 
2797         } while (++i < cmdbuf->nbox);
 
2798         if (cmdbuf->nbox == 1)
 
2802         cmdbuf->buf += cmdsz * 4;
 
2803         cmdbuf->bufsz -= cmdsz * 4;
 
2807 static int radeon_emit_wait(struct drm_device * dev, int flags)
 
2809         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2812         DRM_DEBUG("%x\n", flags);
 
2814         case RADEON_WAIT_2D:
 
2816                 RADEON_WAIT_UNTIL_2D_IDLE();
 
2819         case RADEON_WAIT_3D:
 
2821                 RADEON_WAIT_UNTIL_3D_IDLE();
 
2824         case RADEON_WAIT_2D | RADEON_WAIT_3D:
 
2826                 RADEON_WAIT_UNTIL_IDLE();
 
2836 static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
2838         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2839         struct drm_device_dma *dma = dev->dma;
 
2840         struct drm_buf *buf = NULL;
 
2842         drm_radeon_kcmd_buffer_t *cmdbuf = data;
 
2843         drm_radeon_cmd_header_t header;
 
2844         int orig_nbox, orig_bufsz;
 
2847         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
2849         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2850         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2852         if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
 
2856         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
 
2857          * races between checking values and using those values in other code,
 
2858          * and simply to avoid a lot of function calls to copy in data.
 
2860         orig_bufsz = cmdbuf->bufsz;
 
2861         if (orig_bufsz != 0) {
 
2862                 kbuf = drm_alloc(cmdbuf->bufsz, DRM_MEM_DRIVER);
 
2865                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
 
2867                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
2873         orig_nbox = cmdbuf->nbox;
 
2875         if (dev_priv->microcode_version == UCODE_R300) {
 
2877                 temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);
 
2879                 if (orig_bufsz != 0)
 
2880                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
2885         /* microcode_version != r300 */
 
2886         while (cmdbuf->bufsz >= sizeof(header)) {
 
2888                 header.i = *(int *)cmdbuf->buf;
 
2889                 cmdbuf->buf += sizeof(header);
 
2890                 cmdbuf->bufsz -= sizeof(header);
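                /*
                 * The stream this loop walks is simply a sequence of 32-bit
                 * command headers, each followed by its payload:
                 *
                 *	[cmd_header][payload ...][cmd_header][payload ...] ...
                 *
                 * Every case below is responsible for advancing cmdbuf->buf
                 * and cmdbuf->bufsz past the payload it consumed (see e.g.
                 * radeon_emit_packets() above).
                 */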
 
2892                 switch (header.header.cmd_type) {
 
2893                 case RADEON_CMD_PACKET:
 
2894                         DRM_DEBUG("RADEON_CMD_PACKET\n");
 
2895                         if (radeon_emit_packets
 
2896                             (dev_priv, file_priv, header, cmdbuf)) {
 
2897                                 DRM_ERROR("radeon_emit_packets failed\n");
 
2902                 case RADEON_CMD_SCALARS:
 
2903                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
 
2904                         if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
 
2905                                 DRM_ERROR("radeon_emit_scalars failed\n");
 
2910                 case RADEON_CMD_VECTORS:
 
2911                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
 
2912                         if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
 
2913                                 DRM_ERROR("radeon_emit_vectors failed\n");
 
2918                 case RADEON_CMD_DMA_DISCARD:
 
2919                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
 
2920                         idx = header.dma.buf_idx;
 
2921                         if (idx < 0 || idx >= dma->buf_count) {
 
2922                                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2923                                           idx, dma->buf_count - 1);
 
2927                         buf = dma->buflist[idx];
 
2928                         if (buf->file_priv != file_priv || buf->pending) {
 
2929                                 DRM_ERROR("bad buffer %p %p %d\n",
 
2930                                           buf->file_priv, file_priv,
 
2935                         radeon_cp_discard_buffer(dev, file_priv->master, buf);
 
2938                 case RADEON_CMD_PACKET3:
 
2939                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
 
2940                         if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
 
2941                                 DRM_ERROR("radeon_emit_packet3 failed\n");
 
2946                 case RADEON_CMD_PACKET3_CLIP:
 
2947                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
 
2948                         if (radeon_emit_packet3_cliprect
 
2949                             (dev, file_priv, cmdbuf, orig_nbox)) {
 
2950                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
 
2955                 case RADEON_CMD_SCALARS2:
 
2956                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
 
2957                         if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
 
2958                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
 
2963                 case RADEON_CMD_WAIT:
 
2964                         DRM_DEBUG("RADEON_CMD_WAIT\n");
 
2965                         if (radeon_emit_wait(dev, header.wait.flags)) {
 
2966                                 DRM_ERROR("radeon_emit_wait failed\n");
 
2970                 case RADEON_CMD_VECLINEAR:
 
2971                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
 
2972                         if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
 
2973                                 DRM_ERROR("radeon_emit_veclinear failed\n");
 
2979                         DRM_ERROR("bad cmd_type %d at %p\n",
 
2980                                   header.header.cmd_type,
 
2981                                   cmdbuf->buf - sizeof(header));
 
2986         if (orig_bufsz != 0)
 
2987                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
2989         DRM_DEBUG("DONE\n");
 
2994         if (orig_bufsz != 0)
 
2995                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
2999 static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
3001         drm_radeon_private_t *dev_priv = dev->dev_private;
 
3002         drm_radeon_getparam_t *param = data;
 
3005         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
 
3007         switch (param->param) {
 
3008         case RADEON_PARAM_GART_BUFFER_OFFSET:
 
3009                 value = dev_priv->gart_buffers_offset;
 
3011         case RADEON_PARAM_LAST_FRAME:
 
3012                 dev_priv->stats.last_frame_reads++;
 
3013                 value = GET_SCRATCH(0);
 
3015         case RADEON_PARAM_LAST_DISPATCH:
 
3016                 value = GET_SCRATCH(1);
 
3018         case RADEON_PARAM_LAST_CLEAR:
 
3019                 dev_priv->stats.last_clear_reads++;
 
3020                 value = GET_SCRATCH(2);
 
3022         case RADEON_PARAM_IRQ_NR:
 
3023                 value = drm_dev_to_irq(dev);
 
3025         case RADEON_PARAM_GART_BASE:
 
3026                 value = dev_priv->gart_vm_start;
 
3028         case RADEON_PARAM_REGISTER_HANDLE:
 
3029                 value = dev_priv->mmio->offset;
 
3031         case RADEON_PARAM_STATUS_HANDLE:
 
3032                 value = dev_priv->ring_rptr_offset;
 
3034 #if BITS_PER_LONG == 32
 
3036                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
 
3037                  * pointer which can't fit into an int-sized variable.  According to
 
3038                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
 
3039                  * not supporting it shouldn't be a problem.  If the same functionality
 
3040                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
 
3041                  * so backwards-compatibility for the embedded platforms can be
 
3042                  * maintained.  --davidm 4-Feb-2004.
 
3044         case RADEON_PARAM_SAREA_HANDLE:
 
3045                 /* The lock is the first dword in the sarea. */
 
3046                 /* no users of this parameter */
 
3049         case RADEON_PARAM_GART_TEX_HANDLE:
 
3050                 value = dev_priv->gart_textures_offset;
 
3052         case RADEON_PARAM_SCRATCH_OFFSET:
 
3053                 if (!dev_priv->writeback_works)
 
3055                 value = RADEON_SCRATCH_REG_OFFSET;
 
3057         case RADEON_PARAM_CARD_TYPE:
 
3058                 if (dev_priv->flags & RADEON_IS_PCIE)
 
3059                         value = RADEON_CARD_PCIE;
 
3060                 else if (dev_priv->flags & RADEON_IS_AGP)
 
3061                         value = RADEON_CARD_AGP;
 
3063                         value = RADEON_CARD_PCI;
 
3065         case RADEON_PARAM_VBLANK_CRTC:
 
3066                 value = radeon_vblank_crtc_get(dev);
 
3068         case RADEON_PARAM_FB_LOCATION:
 
3069                 value = radeon_read_fb_location(dev_priv);
 
3071         case RADEON_PARAM_NUM_GB_PIPES:
 
3072                 value = dev_priv->num_gb_pipes;
 
3075                 DRM_DEBUG("Invalid parameter %d\n", param->param);
 
3079         if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
 
3080                 DRM_ERROR("copy_to_user\n");
 
static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_setparam_t *sp = data;
	struct drm_radeon_driver_file_fields *radeon_priv;

	switch (sp->param) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = file_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location -
		    sp->value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp->value == 0) {
			DRM_DEBUG("color tiling disabled\n");
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->tiling_enabled = 0;
		} else if (sp->value == 1) {
			DRM_DEBUG("color tiling enabled\n");
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	case RADEON_SETPARAM_PCIGART_LOCATION:
		dev_priv->pcigart_offset = sp->value;
		dev_priv->pcigart_offset_set = 1;
		break;
	case RADEON_SETPARAM_NEW_MEMMAP:
		dev_priv->new_memmap = sp->value;
		break;
	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
		dev_priv->gart_info.table_size = sp->value;
		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
		break;
	case RADEON_SETPARAM_VBLANK_CRTC:
		return radeon_vblank_crtc_set(dev, sp->value);
	default:
		DRM_DEBUG("Invalid parameter %d\n", sp->param);
		return -EINVAL;
	}

	return 0;
}
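/*
 * Illustrative only -- not part of this driver.  A corresponding sketch for
 * the write path: a client (typically the DDX or DRI driver at startup)
 * would push a value down with drmCommandWrite().  The function name and
 * the choice of RADEON_SETPARAM_NEW_MEMMAP are assumptions made for this
 * example.
 *
 *	#include <string.h>
 *	#include <xf86drm.h>
 *	#include "radeon_drm.h"
 *
 *	static int example_enable_new_memmap(int fd)
 *	{
 *		drm_radeon_setparam_t sp;
 *
 *		memset(&sp, 0, sizeof(sp));
 *		sp.param = RADEON_SETPARAM_NEW_MEMMAP;
 *		sp.value = 1;
 *		return drmCommandWrite(fd, DRM_RADEON_SETPARAM,
 *				       &sp, sizeof(sp));
 *	}
 */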
 
/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any allocated GART memory.
 *    - Free any allocated radeon surfaces.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
{
	if (dev->dev_private) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		dev_priv->page_flipping = 0;
		radeon_mem_release(file_priv, dev_priv->gart_heap);
		radeon_mem_release(file_priv, dev_priv->fb_heap);
		radeon_surfaces_release(file_priv, dev_priv);
	}
}

void radeon_driver_lastclose(struct drm_device *dev)
{
	radeon_do_release(dev);
}

int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_DEBUG("\n");
	radeon_priv =
	    (struct drm_radeon_driver_file_fields *)
	    drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);

	if (!radeon_priv)
		return -ENOMEM;

	file_priv->driver_priv = radeon_priv;

	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    file_priv->driver_priv;

	drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
}
 
struct drm_ioctl_desc radeon_ioctls[] = {
	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH)
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
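/*
 * For reference: this table and the file hooks above are wired up in
 * radeon_drv.c through the drm_driver structure, roughly as sketched below
 * (field list abridged and illustrative; see radeon_drv.c for the
 * authoritative version):
 *
 *	static struct drm_driver driver = {
 *		...
 *		.open = radeon_driver_open,
 *		.preclose = radeon_driver_preclose,
 *		.postclose = radeon_driver_postclose,
 *		.lastclose = radeon_driver_lastclose,
 *		.ioctls = radeon_ioctls,
 *		...
 *	};
 *
 *	driver.num_ioctls = radeon_max_ioctl;	set during module init
 */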