1 /* radeon_state.c -- State support for Radeon -*- linux-c -*- */
 
   3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 
   6  * Permission is hereby granted, free of charge, to any person obtaining a
 
   7  * copy of this software and associated documentation files (the "Software"),
 
   8  * to deal in the Software without restriction, including without limitation
 
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 
  10  * and/or sell copies of the Software, and to permit persons to whom the
 
  11  * Software is furnished to do so, subject to the following conditions:
 
  13  * The above copyright notice and this permission notice (including the next
 
  14  * paragraph) shall be included in all copies or substantial portions of the
 
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 
  18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 
  19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 
  20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 
  21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 
  22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 
  23  * DEALINGS IN THE SOFTWARE.
 
  26  *    Gareth Hughes <gareth@valinux.com>
 
  27  *    Kevin E. Martin <martin@valinux.com>
 
  32 #include "drm_sarea.h"
 
  33 #include "radeon_drm.h"
 
  34 #include "radeon_drv.h"
 
  36 /* ================================================================
 
  37  * Helper functions for client state checking and fixup
 
  40 static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
 
  42                                                     struct drm_file * filp_priv,
 
  46         u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
 
  47         struct drm_radeon_driver_file_fields *radeon_priv;
 
  49         /* Hrm ... the story of the offset ... So this function converts
 
  50          * the various ideas of what userland clients might have for an
 
  51          * offset in the card address space into an offset into the card
 
  52          * address space :) So with a sane client, it should just keep
 
  53          * the value intact and just do some boundary checking. However,
 
  54          * not all clients are sane. Some older clients pass us 0 based
 
  55          * offsets relative to the start of the framebuffer and some may
 
  56          * assume the AGP aperture is appended to the framebuffer, so we
 
  57          * try to detect those cases and fix them up.
 
  59          * Note: It might be a good idea here to make sure the offset lands
 
  60          * in some "allowed" area to protect things like the PCIE GART...
 
  63         /* First, the best case, the offset already lands in either the
 
  64          * framebuffer or the GART mapped space
 
  66         if (radeon_check_offset(dev_priv, off))
 
  69         /* Ok, that didn't happen... now check if we have a zero based
 
  70          * offset that fits in the framebuffer + gart space, apply the
 
  71          * magic offset we get from SETPARAM or calculated from fb_location
 
  73         if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
 
  74                 radeon_priv = filp_priv->driver_priv;
 
  75                 off += radeon_priv->radeon_fb_delta;
 
  78         /* Finally, assume we aimed at a GART offset if beyond the fb */
 
  80                 off = off - fb_end - 1 + dev_priv->gart_vm_start;
 
  82         /* Now recheck and fail if out of bounds */
 
  83         if (radeon_check_offset(dev_priv, off)) {
 
  84                 DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
 
  88         return DRM_ERR(EINVAL);
 
  91 static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
 
  93                                                      struct drm_file * filp_priv,
 
  98         case RADEON_EMIT_PP_MISC:
 
  99                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 100                     &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
 
 101                         DRM_ERROR("Invalid depth buffer offset\n");
 
 102                         return DRM_ERR(EINVAL);
 
 106         case RADEON_EMIT_PP_CNTL:
 
 107                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 108                     &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
 
 109                         DRM_ERROR("Invalid colour buffer offset\n");
 
 110                         return DRM_ERR(EINVAL);
 
 114         case R200_EMIT_PP_TXOFFSET_0:
 
 115         case R200_EMIT_PP_TXOFFSET_1:
 
 116         case R200_EMIT_PP_TXOFFSET_2:
 
 117         case R200_EMIT_PP_TXOFFSET_3:
 
 118         case R200_EMIT_PP_TXOFFSET_4:
 
 119         case R200_EMIT_PP_TXOFFSET_5:
 
 120                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 122                         DRM_ERROR("Invalid R200 texture offset\n");
 
 123                         return DRM_ERR(EINVAL);
 
 127         case RADEON_EMIT_PP_TXFILTER_0:
 
 128         case RADEON_EMIT_PP_TXFILTER_1:
 
 129         case RADEON_EMIT_PP_TXFILTER_2:
 
 130                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 131                     &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
 
 132                         DRM_ERROR("Invalid R100 texture offset\n");
 
 133                         return DRM_ERR(EINVAL);
 
 137         case R200_EMIT_PP_CUBIC_OFFSETS_0:
 
 138         case R200_EMIT_PP_CUBIC_OFFSETS_1:
 
 139         case R200_EMIT_PP_CUBIC_OFFSETS_2:
 
 140         case R200_EMIT_PP_CUBIC_OFFSETS_3:
 
 141         case R200_EMIT_PP_CUBIC_OFFSETS_4:
 
 142         case R200_EMIT_PP_CUBIC_OFFSETS_5:{
 
 144                         for (i = 0; i < 5; i++) {
 
 145                                 if (radeon_check_and_fixup_offset(dev_priv,
 
 149                                             ("Invalid R200 cubic texture offset\n");
 
 150                                         return DRM_ERR(EINVAL);
 
 156         case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
 
 157         case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
 
 158         case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
 
 160                         for (i = 0; i < 5; i++) {
 
 161                                 if (radeon_check_and_fixup_offset(dev_priv,
 
 165                                             ("Invalid R100 cubic texture offset\n");
 
 166                                         return DRM_ERR(EINVAL);
 
 172         case R200_EMIT_VAP_CTL:{
 
 175                         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
 
 180         case RADEON_EMIT_RB3D_COLORPITCH:
 
 181         case RADEON_EMIT_RE_LINE_PATTERN:
 
 182         case RADEON_EMIT_SE_LINE_WIDTH:
 
 183         case RADEON_EMIT_PP_LUM_MATRIX:
 
 184         case RADEON_EMIT_PP_ROT_MATRIX_0:
 
 185         case RADEON_EMIT_RB3D_STENCILREFMASK:
 
 186         case RADEON_EMIT_SE_VPORT_XSCALE:
 
 187         case RADEON_EMIT_SE_CNTL:
 
 188         case RADEON_EMIT_SE_CNTL_STATUS:
 
 189         case RADEON_EMIT_RE_MISC:
 
 190         case RADEON_EMIT_PP_BORDER_COLOR_0:
 
 191         case RADEON_EMIT_PP_BORDER_COLOR_1:
 
 192         case RADEON_EMIT_PP_BORDER_COLOR_2:
 
 193         case RADEON_EMIT_SE_ZBIAS_FACTOR:
 
 194         case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
 
 195         case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
 
 196         case R200_EMIT_PP_TXCBLEND_0:
 
 197         case R200_EMIT_PP_TXCBLEND_1:
 
 198         case R200_EMIT_PP_TXCBLEND_2:
 
 199         case R200_EMIT_PP_TXCBLEND_3:
 
 200         case R200_EMIT_PP_TXCBLEND_4:
 
 201         case R200_EMIT_PP_TXCBLEND_5:
 
 202         case R200_EMIT_PP_TXCBLEND_6:
 
 203         case R200_EMIT_PP_TXCBLEND_7:
 
 204         case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
 
 205         case R200_EMIT_TFACTOR_0:
 
 206         case R200_EMIT_VTX_FMT_0:
 
 207         case R200_EMIT_MATRIX_SELECT_0:
 
 208         case R200_EMIT_TEX_PROC_CTL_2:
 
 209         case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
 
 210         case R200_EMIT_PP_TXFILTER_0:
 
 211         case R200_EMIT_PP_TXFILTER_1:
 
 212         case R200_EMIT_PP_TXFILTER_2:
 
 213         case R200_EMIT_PP_TXFILTER_3:
 
 214         case R200_EMIT_PP_TXFILTER_4:
 
 215         case R200_EMIT_PP_TXFILTER_5:
 
 216         case R200_EMIT_VTE_CNTL:
 
 217         case R200_EMIT_OUTPUT_VTX_COMP_SEL:
 
 218         case R200_EMIT_PP_TAM_DEBUG3:
 
 219         case R200_EMIT_PP_CNTL_X:
 
 220         case R200_EMIT_RB3D_DEPTHXY_OFFSET:
 
 221         case R200_EMIT_RE_AUX_SCISSOR_CNTL:
 
 222         case R200_EMIT_RE_SCISSOR_TL_0:
 
 223         case R200_EMIT_RE_SCISSOR_TL_1:
 
 224         case R200_EMIT_RE_SCISSOR_TL_2:
 
 225         case R200_EMIT_SE_VAP_CNTL_STATUS:
 
 226         case R200_EMIT_SE_VTX_STATE_CNTL:
 
 227         case R200_EMIT_RE_POINTSIZE:
 
 228         case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
 
 229         case R200_EMIT_PP_CUBIC_FACES_0:
 
 230         case R200_EMIT_PP_CUBIC_FACES_1:
 
 231         case R200_EMIT_PP_CUBIC_FACES_2:
 
 232         case R200_EMIT_PP_CUBIC_FACES_3:
 
 233         case R200_EMIT_PP_CUBIC_FACES_4:
 
 234         case R200_EMIT_PP_CUBIC_FACES_5:
 
 235         case RADEON_EMIT_PP_TEX_SIZE_0:
 
 236         case RADEON_EMIT_PP_TEX_SIZE_1:
 
 237         case RADEON_EMIT_PP_TEX_SIZE_2:
 
 238         case R200_EMIT_RB3D_BLENDCOLOR:
 
 239         case R200_EMIT_TCL_POINT_SPRITE_CNTL:
 
 240         case RADEON_EMIT_PP_CUBIC_FACES_0:
 
 241         case RADEON_EMIT_PP_CUBIC_FACES_1:
 
 242         case RADEON_EMIT_PP_CUBIC_FACES_2:
 
 243         case R200_EMIT_PP_TRI_PERF_CNTL:
 
 244         case R200_EMIT_PP_AFS_0:
 
 245         case R200_EMIT_PP_AFS_1:
 
 246         case R200_EMIT_ATF_TFACTOR:
 
 247         case R200_EMIT_PP_TXCTLALL_0:
 
 248         case R200_EMIT_PP_TXCTLALL_1:
 
 249         case R200_EMIT_PP_TXCTLALL_2:
 
 250         case R200_EMIT_PP_TXCTLALL_3:
 
 251         case R200_EMIT_PP_TXCTLALL_4:
 
 252         case R200_EMIT_PP_TXCTLALL_5:
 
 253         case R200_EMIT_VAP_PVS_CNTL:
 
 254                 /* These packets don't contain memory offsets */
 
 258                 DRM_ERROR("Unknown state packet ID %d\n", id);
 
 259                 return DRM_ERR(EINVAL);
 
 265 static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
 
 267                                                      struct drm_file *filp_priv,
 
 268                                                      drm_radeon_kcmd_buffer_t *
 
 272         u32 *cmd = (u32 *) cmdbuf->buf;
 
 276         *cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);
 
 278         if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
 
 279                 DRM_ERROR("Not a type 3 packet\n");
 
 280                 return DRM_ERR(EINVAL);
 
 283         if (4 * *cmdsz > cmdbuf->bufsz) {
 
 284                 DRM_ERROR("Packet size larger than size of data provided\n");
 
 285                 return DRM_ERR(EINVAL);
 
 288         switch(cmd[0] & 0xff00) {
 
 289         /* XXX Are there old drivers needing other packets? */
 
 291         case RADEON_3D_DRAW_IMMD:
 
 292         case RADEON_3D_DRAW_VBUF:
 
 293         case RADEON_3D_DRAW_INDX:
 
 294         case RADEON_WAIT_FOR_IDLE:
 
 296         case RADEON_3D_CLEAR_ZMASK:
 
 297 /*      case RADEON_CP_NEXT_CHAR:
 
 298         case RADEON_CP_PLY_NEXTSCAN:
 
 299         case RADEON_CP_SET_SCISSORS: */ /* probably safe but will never need them? */
 
 300                 /* these packets are safe */
 
 303         case RADEON_CP_3D_DRAW_IMMD_2:
 
 304         case RADEON_CP_3D_DRAW_VBUF_2:
 
 305         case RADEON_CP_3D_DRAW_INDX_2:
 
 306         case RADEON_3D_CLEAR_HIZ:
 
 307                 /* safe but r200 only */
 
 308                 if (dev_priv->microcode_version != UCODE_R200) {
 
 309                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
 
 310                         return DRM_ERR(EINVAL);
 
 314         case RADEON_3D_LOAD_VBPNTR:
 
 315                 count = (cmd[0] >> 16) & 0x3fff;
 
 317                 if (count > 18) { /* 12 arrays max */
 
 318                         DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
 
 320                         return DRM_ERR(EINVAL);
 
 323                 /* carefully check packet contents */
 
 324                 narrays = cmd[1] & ~0xc000;
 
 327                 while ((k < narrays) && (i < (count + 2))) {
 
 328                         i++;            /* skip attribute field */
 
 329                         if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &cmd[i])) {
 
 331                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
 
 333                                 return DRM_ERR(EINVAL);
 
 339                         /* have one more to process, they come in pairs */
 
 340                         if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &cmd[i])) {
 
 342                                     ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
 
 344                                 return DRM_ERR(EINVAL);
 
 349                 /* do the counts match what we expect ? */
 
 350                 if ((k != narrays) || (i != (count + 2))) {
 
 352                             ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
 
 353                               k, i, narrays, count + 1);
 
 354                         return DRM_ERR(EINVAL);
 
 358         case RADEON_3D_RNDR_GEN_INDX_PRIM:
 
 359                 if (dev_priv->microcode_version != UCODE_R100) {
 
 360                         DRM_ERROR("Invalid 3d packet for r200-class chip\n");
 
 361                         return DRM_ERR(EINVAL);
 
 363                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &cmd[1])) {
 
 364                                 DRM_ERROR("Invalid rndr_gen_indx offset\n");
 
 365                                 return DRM_ERR(EINVAL);
 
 369         case RADEON_CP_INDX_BUFFER:
 
 370                 if (dev_priv->microcode_version != UCODE_R200) {
 
 371                         DRM_ERROR("Invalid 3d packet for r100-class chip\n");
 
 372                         return DRM_ERR(EINVAL);
 
 374                 if ((cmd[1] & 0x8000ffff) != 0x80000810) {
 
 375                         DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
 
 376                         return DRM_ERR(EINVAL);
 
 378                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &cmd[2])) {
 
 379                         DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
 
 380                         return DRM_ERR(EINVAL);
 
 384         case RADEON_CNTL_HOSTDATA_BLT:
 
 385         case RADEON_CNTL_PAINT_MULTI:
 
 386         case RADEON_CNTL_BITBLT_MULTI:
 
 387                 /* MSB of opcode: next DWORD GUI_CNTL */
 
 388                 if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
 
 389                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
 
 390                         offset = cmd[2] << 10;
 
 391                         if (radeon_check_and_fixup_offset
 
 392                             (dev_priv, filp_priv, &offset)) {
 
 393                                 DRM_ERROR("Invalid first packet offset\n");
 
 394                                 return DRM_ERR(EINVAL);
 
 396                         cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
 
 399                 if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
 
 400                     (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
 
 401                         offset = cmd[3] << 10;
 
 402                         if (radeon_check_and_fixup_offset
 
 403                             (dev_priv, filp_priv, &offset)) {
 
 404                                 DRM_ERROR("Invalid second packet offset\n");
 
 405                                 return DRM_ERR(EINVAL);
 
 407                         cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
 
 412                 DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
 
 413                 return DRM_ERR(EINVAL);
 
 419 /* ================================================================
 
 420  * CP hardware state programming functions
 
 423 static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
 
 424                                              struct drm_clip_rect * box)
 
 428         DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
 
 429                   box->x1, box->y1, box->x2, box->y2);
 
 432         OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
 
 433         OUT_RING((box->y1 << 16) | box->x1);
 
 434         OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
 
 435         OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
 
 441 static int radeon_emit_state(drm_radeon_private_t * dev_priv,
 
 442                              struct drm_file * filp_priv,
 
 443                              drm_radeon_context_regs_t * ctx,
 
 444                              drm_radeon_texture_regs_t * tex,
 
 448         DRM_DEBUG("dirty=0x%08x\n", dirty);
 
 450         if (dirty & RADEON_UPLOAD_CONTEXT) {
 
 451                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 452                                                   &ctx->rb3d_depthoffset)) {
 
 453                         DRM_ERROR("Invalid depth buffer offset\n");
 
 454                         return DRM_ERR(EINVAL);
 
 457                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 458                                                   &ctx->rb3d_coloroffset)) {
 
 459                         DRM_ERROR("Invalid depth buffer offset\n");
 
 460                         return DRM_ERR(EINVAL);
 
 464                 OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
 
 465                 OUT_RING(ctx->pp_misc);
 
 466                 OUT_RING(ctx->pp_fog_color);
 
 467                 OUT_RING(ctx->re_solid_color);
 
 468                 OUT_RING(ctx->rb3d_blendcntl);
 
 469                 OUT_RING(ctx->rb3d_depthoffset);
 
 470                 OUT_RING(ctx->rb3d_depthpitch);
 
 471                 OUT_RING(ctx->rb3d_zstencilcntl);
 
 472                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
 
 473                 OUT_RING(ctx->pp_cntl);
 
 474                 OUT_RING(ctx->rb3d_cntl);
 
 475                 OUT_RING(ctx->rb3d_coloroffset);
 
 476                 OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
 
 477                 OUT_RING(ctx->rb3d_colorpitch);
 
 481         if (dirty & RADEON_UPLOAD_VERTFMT) {
 
 483                 OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
 
 484                 OUT_RING(ctx->se_coord_fmt);
 
 488         if (dirty & RADEON_UPLOAD_LINE) {
 
 490                 OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
 
 491                 OUT_RING(ctx->re_line_pattern);
 
 492                 OUT_RING(ctx->re_line_state);
 
 493                 OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
 
 494                 OUT_RING(ctx->se_line_width);
 
 498         if (dirty & RADEON_UPLOAD_BUMPMAP) {
 
 500                 OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
 
 501                 OUT_RING(ctx->pp_lum_matrix);
 
 502                 OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
 
 503                 OUT_RING(ctx->pp_rot_matrix_0);
 
 504                 OUT_RING(ctx->pp_rot_matrix_1);
 
 508         if (dirty & RADEON_UPLOAD_MASKS) {
 
 510                 OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
 
 511                 OUT_RING(ctx->rb3d_stencilrefmask);
 
 512                 OUT_RING(ctx->rb3d_ropcntl);
 
 513                 OUT_RING(ctx->rb3d_planemask);
 
 517         if (dirty & RADEON_UPLOAD_VIEWPORT) {
 
 519                 OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
 
 520                 OUT_RING(ctx->se_vport_xscale);
 
 521                 OUT_RING(ctx->se_vport_xoffset);
 
 522                 OUT_RING(ctx->se_vport_yscale);
 
 523                 OUT_RING(ctx->se_vport_yoffset);
 
 524                 OUT_RING(ctx->se_vport_zscale);
 
 525                 OUT_RING(ctx->se_vport_zoffset);
 
 529         if (dirty & RADEON_UPLOAD_SETUP) {
 
 531                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
 
 532                 OUT_RING(ctx->se_cntl);
 
 533                 OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
 
 534                 OUT_RING(ctx->se_cntl_status);
 
 538         if (dirty & RADEON_UPLOAD_MISC) {
 
 540                 OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
 
 541                 OUT_RING(ctx->re_misc);
 
 545         if (dirty & RADEON_UPLOAD_TEX0) {
 
 546                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 547                                                   &tex[0].pp_txoffset)) {
 
 548                         DRM_ERROR("Invalid texture offset for unit 0\n");
 
 549                         return DRM_ERR(EINVAL);
 
 553                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
 
 554                 OUT_RING(tex[0].pp_txfilter);
 
 555                 OUT_RING(tex[0].pp_txformat);
 
 556                 OUT_RING(tex[0].pp_txoffset);
 
 557                 OUT_RING(tex[0].pp_txcblend);
 
 558                 OUT_RING(tex[0].pp_txablend);
 
 559                 OUT_RING(tex[0].pp_tfactor);
 
 560                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
 
 561                 OUT_RING(tex[0].pp_border_color);
 
 565         if (dirty & RADEON_UPLOAD_TEX1) {
 
 566                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 567                                                   &tex[1].pp_txoffset)) {
 
 568                         DRM_ERROR("Invalid texture offset for unit 1\n");
 
 569                         return DRM_ERR(EINVAL);
 
 573                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
 
 574                 OUT_RING(tex[1].pp_txfilter);
 
 575                 OUT_RING(tex[1].pp_txformat);
 
 576                 OUT_RING(tex[1].pp_txoffset);
 
 577                 OUT_RING(tex[1].pp_txcblend);
 
 578                 OUT_RING(tex[1].pp_txablend);
 
 579                 OUT_RING(tex[1].pp_tfactor);
 
 580                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
 
 581                 OUT_RING(tex[1].pp_border_color);
 
 585         if (dirty & RADEON_UPLOAD_TEX2) {
 
 586                 if (radeon_check_and_fixup_offset(dev_priv, filp_priv,
 
 587                                                   &tex[2].pp_txoffset)) {
 
 588                         DRM_ERROR("Invalid texture offset for unit 2\n");
 
 589                         return DRM_ERR(EINVAL);
 
 593                 OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
 
 594                 OUT_RING(tex[2].pp_txfilter);
 
 595                 OUT_RING(tex[2].pp_txformat);
 
 596                 OUT_RING(tex[2].pp_txoffset);
 
 597                 OUT_RING(tex[2].pp_txcblend);
 
 598                 OUT_RING(tex[2].pp_txablend);
 
 599                 OUT_RING(tex[2].pp_tfactor);
 
 600                 OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
 
 601                 OUT_RING(tex[2].pp_border_color);
 
 610 static int radeon_emit_state2(drm_radeon_private_t * dev_priv,
 
 611                               struct drm_file * filp_priv,
 
 612                               drm_radeon_state_t * state)
 
 616         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
 
 618                 OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
 
 619                 OUT_RING(state->context2.se_zbias_factor);
 
 620                 OUT_RING(state->context2.se_zbias_constant);
 
 624         return radeon_emit_state(dev_priv, filp_priv, &state->context,
 
 625                                  state->tex, state->dirty);
 
 628 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 
 629  * 1.3 cmdbuffers allow all previous state to be updated as well as
 
 630  * the tcl scalar and vector areas.
 
 636 } packet[RADEON_MAX_STATE_PACKETS] = {
 
 637         {RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
 
 638         {RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
 
 639         {RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
 
 640         {RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
 
 641         {RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
 
 642         {RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
 
 643         {RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
 
 644         {RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
 
 645         {RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
 
 646         {RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
 
 647         {RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
 
 648         {RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
 
 649         {RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
 
 650         {RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
 
 651         {RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
 
 652         {RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
 
 653         {RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
 
 654         {RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
 
 655         {RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
 
 656         {RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
 
 657         {RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
 
 658                     "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
 
 659         {R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
 
 660         {R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
 
 661         {R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
 
 662         {R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
 
 663         {R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
 
 664         {R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
 
 665         {R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
 
 666         {R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
 
 667         {R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
 
 668         {R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
 
 669         {R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
 
 670         {R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
 
 671         {R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
 
 672         {R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
 
 673         {R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
 
 674         {R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
 
 675         {R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
 
 676         {R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
 
 677         {R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
 
 678         {R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
 
 679         {R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
 
 680         {R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
 
 681         {R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
 
 682         {R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
 
 683         {R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
 
 684         {R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
 
 685         {R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
 
 686         {R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
 
 687         {R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
 
 688          "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
 
 689         {R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
 
 690         {R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
 
 691         {R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
 
 692         {R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
 
 693         {R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
 
 694         {R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
 
 695         {R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
 
 696         {R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
 
 697         {R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
 
 698         {R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
 
 699         {R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
 
 700                     "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
 
 701         {R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},    /* 61 */
 
 702         {R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"}, /* 62 */
 
 703         {R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
 
 704         {R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
 
 705         {R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
 
 706         {R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
 
 707         {R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
 
 708         {R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
 
 709         {R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
 
 710         {R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
 
 711         {R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
 
 712         {R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
 
 713         {RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
 
 714         {RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
 
 715         {RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
 
 716         {R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
 
 717         {R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
 
 718         {RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
 
 719         {RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
 
 720         {RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
 
 721         {RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
 
 722         {RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
 
 723         {RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
 
 724         {R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
 
 725         {R200_PP_AFS_0, 32, "R200_PP_AFS_0"},     /* 85 */
 
 726         {R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
 
 727         {R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
 
 728         {R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
 
 729         {R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
 
 730         {R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
 
 731         {R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
 
 732         {R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
 
 733         {R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
 
 734         {R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
 
 737 /* ================================================================
 
 738  * Performance monitoring functions
 
 /*
  * radeon_clear_box() -- solid-fill a w x h box for the performance monitor.
  *
  * x and y are offset by the origin (x1, y1) of the first SAREA cliprect.
  * The r/g/b components are packed according to dev_priv->color_fmt: 5:6:5
  * for RADEON_COLOR_FORMAT_RGB565, or 8:8:8 with the top byte forced to 0xff
  * for RADEON_COLOR_FORMAT_ARGB8888.  After a 3D-idle wait and a write mask
  * of 0xffffffff, a RADEON_CNTL_PAINT_MULTI solid-brush packet is queued.
  * front_pitch_offset is the destination when pfCurrentPage == 1;
  * back_pitch_offset appears in the alternative branch.  The packet ends
  * with the packed (x, y) origin and (w, h) extent.
  *
  * NOTE(review): the embedded listing numbers skip (742 -> 747, 757 -> 762,
  * 779 -> 784, ...), so presumably the opening brace, the local
  * declarations, the switch breaks/default, the else keyword and the write
  * of the packed colour sit on lines missing from this excerpt -- confirm
  * against the complete file.
  */
 741 static void radeon_clear_box(drm_radeon_private_t * dev_priv,
 
 742                              int x, int y, int w, int h, int r, int g, int b)
 
 747         x += dev_priv->sarea_priv->boxes[0].x1;
 
 748         y += dev_priv->sarea_priv->boxes[0].y1;
 
 750         switch (dev_priv->color_fmt) {
 
 751         case RADEON_COLOR_FORMAT_RGB565:
 
 752                 color = (((r & 0xf8) << 8) |
 
 753                          ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
 
 755         case RADEON_COLOR_FORMAT_ARGB8888:
 
 757                 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
 
 762         RADEON_WAIT_UNTIL_3D_IDLE();
 
 763         OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
 
 764         OUT_RING(0xffffffff);
 
 769         OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
 
 770         OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 
 771                  RADEON_GMC_BRUSH_SOLID_COLOR |
 
 772                  (dev_priv->color_fmt << 8) |
 
 773                  RADEON_GMC_SRC_DATATYPE_COLOR |
 
 774                  RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);
 
 776         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
 
 777                 OUT_RING(dev_priv->front_pitch_offset);
 
 779                 OUT_RING(dev_priv->back_pitch_offset);
 
 784         OUT_RING((x << 16) | y);
 
 785         OUT_RING((w << 16) | h);
 
 /*
  * radeon_cp_performance_boxes() -- draw the on-screen performance markers
  * from dev_priv->stats, then reset the statistics.
  *
  * RADEON_BOX_WAIT_IDLE is set when last_frame_reads > 1, when
  * last_clear_reads exceeds clears, or when freelist_loops is non-zero.
  * Markers are 8x8 boxes drawn with radeon_clear_box() at y = 4:
  *   x =  4  purple (255,0,255)  RADEON_BOX_FLIP set
  *   x = 16  red    (255,0,0)    RADEON_BOX_WAIT_IDLE set
  *   x = 40  yellow (255,255,0)  RADEON_BOX_TEXTURE_LOAD set
  *   x = 64  green  (0,255,0)    RADEON_BOX_DMA_IDLE clear
  * A bar 4 pixels high at (4, 16) has a width equal to requested_bufs,
  * clamped to 100.  The whole stats structure is zeroed at the end.
  *
  * NOTE(review): the comment terminators, closing braces, the blue-box code
  * and the colour arguments of the bar are on lines missing from this
  * listing (see the gaps in the embedded numbers).
  */
 790 static void radeon_cp_performance_boxes(drm_radeon_private_t * dev_priv)
 
 792         /* Collapse various things into a wait flag -- trying to
 
 793          * guess if userspase slept -- better just to have them tell us.
 
 795         if (dev_priv->stats.last_frame_reads > 1 ||
 
 796             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
 
 797                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
 
 800         if (dev_priv->stats.freelist_loops) {
 
 801                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
 
 804         /* Purple box for page flipping
 
 806         if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
 
 807                 radeon_clear_box(dev_priv, 4, 4, 8, 8, 255, 0, 255);
 
 809         /* Red box if we have to wait for idle at any point
 
 811         if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
 
 812                 radeon_clear_box(dev_priv, 16, 4, 8, 8, 255, 0, 0);
 
 814         /* Blue box: lost context?
 
 817         /* Yellow box for texture swaps
 
 819         if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
 
 820                 radeon_clear_box(dev_priv, 40, 4, 8, 8, 255, 255, 0);
 
 822         /* Green box if hardware never idles (as far as we can tell)
 
 824         if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
 
 825                 radeon_clear_box(dev_priv, 64, 4, 8, 8, 0, 255, 0);
 
 827         /* Draw bars indicating number of buffers allocated
 
 828          * (not a great measure, easily confused)
 
 830         if (dev_priv->stats.requested_bufs) {
 
 831                 if (dev_priv->stats.requested_bufs > 100)
 
 832                         dev_priv->stats.requested_bufs = 100;
 
 834                 radeon_clear_box(dev_priv, 4, 16,
 
 835                                  dev_priv->stats.requested_bufs, 4,
 
 839         memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
 
 843 /* ================================================================
 
 844  * CP command dispatch functions
 
 /*
  * radeon_cp_dispatch_clear() -- queue the commands for a buffer clear.
  *
  * Flow visible in this excerpt:
  *  - the clear is counted in dev_priv->stats.clears;
  *  - when pfCurrentPage == 1 the RADEON_FRONT and RADEON_BACK flag bits are
  *    exchanged;
  *  - RADEON_FRONT / RADEON_BACK: after a 3D-idle wait, with
  *    clear->color_mask as write mask, one RADEON_CNTL_PAINT_MULTI fill per
  *    cliprect writes clear->clear_color to front_pitch_offset and/or
  *    back_pitch_offset;
  *  - (RADEON_DEPTH | RADEON_STENCIL) together with RADEON_CLEAR_FASTZ: the
  *    depth clear value is programmed and RADEON_3D_CLEAR_ZMASK packets are
  *    emitted per cliprect, with separate tile arithmetic for hier-Z chips
  *    that are not UCODE_R200, for UCODE_R200, and for everything else;
  *    UCODE_R200 with hier-Z present and requested additionally gets a
  *    RADEON_3D_CLEAR_HIZ packet;
  *  - otherwise depth/stencil is cleared by drawing one rectangle per
  *    cliprect from depth_boxes[] (R200_3D_DRAW_IMMD_2 on UCODE_R200,
  *    RADEON_3D_DRAW_IMMD on the remaining path);
  *  - finally sarea_priv->last_clear is incremented and emitted through
  *    RADEON_CLEAR_AGE(), followed by RADEON_WAIT_UNTIL_IDLE().
  *
  * Each path zeroes sarea_priv->ctx_owner; per the in-line comments this is
  * so that the 3D state is restored next time.
  *
  * NOTE(review): the embedded listing numbers show dropped lines throughout
  * (presumably braces, else keywords, several declarations and assignments,
  * and the ring begin/advance bookkeeping), so the text is not compilable as
  * shown and the summary above covers only what is visible.
  */
 847 static void radeon_cp_dispatch_clear(struct drm_device * dev,
 
 848                                      drm_radeon_clear_t * clear,
 
 849                                      drm_radeon_clear_rect_t * depth_boxes)
 
 851         drm_radeon_private_t *dev_priv = dev->dev_private;
 
 852         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
 853         drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
 
 854         int nbox = sarea_priv->nbox;
 
 855         struct drm_clip_rect *pbox = sarea_priv->boxes;
 
 856         unsigned int flags = clear->flags;
 
 857         u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
 
 860         DRM_DEBUG("flags = 0x%x\n", flags);
 
 862         dev_priv->stats.clears++;
 
 864         if (dev_priv->sarea_priv->pfCurrentPage == 1) {
 
 865                 unsigned int tmp = flags;
 
 867                 flags &= ~(RADEON_FRONT | RADEON_BACK);
 
 868                 if (tmp & RADEON_FRONT)
 
 869                         flags |= RADEON_BACK;
 
 870                 if (tmp & RADEON_BACK)
 
 871                         flags |= RADEON_FRONT;
 
 874         if (flags & (RADEON_FRONT | RADEON_BACK)) {
 
 878                 /* Ensure the 3D stream is idle before doing a
 
 879                  * 2D fill to clear the front or back buffer.
 
 881                 RADEON_WAIT_UNTIL_3D_IDLE();
 
 883                 OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
 
 884                 OUT_RING(clear->color_mask);
 
 888                 /* Make sure we restore the 3D state next time.
 
 890                 dev_priv->sarea_priv->ctx_owner = 0;
 
 892                 for (i = 0; i < nbox; i++) {
 
 895                         int w = pbox[i].x2 - x;
 
 896                         int h = pbox[i].y2 - y;
 
 898                         DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
 
 901                         if (flags & RADEON_FRONT) {
 
 905                                          (RADEON_CNTL_PAINT_MULTI, 4));
 
 906                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 
 907                                          RADEON_GMC_BRUSH_SOLID_COLOR |
 
 910                                          RADEON_GMC_SRC_DATATYPE_COLOR |
 
 912                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
 
 914                                 OUT_RING(dev_priv->front_pitch_offset);
 
 915                                 OUT_RING(clear->clear_color);
 
 917                                 OUT_RING((x << 16) | y);
 
 918                                 OUT_RING((w << 16) | h);
 
 923                         if (flags & RADEON_BACK) {
 
 927                                          (RADEON_CNTL_PAINT_MULTI, 4));
 
 928                                 OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 
 929                                          RADEON_GMC_BRUSH_SOLID_COLOR |
 
 932                                          RADEON_GMC_SRC_DATATYPE_COLOR |
 
 934                                          RADEON_GMC_CLR_CMP_CNTL_DIS);
 
 936                                 OUT_RING(dev_priv->back_pitch_offset);
 
 937                                 OUT_RING(clear->clear_color);
 
 939                                 OUT_RING((x << 16) | y);
 
 940                                 OUT_RING((w << 16) | h);
 
 948         /* no docs available, based on reverse engineering by Stephane Marchesin */
 
 949         if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
 
 950             && (flags & RADEON_CLEAR_FASTZ)) {
 
 953                 int depthpixperline =
 
 954                     dev_priv->depth_fmt ==
 
 955                     RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch /
 
 961                 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
 
 962                     ((clear->depth_mask & 0xff) << 24);
 
 964                 /* Make sure we restore the 3D state next time.
 
 965                  * we haven't touched any "normal" state - still need this?
 
 967                 dev_priv->sarea_priv->ctx_owner = 0;
 
 969                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
 
 970                     && (flags & RADEON_USE_HIERZ)) {
 
 971                         /* FIXME : reverse engineer that for Rx00 cards */
 
 972                         /* FIXME : the mask supposedly contains low-res z values. So can't set
 
 973                            just to the max (0xff? or actually 0x3fff?), need to take z clear
 
 974                            value into account? */
 
 975                         /* pattern seems to work for r100, though get slight
 
 976                            rendering errors with glxgears. If hierz is not enabled for r100,
 
 977                            only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
 
 978                            other ones are ignored, and the same clear mask can be used. That's
 
 979                            very different behaviour than R200 which needs different clear mask
 
 980                            and different number of tiles to clear if hierz is enabled or not !?!
 
 982                         clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
 
 984                         /* clear mask : chooses the clearing pattern.
 
 985                            rv250: could be used to clear only parts of macrotiles
 
 986                            (but that would get really complicated...)?
 
 987                            bit 0 and 1 (either or both of them ?!?!) are used to
 
 988                            not clear tile (or maybe one of the bits indicates if the tile is
 
 989                            compressed or not), bit 2 and 3 to not clear tile 1,...,.
 
 990                            Pattern is as follows:
 
 991                            | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
 
 992                            bits -------------------------------------------------
 
 993                            | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
 
 994                            rv100: clearmask covers 2x8 4x1 tiles, but one clear still
 
 995                            covers 256 pixels ?!?
 
1001                 RADEON_WAIT_UNTIL_2D_IDLE();
 
1002                 OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
 
1003                              tempRB3D_DEPTHCLEARVALUE);
 
1004                 /* what offset is this exactly ? */
 
1005                 OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
 
1006                 /* need ctlstat, otherwise get some strange black flickering */
 
1007                 OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
 
1008                              RADEON_RB3D_ZC_FLUSH_ALL);
 
1011                 for (i = 0; i < nbox; i++) {
 
1012                         int tileoffset, nrtilesx, nrtilesy, j;
 
1013                         /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
 
1014                         if ((dev_priv->flags & RADEON_HAS_HIERZ)
 
1015                             && !(dev_priv->microcode_version == UCODE_R200)) {
 
1016                                 /* FIXME : figure this out for r200 (when hierz is enabled). Or
 
1017                                    maybe r200 actually doesn't need to put the low-res z value into
 
1018                                    the tile cache like r100, but just needs to clear the hi-level z-buffer?
 
1019                                    Works for R100, both with hierz and without.
 
1020                                    R100 seems to operate on 2x1 8x8 tiles, but...
 
1021                                    odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
 
1022                                    problematic with resolutions which are not 64 pix aligned? */
 
1024                                     ((pbox[i].y1 >> 3) * depthpixperline +
 
1027                                     ((pbox[i].x2 & ~63) -
 
1028                                      (pbox[i].x1 & ~63)) >> 4;
 
1030                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
 
1031                                 for (j = 0; j <= nrtilesy; j++) {
 
1034                                                  (RADEON_3D_CLEAR_ZMASK, 2));
 
1036                                         OUT_RING(tileoffset * 8);
 
1037                                         /* the number of tiles to clear */
 
1038                                         OUT_RING(nrtilesx + 4);
 
1039                                         /* clear mask : chooses the clearing pattern. */
 
1040                                         OUT_RING(clearmask);
 
1042                                         tileoffset += depthpixperline >> 6;
 
1044                         } else if (dev_priv->microcode_version == UCODE_R200) {
 
1045                                 /* works for rv250. */
 
1046                                 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
 
1048                                     ((pbox[i].y1 >> 3) * depthpixperline +
 
1051                                     (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
 
1053                                     (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
 
1054                                 for (j = 0; j <= nrtilesy; j++) {
 
1057                                                  (RADEON_3D_CLEAR_ZMASK, 2));
 
1059                                         /* judging by the first tile offset needed, could possibly
 
1060                                            directly address/clear 4x4 tiles instead of 8x2 * 4x4
 
1061                                            macro tiles, though would still need clear mask for
 
1062                                            right/bottom if truly 4x4 granularity is desired ? */
 
1063                                         OUT_RING(tileoffset * 16);
 
1064                                         /* the number of tiles to clear */
 
1065                                         OUT_RING(nrtilesx + 1);
 
1066                                         /* clear mask : chooses the clearing pattern. */
 
1067                                         OUT_RING(clearmask);
 
1069                                         tileoffset += depthpixperline >> 5;
 
1071                         } else {        /* rv 100 */
 
1072                                 /* rv100 might not need 64 pix alignment, who knows */
 
1073                                 /* offsets are, hmm, weird */
 
1075                                     ((pbox[i].y1 >> 4) * depthpixperline +
 
1078                                     ((pbox[i].x2 & ~63) -
 
1079                                      (pbox[i].x1 & ~63)) >> 4;
 
1081                                     (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
 
1082                                 for (j = 0; j <= nrtilesy; j++) {
 
1085                                                  (RADEON_3D_CLEAR_ZMASK, 2));
 
1086                                         OUT_RING(tileoffset * 128);
 
1087                                         /* the number of tiles to clear */
 
1088                                         OUT_RING(nrtilesx + 4);
 
1089                                         /* clear mask : chooses the clearing pattern. */
 
1090                                         OUT_RING(clearmask);
 
1092                                         tileoffset += depthpixperline >> 6;
 
1097                 /* TODO don't always clear all hi-level z tiles */
 
1098                 if ((dev_priv->flags & RADEON_HAS_HIERZ)
 
1099                     && (dev_priv->microcode_version == UCODE_R200)
 
1100                     && (flags & RADEON_USE_HIERZ))
 
1101                         /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
 
1102                         /* FIXME : the mask supposedly contains low-res z values. So can't set
 
1103                            just to the max (0xff? or actually 0x3fff?), need to take z clear
 
1104                            value into account? */
 
1107                         OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
 
1108                         OUT_RING(0x0);  /* First tile */
 
1110                         OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
 
1115         /* We have to clear the depth and/or stencil buffers by
 
1116          * rendering a quad into just those buffers.  Thus, we have to
 
1117          * make sure the 3D engine is configured correctly.
 
1119         else if ((dev_priv->microcode_version == UCODE_R200) &&
 
1120                 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
 
1125                 int tempRB3D_ZSTENCILCNTL;
 
1126                 int tempRB3D_STENCILREFMASK;
 
1127                 int tempRB3D_PLANEMASK;
 
1129                 int tempSE_VTE_CNTL;
 
1130                 int tempSE_VTX_FMT_0;
 
1131                 int tempSE_VTX_FMT_1;
 
1132                 int tempSE_VAP_CNTL;
 
1133                 int tempRE_AUX_SCISSOR_CNTL;
 
1138                 tempRB3D_CNTL = depth_clear->rb3d_cntl;
 
1140                 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
 
1141                 tempRB3D_STENCILREFMASK = 0x0;
 
1143                 tempSE_CNTL = depth_clear->se_cntl;
 
1147                 tempSE_VAP_CNTL = (     /* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
 
1149                                            SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
 
1151                 tempRB3D_PLANEMASK = 0x0;
 
1153                 tempRE_AUX_SCISSOR_CNTL = 0x0;
 
1156                     SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;
 
1158                 /* Vertex format (X, Y, Z, W) */
 
1160                     SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
 
1161                     SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
 
1162                 tempSE_VTX_FMT_1 = 0x0;
 
1165                  * Depth buffer specific enables
 
1167                 if (flags & RADEON_DEPTH) {
 
1168                         /* Enable depth buffer */
 
1169                         tempRB3D_CNTL |= RADEON_Z_ENABLE;
 
1171                         /* Disable depth buffer */
 
1172                         tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
 
1176                  * Stencil buffer specific enables
 
1178                 if (flags & RADEON_STENCIL) {
 
1179                         tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
 
1180                         tempRB3D_STENCILREFMASK = clear->depth_mask;
 
1182                         tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
 
1183                         tempRB3D_STENCILREFMASK = 0x00000000;
 
1186                 if (flags & RADEON_USE_COMP_ZBUF) {
 
1187                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
 
1188                             RADEON_Z_DECOMPRESSION_ENABLE;
 
1190                 if (flags & RADEON_USE_HIERZ) {
 
1191                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
 
1195                 RADEON_WAIT_UNTIL_2D_IDLE();
 
1197                 OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
 
1198                 OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
 
1199                 OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
 
1200                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
 
1201                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
 
1202                              tempRB3D_STENCILREFMASK);
 
1203                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
 
1204                 OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
 
1205                 OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
 
1206                 OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
 
1207                 OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
 
1208                 OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
 
1209                 OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
 
1212                 /* Make sure we restore the 3D state next time.
 
1214                 dev_priv->sarea_priv->ctx_owner = 0;
 
1216                 for (i = 0; i < nbox; i++) {
 
1218                         /* Funny that this should be required --
 
1221                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
 
1224                         OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
 
1225                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
 
1226                                   RADEON_PRIM_WALK_RING |
 
1227                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
 
1228                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
 
1229                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
 
1230                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
 
1231                         OUT_RING(0x3f800000);
 
1232                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
 
1233                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
 
1234                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
 
1235                         OUT_RING(0x3f800000);
 
1236                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
 
1237                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
 
1238                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
 
1239                         OUT_RING(0x3f800000);
 
1242         } else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
 
1244                 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
 
1246                 rb3d_cntl = depth_clear->rb3d_cntl;
 
1248                 if (flags & RADEON_DEPTH) {
 
1249                         rb3d_cntl |= RADEON_Z_ENABLE;
 
1251                         rb3d_cntl &= ~RADEON_Z_ENABLE;
 
1254                 if (flags & RADEON_STENCIL) {
 
1255                         rb3d_cntl |= RADEON_STENCIL_ENABLE;
 
1256                         rb3d_stencilrefmask = clear->depth_mask;        /* misnamed field */
 
1258                         rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
 
1259                         rb3d_stencilrefmask = 0x00000000;
 
1262                 if (flags & RADEON_USE_COMP_ZBUF) {
 
1263                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
 
1264                             RADEON_Z_DECOMPRESSION_ENABLE;
 
1266                 if (flags & RADEON_USE_HIERZ) {
 
1267                         tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
 
1271                 RADEON_WAIT_UNTIL_2D_IDLE();
 
1273                 OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
 
1274                 OUT_RING(0x00000000);
 
1275                 OUT_RING(rb3d_cntl);
 
1277                 OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
 
1278                 OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
 
1279                 OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
 
1280                 OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
 
1283                 /* Make sure we restore the 3D state next time.
 
1285                 dev_priv->sarea_priv->ctx_owner = 0;
 
1287                 for (i = 0; i < nbox; i++) {
 
1289                         /* Funny that this should be required --
 
1292                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
 
1296                         OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
 
1297                         OUT_RING(RADEON_VTX_Z_PRESENT |
 
1298                                  RADEON_VTX_PKCOLOR_PRESENT);
 
1299                         OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
 
1300                                   RADEON_PRIM_WALK_RING |
 
1301                                   RADEON_MAOS_ENABLE |
 
1302                                   RADEON_VTX_FMT_RADEON_MODE |
 
1303                                   (3 << RADEON_NUM_VERTICES_SHIFT)));
 
1305                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
 
1306                         OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
 
1307                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
 
1310                         OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
 
1311                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
 
1312                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
 
1315                         OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
 
1316                         OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
 
1317                         OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
 
1324         /* Increment the clear counter.  The client-side 3D driver must
 
1325          * wait on this value before performing the clear ioctl.  We
 
1326          * need this because the card's so damned fast...
 
1328         dev_priv->sarea_priv->last_clear++;
 
1332         RADEON_CLEAR_AGE(dev_priv->sarea_priv->last_clear);
 
1333         RADEON_WAIT_UNTIL_IDLE();
 
 /*
  * radeon_cp_dispatch_swap() -- queue the blits for a buffer swap.
  *
  * Draws the performance boxes first when dev_priv->do_boxes is set, waits
  * for 3D idle, then for every SAREA cliprect programs
  * RADEON_DP_GUI_MASTER_CNTL and writes two pitch/offset values starting at
  * RADEON_SRC_PITCH_OFFSET: back then front when pfCurrentPage == 0, front
  * then back in the other branch.  The same packed (x, y) is written twice
  * starting at RADEON_SRC_X_Y, followed by the packed (w, h) extent.
  * Afterwards sarea_priv->last_frame is incremented and emitted through
  * RADEON_FRAME_AGE(), followed by RADEON_WAIT_UNTIL_2D_IDLE().
  *
  * NOTE(review): presumably the second value of each pair lands in the
  * destination register that follows the source one -- confirm against the
  * register map.  Lines are missing from this listing (see the gaps in the
  * embedded numbers), e.g. the declarations of i, x and y.
  */
 1338 static void radeon_cp_dispatch_swap(struct drm_device * dev)
 
 1340         drm_radeon_private_t *dev_priv = dev->dev_private;
 
 1341         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
 1342         int nbox = sarea_priv->nbox;
 
 1343         struct drm_clip_rect *pbox = sarea_priv->boxes;
 
 1348         /* Do some trivial performance monitoring...
 
 1350         if (dev_priv->do_boxes)
 
 1351                 radeon_cp_performance_boxes(dev_priv);
 
 1353         /* Wait for the 3D stream to idle before dispatching the bitblt.
 
 1354          * This will prevent data corruption between the two streams.
 
 1358         RADEON_WAIT_UNTIL_3D_IDLE();
 
 1362         for (i = 0; i < nbox; i++) {
 
 1365                 int w = pbox[i].x2 - x;
 
 1366                 int h = pbox[i].y2 - y;
 
 1368                 DRM_DEBUG("dispatch swap %d,%d-%d,%d\n", x, y, w, h);
 
 1372                 OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
 
 1373                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 
 1374                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 
 1375                          RADEON_GMC_BRUSH_NONE |
 
 1376                          (dev_priv->color_fmt << 8) |
 
 1377                          RADEON_GMC_SRC_DATATYPE_COLOR |
 
 1379                          RADEON_DP_SRC_SOURCE_MEMORY |
 
 1380                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
 
 1382                 /* Make this work even if front & back are flipped:
 
 1384                 OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
 
 1385                 if (dev_priv->sarea_priv->pfCurrentPage == 0) {
 
 1386                         OUT_RING(dev_priv->back_pitch_offset);
 
 1387                         OUT_RING(dev_priv->front_pitch_offset);
 
 1389                         OUT_RING(dev_priv->front_pitch_offset);
 
 1390                         OUT_RING(dev_priv->back_pitch_offset);
 
 1393                 OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
 
 1394                 OUT_RING((x << 16) | y);
 
 1395                 OUT_RING((x << 16) | y);
 
 1396                 OUT_RING((w << 16) | h);
 
 1401         /* Increment the frame counter.  The client-side 3D driver must
 
 1402          * throttle the framerate by waiting for this value before
 
 1403          * performing the swapbuffer ioctl.
 
 1405         dev_priv->sarea_priv->last_frame++;
 
 1409         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
 
 1410         RADEON_WAIT_UNTIL_2D_IDLE();
 
 /*
  * radeon_cp_dispatch_flip() -- queue a page flip.
  *
  * Selects front_offset when pfCurrentPage == 1 and back_offset otherwise.
  * When dev_priv->do_boxes is set, RADEON_BOX_FLIP is flagged and the
  * performance boxes are drawn.  After a 3D-idle wait RADEON_CRTC_OFFSET
  * and RADEON_CRTC2_OFFSET are written; the visible part of the first value
  * is (frame.y * front_pitch + frame.x * (color_fmt - 2)) & ~7 and the
  * second starts from sarea_priv->crtc2_base.  Finally last_frame is
  * incremented, pfCurrentPage is replaced by 1 - pfCurrentPage, and the new
  * frame number is emitted through RADEON_FRAME_AGE().
  *
  * NOTE(review): the continuation lines of both OUT_RING_REG() calls are
  * missing from this listing; presumably they add `offset` -- confirm
  * against the complete file.
  */
 1415 static void radeon_cp_dispatch_flip(struct drm_device * dev)
 
 1417         drm_radeon_private_t *dev_priv = dev->dev_private;
 
 1418         struct drm_sarea *sarea = (struct drm_sarea *) dev_priv->sarea->handle;
 
 1419         int offset = (dev_priv->sarea_priv->pfCurrentPage == 1)
 
 1420             ? dev_priv->front_offset : dev_priv->back_offset;
 
 1422         DRM_DEBUG("%s: pfCurrentPage=%d\n",
 
 1424                   dev_priv->sarea_priv->pfCurrentPage);
 
 1426         /* Do some trivial performance monitoring...
 
 1428         if (dev_priv->do_boxes) {
 
 1429                 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
 
 1430                 radeon_cp_performance_boxes(dev_priv);
 
 1433         /* Update the frame offsets for both CRTCs
 
 1437         RADEON_WAIT_UNTIL_3D_IDLE();
 
 1438         OUT_RING_REG(RADEON_CRTC_OFFSET,
 
 1439                      ((sarea->frame.y * dev_priv->front_pitch +
 
 1440                        sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
 
 1442         OUT_RING_REG(RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
 
 1447         /* Increment the frame counter.  The client-side 3D driver must
 
 1448          * throttle the framerate by waiting for this value before
 
 1449          * performing the swapbuffer ioctl.
 
 1451         dev_priv->sarea_priv->last_frame++;
 
 1452         dev_priv->sarea_priv->pfCurrentPage =
 
 1453                 1 - dev_priv->sarea_priv->pfCurrentPage;
 
 1457         RADEON_FRAME_AGE(dev_priv->sarea_priv->last_frame);
 
 /*
  * bad_prim_vertex_nr() -- check a vertex count against a primitive type.
  *
  * Switches on (primitive & RADEON_PRIM_TYPE_MASK).  In the visible arms a
  * non-zero result marks the count as bad: RADEON_PRIM_TYPE_LINE needs a
  * non-zero even count, and the triangle / 3-vertex point / 3-vertex line /
  * rect list types need a non-zero multiple of three.
  *
  * NOTE(review): the return statements for the NONE/POINT, LINE_STRIP and
  * TRI_FAN/TRI_STRIP arms (and any default arm) are on lines missing from
  * this listing.
  */
 1462 static int bad_prim_vertex_nr(int primitive, int nr)
 
 1464         switch (primitive & RADEON_PRIM_TYPE_MASK) {
 
 1465         case RADEON_PRIM_TYPE_NONE:
 
 1466         case RADEON_PRIM_TYPE_POINT:
 
 1468         case RADEON_PRIM_TYPE_LINE:
 
 1469                 return (nr & 1) || nr == 0;
 
 1470         case RADEON_PRIM_TYPE_LINE_STRIP:
 
 1472         case RADEON_PRIM_TYPE_TRI_LIST:
 
 1473         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
 
 1474         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
 
 1475         case RADEON_PRIM_TYPE_RECT_LIST:
 
 1476                 return nr % 3 || nr == 0;
 
 1477         case RADEON_PRIM_TYPE_TRI_FAN:
 
 1478         case RADEON_PRIM_TYPE_TRI_STRIP:
 
1487         unsigned int finish;
 
1489         unsigned int numverts;
 
1490         unsigned int offset;
 
1491         unsigned int vc_format;
 
1492 } drm_radeon_tcl_prim_t;
 
 /*
  * radeon_cp_dispatch_vertex() -- queue rendering of a vertex-buffer primitive.
  *
  * The buffer address is gart_buffers_offset + buf->offset + prim->start.
  * A vertex count flagged by bad_prim_vertex_nr() is reported with
  * DRM_ERROR.  The visible code then emits a cliprect followed by a
  * RADEON_3D_RNDR_GEN_INDX_PRIM packet carrying prim->vc_format and the
  * primitive word (prim->prim, list walk, RGBA colour order, Radeon vertex
  * format mode, vertex count).
  *
  * NOTE(review): the loop over cliprects, the words carrying the buffer
  * offset and numverts, and the early return after the error are presumably
  * on lines missing from this listing -- confirm against the complete file.
  */
 1494 static void radeon_cp_dispatch_vertex(struct drm_device * dev,
 
 1495                                       struct drm_buf * buf,
 
 1496                                       drm_radeon_tcl_prim_t * prim)
 
 1498         drm_radeon_private_t *dev_priv = dev->dev_private;
 
 1499         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
 1500         int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
 
 1501         int numverts = (int)prim->numverts;
 
 1502         int nbox = sarea_priv->nbox;
 
 1506         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
 
 1508                   prim->vc_format, prim->start, prim->finish, prim->numverts);
 
 1510         if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
 
 1511                 DRM_ERROR("bad prim %x numverts %d\n",
 
 1512                           prim->prim, prim->numverts);
 
 1517                 /* Emit the next cliprect */
 
 1519                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
 
 1522                 /* Emit the vertex buffer rendering commands */
 
 1525                 OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
 
 1528                 OUT_RING(prim->vc_format);
 
 1529                 OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
 
 1530                          RADEON_COLOR_ORDER_RGBA |
 
 1531                          RADEON_VTX_FMT_RADEON_MODE |
 
 1532                          (numverts << RADEON_NUM_VERTICES_SHIFT));
 
 /*
  * radeon_cp_discard_buffer() -- stamp a buffer with the next dispatch age.
  *
  * Pre-increments sarea_priv->last_dispatch, stores the result in the
  * buffer's private age field and emits it with RADEON_DISPATCH_AGE().
  *
  * NOTE(review): lines are missing from this listing (see the gaps in the
  * embedded numbers), so any further bookkeeping is not visible here.
  */
 1540 static void radeon_cp_discard_buffer(struct drm_device * dev, struct drm_buf * buf)
 
 1542         drm_radeon_private_t *dev_priv = dev->dev_private;
 
 1543         drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
 
 1546         buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
 
 1548         /* Emit the vertex buffer age */
 
 1550         RADEON_DISPATCH_AGE(buf_priv->age);
 
 /*
  * radeon_cp_dispatch_indirect() -- fire bytes start..end of buf as an
  * indirect buffer.
  *
  * The buffer address is gart_buffers_offset + buf->offset + start and the
  * length (end - start) is rounded up to whole dwords.  Per the in-line
  * comment the data must be an even number of dwords, so a
  * RADEON_CP_PACKET2 word is appended to the buffer contents as padding.
  * The ring packet begins with a CP_PACKET0 write to RADEON_CP_IB_BASE.
  *
  * NOTE(review): the guard around the body, the odd-dword test and the ring
  * words carrying offset and dwords are on lines missing from this listing.
  */
 1557 static void radeon_cp_dispatch_indirect(struct drm_device * dev,
 
 1558                                         struct drm_buf * buf, int start, int end)
 
 1560         drm_radeon_private_t *dev_priv = dev->dev_private;
 
 1562         DRM_DEBUG("indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);
 
 1565                 int offset = (dev_priv->gart_buffers_offset
 
 1566                               + buf->offset + start);
 
 1567                 int dwords = (end - start + 3) / sizeof(u32);
 
 1569                 /* Indirect buffer data must be an even number of
 
 1570                  * dwords, so if we've been given an odd number we must
 
 1571                  * pad the data with a Type-2 CP packet.
 
 1575                             ((char *)dev->agp_buffer_map->handle
 
 1576                              + buf->offset + start);
 
 1577                         data[dwords++] = RADEON_CP_PACKET2;
 
 1580                 /* Fire off the indirect buffer */
 
 1583                 OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
 
 /*
  * radeon_cp_dispatch_indices() -- queue rendering of an indexed primitive.
  *
  * The index data begins RADEON_INDEX_PRIM_OFFSET bytes after prim->start,
  * and count is the number of u16 indices up to prim->finish.  DRM_ERROR is
  * raised when bad_prim_vertex_nr() flags the count, when there are no
  * index bytes (start >= prim->finish), or when prim->start is not a
  * multiple of 8.  The packet header is then written in place at
  * prim->start inside the element buffer mapping, ahead of the indices, and
  * the range prim->start..prim->finish is fired through
  * radeon_cp_dispatch_indirect() after a cliprect is emitted.
  *
  * NOTE(review): data[1], the cliprect loop and the early returns are on
  * lines missing from this listing; `offset` is computed here but its use
  * is not visible.
  */
 1591 static void radeon_cp_dispatch_indices(struct drm_device * dev,
 
 1592                                        struct drm_buf * elt_buf,
 
 1593                                        drm_radeon_tcl_prim_t * prim)
 
 1595         drm_radeon_private_t *dev_priv = dev->dev_private;
 
 1596         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
 1597         int offset = dev_priv->gart_buffers_offset + prim->offset;
 
 1601         int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
 
 1602         int count = (prim->finish - start) / sizeof(u16);
 
 1603         int nbox = sarea_priv->nbox;
 
 1605         DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
 
 1608                   prim->start, prim->finish, prim->offset, prim->numverts);
 
 1610         if (bad_prim_vertex_nr(prim->prim, count)) {
 
 1611                 DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
 
 1615         if (start >= prim->finish || (prim->start & 0x7)) {
 
 1616                 DRM_ERROR("buffer prim %d\n", prim->prim);
 
 1620         dwords = (prim->finish - prim->start + 3) / sizeof(u32);
 
 1622         data = (u32 *) ((char *)dev->agp_buffer_map->handle +
 
 1623                         elt_buf->offset + prim->start);
 
 1625         data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
 
 1627         data[2] = prim->numverts;
 
 1628         data[3] = prim->vc_format;
 
 1629         data[4] = (prim->prim |
 
 1630                    RADEON_PRIM_WALK_IND |
 
 1631                    RADEON_COLOR_ORDER_RGBA |
 
 1632                    RADEON_VTX_FMT_RADEON_MODE |
 
 1633                    (count << RADEON_NUM_VERTICES_SHIFT));
 
 1637                         radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
 
 1639                 radeon_cp_dispatch_indirect(dev, elt_buf,
 
 1640                                             prim->start, prim->finish);
 
/*
 * RADEON_MAX_TEXTURE_SIZE caps the number of bytes uploaded per pass; it is
 * defined as RADEON_BUFFER_SIZE.
 *
 * radeon_cp_dispatch_texture - upload a user-space texture image to the
 * destination described by tex, using host-data blits through DMA buffers.
 *
 * filp:  caller's DRM file handle, resolved to filp_priv.
 * dev:   DRM device.
 * tex:   destination description (offset, pitch, format, width, height);
 *        tex->offset is validated by radeon_check_and_fixup_offset() and
 *        tex->image is the user pointer the image record is copied back to.
 * image: kernel copy of the image record (x, y, width, height, data);
 *        height and data are advanced after every pass.
 *
 * Visible flow:
 *  - reject an invalid destination offset with EINVAL;
 *  - flag RADEON_BOX_TEXTURE_LOAD in the stats, flush the pixel cache and
 *    wait for idle;
 *  - map tex->format to a blit colour format and derive tex_width and
 *    blit_width in bytes (4, 2 or 1 bytes per texel); unknown formats give
 *    EINVAL;
 *  - spitch is blit_width >> 6; a zero spitch with image->height > 1 gives
 *    EINVAL;
 *  - when the micro-tile bit is set in the pitch and tex_width < 64, the
 *    bit is cleared from texpitch and the data is tiled by hand later;
 *  - per pass: clamp the pass to RADEON_MAX_TEXTURE_SIZE bytes, take a
 *    buffer from radeon_freelist_get() (a second attempt follows
 *    radeon_do_cp_idle()), and on failure copy image back to tex->image
 *    and return EAGAIN;
 *  - copy the user data into the buffer with RADEON_COPY_MT, which logs
 *    and returns EFAULT when DRM_COPY_FROM_USER fails;
 *  - emit a RADEON_CNTL_BITBLT_MULTI packet, discard the buffer, and
 *    subtract the pass from image->height / advance image->data;
 *  - loop while image->height > 0, then flush the pixel cache and wait for
 *    2D idle.
 *
 * Returns DRM_ERR(EINVAL), DRM_ERR(EFAULT) or DRM_ERR(EAGAIN) on the paths
 * above.
 *
 * NOTE(review): this listing has dropped lines (braces, several
 * declarations, branch bodies, pointer advances inside the copy loops, some
 * ring writes and the final return); the summary covers only what is
 * visible.
 */
1647 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
 
1649 static int radeon_cp_dispatch_texture(DRMFILE filp,
 
1650                                       struct drm_device * dev,
 
1651                                       drm_radeon_texture_t * tex,
 
1652                                       drm_radeon_tex_image_t * image)
 
1654         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1655         struct drm_file *filp_priv;
 
1656         struct drm_buf *buf;
 
1659         const u8 __user *data;
 
1660         int size, dwords, tex_width, blit_width, spitch;
 
1663         u32 texpitch, microtile;
 
1667         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
 
1669         if (radeon_check_and_fixup_offset(dev_priv, filp_priv, &tex->offset)) {
 
1670                 DRM_ERROR("Invalid destination offset\n");
 
1671                 return DRM_ERR(EINVAL);
 
1674         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
 
1676         /* Flush the pixel cache.  This ensures no pixel data gets mixed
 
1677          * up with the texture data from the host data blit, otherwise
 
1678          * part of the texture image may be corrupted.
 
1681         RADEON_FLUSH_CACHE();
 
1682         RADEON_WAIT_UNTIL_IDLE();
 
1685         /* The compiler won't optimize away a division by a variable,
 
1686          * even if the only legal values are powers of two.  Thus, we'll
 
1687          * use a shift instead.
 
1689         switch (tex->format) {
 
1690         case RADEON_TXFORMAT_ARGB8888:
 
1691         case RADEON_TXFORMAT_RGBA8888:
 
1692                 format = RADEON_COLOR_FORMAT_ARGB8888;
 
1693                 tex_width = tex->width * 4;
 
1694                 blit_width = image->width * 4;
 
1696         case RADEON_TXFORMAT_AI88:
 
1697         case RADEON_TXFORMAT_ARGB1555:
 
1698         case RADEON_TXFORMAT_RGB565:
 
1699         case RADEON_TXFORMAT_ARGB4444:
 
1700         case RADEON_TXFORMAT_VYUY422:
 
1701         case RADEON_TXFORMAT_YVYU422:
 
1702                 format = RADEON_COLOR_FORMAT_RGB565;
 
1703                 tex_width = tex->width * 2;
 
1704                 blit_width = image->width * 2;
 
1706         case RADEON_TXFORMAT_I8:
 
1707         case RADEON_TXFORMAT_RGB332:
 
1708                 format = RADEON_COLOR_FORMAT_CI8;
 
1709                 tex_width = tex->width * 1;
 
1710                 blit_width = image->width * 1;
 
1713                 DRM_ERROR("invalid texture format %d\n", tex->format);
 
1714                 return DRM_ERR(EINVAL);
 
1716         spitch = blit_width >> 6;
 
1717         if (spitch == 0 && image->height > 1)
 
1718                 return DRM_ERR(EINVAL);
 
1720         texpitch = tex->pitch;
 
1721         if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
 
1723                 if (tex_width < 64) {
 
1724                         texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
 
1725                         /* we got tiled coordinates, untile them */
 
1731         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
 
1734                 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
 
1735                           tex->offset >> 10, tex->pitch, tex->format,
 
1736                           image->x, image->y, image->width, image->height);
 
1738                 /* Make a copy of some parameters in case we have to
 
1739                  * update them for a multi-pass texture blit.
 
1741                 height = image->height;
 
1742                 data = (const u8 __user *)image->data;
 
1744                 size = height * blit_width;
 
1746                 if (size > RADEON_MAX_TEXTURE_SIZE) {
 
1747                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
 
1748                         size = height * blit_width;
 
1749                 } else if (size < 4 && size > 0) {
 
1751                 } else if (size == 0) {
 
1755                 buf = radeon_freelist_get(dev);
 
1757                         radeon_do_cp_idle(dev_priv);
 
1758                         buf = radeon_freelist_get(dev);
 
1761                         DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
 
1762                         if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
 
1763                                 return DRM_ERR(EFAULT);
 
1764                         return DRM_ERR(EAGAIN);
 
1767                 /* Dispatch the indirect buffer.
 
1770                     (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
 
1773 #define RADEON_COPY_MT(_buf, _data, _width) \
 
1775                 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
 
1776                         DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
 
1777                         return DRM_ERR(EFAULT); \
 
1782                         /* texture micro tiling in use, minimum texture width is thus 16 bytes.
 
1783                            however, we cannot use blitter directly for texture width < 64 bytes,
 
1784                            since minimum tex pitch is 64 bytes and we need this to match
 
1785                            the texture width, otherwise the blitter will tile it wrong.
 
1786                            Thus, tiling manually in this case. Additionally, need to special
 
1787                            case tex height = 1, since our actual image will have height 2
 
1788                            and we need to ensure we don't read beyond the texture size
 
1790                         if (tex->height == 1) {
 
1791                                 if (tex_width >= 64 || tex_width <= 16) {
 
1792                                         RADEON_COPY_MT(buffer, data,
 
1793                                                 (int)(tex_width * sizeof(u32)));
 
1794                                 } else if (tex_width == 32) {
 
1795                                         RADEON_COPY_MT(buffer, data, 16);
 
1796                                         RADEON_COPY_MT(buffer + 8,
 
1799                         } else if (tex_width >= 64 || tex_width == 16) {
 
1800                                 RADEON_COPY_MT(buffer, data,
 
1801                                                (int)(dwords * sizeof(u32)));
 
1802                         } else if (tex_width < 16) {
 
1803                                 for (i = 0; i < tex->height; i++) {
 
1804                                         RADEON_COPY_MT(buffer, data, tex_width);
 
1808                         } else if (tex_width == 32) {
 
1809                                 /* TODO: make sure this works when not fitting in one buffer
 
1810                                    (i.e. 32bytes x 2048...) */
 
1811                                 for (i = 0; i < tex->height; i += 2) {
 
1812                                         RADEON_COPY_MT(buffer, data, 16);
 
1814                                         RADEON_COPY_MT(buffer + 8, data, 16);
 
1816                                         RADEON_COPY_MT(buffer + 4, data, 16);
 
1818                                         RADEON_COPY_MT(buffer + 12, data, 16);
 
1824                         if (tex_width >= 32) {
 
1825                                 /* Texture image width is larger than the minimum, so we
 
1826                                  * can upload it directly.
 
1828                                 RADEON_COPY_MT(buffer, data,
 
1829                                                (int)(dwords * sizeof(u32)));
 
1831                                 /* Texture image width is less than the minimum, so we
 
1832                                  * need to pad out each image scanline to the minimum
 
1835                                 for (i = 0; i < tex->height; i++) {
 
1836                                         RADEON_COPY_MT(buffer, data, tex_width);
 
1843 #undef RADEON_COPY_MT
 
1846                 offset = dev_priv->gart_buffers_offset + buf->offset;
 
1848                 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
 
1849                 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 
1850                          RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 
1851                          RADEON_GMC_BRUSH_NONE |
 
1853                          RADEON_GMC_SRC_DATATYPE_COLOR |
 
1855                          RADEON_DP_SRC_SOURCE_MEMORY |
 
1856                          RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
 
1857                 OUT_RING((spitch << 22) | (offset >> 10));
 
1858                 OUT_RING((texpitch << 22) | (tex->offset >> 10));
 
1860                 OUT_RING((image->x << 16) | image->y);
 
1861                 OUT_RING((image->width << 16) | height);
 
1862                 RADEON_WAIT_UNTIL_2D_IDLE();
 
1865                 radeon_cp_discard_buffer(dev, buf);
 
1867                 /* Update the input parameters for next time */
 
1869                 image->height -= height;
 
1870                 image->data = (const u8 __user *)image->data + size;
 
1871         } while (image->height > 0);
 
1873         /* Flush the pixel cache after the blit completes.  This ensures
 
1874          * the texture data is written out to memory before rendering
 
1878         RADEON_FLUSH_CACHE();
 
1879         RADEON_WAIT_UNTIL_2D_IDLE();
 
/*
 * radeon_cp_dispatch_stipple - write a 32-dword stipple pattern to the
 * hardware through the ring.
 *
 * dev:     DRM device.
 * stipple: array of at least 32 u32 values; entries 0..31 are read.
 *
 * The sequence first writes 0 to RADEON_RE_STIPPLE_ADDR, then emits a
 * CP_PACKET0_TABLE on RADEON_RE_STIPPLE_DATA (count argument 31) followed
 * by the 32 pattern dwords.
 *
 * NOTE(review): this listing has dropped lines (braces, the declaration of
 * i and the ring begin/advance calls).
 */
1884 static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
 
1886         drm_radeon_private_t *dev_priv = dev->dev_private;
 
1893         OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
 
1894         OUT_RING(0x00000000);
 
1896         OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
 
1897         for (i = 0; i < 32; i++) {
 
1898                 OUT_RING(stipple[i]);
 
/*
 * radeon_apply_surface_regs - copy one cached surface description into the
 * hardware surface registers.
 *
 * surf_index: index into dev_priv->surfaces; also selects the register
 *             set, each set being 16 bytes after the previous one.
 * dev_priv:   driver private data holding the cached flags/lower/upper.
 *
 * The function tests dev_priv->mmio first (the statement taken when it is
 * NULL is on a dropped line; presumably an early return - TODO confirm).
 * It then idles the CP with radeon_do_cp_idle() and writes flags, lower and
 * upper to RADEON_SURFACE0_INFO, RADEON_SURFACE0_LOWER_BOUND and
 * RADEON_SURFACE0_UPPER_BOUND, each offset by 16 * surf_index.
 */
1904 static void radeon_apply_surface_regs(int surf_index,
 
1905                                       drm_radeon_private_t *dev_priv)
 
1907         if (!dev_priv->mmio)
 
1910         radeon_do_cp_idle(dev_priv);
 
1912         RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
 
1913                      dev_priv->surfaces[surf_index].flags);
 
1914         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
 
1915                      dev_priv->surfaces[surf_index].lower);
 
1916         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
 
1917                      dev_priv->surfaces[surf_index].upper);
 
1920 /* Allocates a virtual surface
 
1921  * doesn't always allocate a real surface, will stretch an existing
 
1922  * surface when possible.
 
1924  * Note that refcount can be at most 2, since during a free refcount=3
 
1925  * might mean we have to allocate a new surface which might not always
 
1927  * For example : we allocate three contiguous surfaces ABC. If B is
 
1928  * freed, we suddenly need two surfaces to store A and C, which might
 
1929  * not always be available.
 
/*
 * alloc_surface - record a new virtual surface, backing it with an existing
 * or a fresh hardware surface.
 *
 * new:      request giving address, size and flags.
 * dev_priv: driver private data holding surfaces[] (RADEON_MAX_SURFACES
 *           entries) and virt_surfaces[] (twice as many).
 * filp:     caller's DRM file handle.
 *
 * The requested range is [new->address, new->address + new->size - 1].
 * Visible steps:
 *  1. Sanity check: the range must be non-empty, flags and size non-zero,
 *     the upper bound must have every RADEON_SURF_ADDRESS_FIXED_MASK bit
 *     set and the lower bound none of them.
 *  2. The range is tested for overlap against every surface whose refcount
 *     is non-zero.
 *  3. A free virtual slot is searched for (filp == 0); the search index
 *     reaching 2 * RADEON_MAX_SURFACES means none is free.
 *  4. A surface with refcount 1 and identical flags is reused when the new
 *     range ends directly below it (its lower bound is moved down) or
 *     starts directly above it (its upper bound is moved up); its refcount
 *     is incremented.
 *  5. Otherwise the first surface with refcount 0 is claimed and filled in.
 * After steps 4 and 5 the registers are reprogrammed through
 * radeon_apply_surface_regs().
 *
 * Returns the virtual surface index on success.  The failure value is on
 * dropped lines; the caller radeon_surface_alloc() compares against -1.
 *
 * NOTE(review): this listing has dropped lines (braces, the failure
 * returns, and one statement in each of the three fill-in sequences).
 */
1931 static int alloc_surface(drm_radeon_surface_alloc_t *new,
 
1932                          drm_radeon_private_t *dev_priv, DRMFILE filp)
 
1934         struct radeon_virt_surface *s;
 
1936         int virt_surface_index;
 
1937         uint32_t new_upper, new_lower;
 
1939         new_lower = new->address;
 
1940         new_upper = new_lower + new->size - 1;
 
1943         if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
 
1944             ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
 
1945              RADEON_SURF_ADDRESS_FIXED_MASK)
 
1946             || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
 
1949         /* make sure there is no overlap with existing surfaces */
 
1950         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
 
1951                 if ((dev_priv->surfaces[i].refcount != 0) &&
 
1952                     (((new_lower >= dev_priv->surfaces[i].lower) &&
 
1953                       (new_lower < dev_priv->surfaces[i].upper)) ||
 
1954                      ((new_lower < dev_priv->surfaces[i].lower) &&
 
1955                       (new_upper > dev_priv->surfaces[i].lower)))) {
 
1960         /* find a virtual surface */
 
1961         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
 
1962                 if (dev_priv->virt_surfaces[i].filp == 0)
 
1964         if (i == 2 * RADEON_MAX_SURFACES) {
 
1967         virt_surface_index = i;
 
1969         /* try to reuse an existing surface */
 
1970         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
 
1972                 if ((dev_priv->surfaces[i].refcount == 1) &&
 
1973                     (new->flags == dev_priv->surfaces[i].flags) &&
 
1974                     (new_upper + 1 == dev_priv->surfaces[i].lower)) {
 
1975                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
 
1976                         s->surface_index = i;
 
1977                         s->lower = new_lower;
 
1978                         s->upper = new_upper;
 
1979                         s->flags = new->flags;
 
1981                         dev_priv->surfaces[i].refcount++;
 
1982                         dev_priv->surfaces[i].lower = s->lower;
 
1983                         radeon_apply_surface_regs(s->surface_index, dev_priv);
 
1984                         return virt_surface_index;
 
1988                 if ((dev_priv->surfaces[i].refcount == 1) &&
 
1989                     (new->flags == dev_priv->surfaces[i].flags) &&
 
1990                     (new_lower == dev_priv->surfaces[i].upper + 1)) {
 
1991                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
 
1992                         s->surface_index = i;
 
1993                         s->lower = new_lower;
 
1994                         s->upper = new_upper;
 
1995                         s->flags = new->flags;
 
1997                         dev_priv->surfaces[i].refcount++;
 
1998                         dev_priv->surfaces[i].upper = s->upper;
 
1999                         radeon_apply_surface_regs(s->surface_index, dev_priv);
 
2000                         return virt_surface_index;
 
2004         /* okay, we need a new one */
 
2005         for (i = 0; i < RADEON_MAX_SURFACES; i++) {
 
2006                 if (dev_priv->surfaces[i].refcount == 0) {
 
2007                         s = &(dev_priv->virt_surfaces[virt_surface_index]);
 
2008                         s->surface_index = i;
 
2009                         s->lower = new_lower;
 
2010                         s->upper = new_upper;
 
2011                         s->flags = new->flags;
 
2013                         dev_priv->surfaces[i].refcount = 1;
 
2014                         dev_priv->surfaces[i].lower = s->lower;
 
2015                         dev_priv->surfaces[i].upper = s->upper;
 
2016                         dev_priv->surfaces[i].flags = s->flags;
 
2017                         radeon_apply_surface_regs(s->surface_index, dev_priv);
 
2018                         return virt_surface_index;
 
2022         /* we didn't find anything */
 
/*
 * free_surface - release the virtual surface that starts at a given lower
 * bound and belongs to a given file handle.
 *
 * filp:     owner of the virtual surface.
 * dev_priv: driver private data holding virt_surfaces[] and surfaces[].
 * (The remaining parameter is on a dropped line; the body compares a value
 * named lower against s->lower.)
 *
 * Every one of the 2 * RADEON_MAX_SURFACES virtual slots is examined.  For
 * the slot whose lower bound and filp both match, the backing entry
 * dev_priv->surfaces[s->surface_index] is adjusted (the fields involved are
 * on dropped lines), its refcount is decremented, and the registers are
 * reprogrammed through radeon_apply_surface_regs().
 *
 * NOTE(review): large parts of this function are missing from the listing
 * (braces, the continuation of every split expression, and all return
 * statements).  The caller radeon_surface_free() treats a non-zero return
 * as failure; nothing else about the return value is visible here.
 */
2026 static int free_surface(DRMFILE filp, drm_radeon_private_t * dev_priv,
 
2029         struct radeon_virt_surface *s;
 
2031         /* find the virtual surface */
 
2032         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
 
2033                 s = &(dev_priv->virt_surfaces[i]);
 
2035                         if ((lower == s->lower) && (filp == s->filp)) {
 
2036                                 if (dev_priv->surfaces[s->surface_index].
 
2038                                         dev_priv->surfaces[s->surface_index].
 
2041                                 if (dev_priv->surfaces[s->surface_index].
 
2043                                         dev_priv->surfaces[s->surface_index].
 
2046                                 dev_priv->surfaces[s->surface_index].refcount--;
 
2047                                 if (dev_priv->surfaces[s->surface_index].
 
2049                                         dev_priv->surfaces[s->surface_index].
 
2052                                 radeon_apply_surface_regs(s->surface_index,
 
/*
 * radeon_surfaces_release - free every virtual surface owned by a file
 * handle.
 *
 * filp:     file handle whose surfaces are released.
 * dev_priv: driver private data holding virt_surfaces[].
 *
 * Walks all 2 * RADEON_MAX_SURFACES virtual slots and calls free_surface()
 * with the slot's lower bound for each one whose filp matches.  The return
 * value of free_surface() is not examined on the visible lines.
 */
2061 static void radeon_surfaces_release(DRMFILE filp,
 
2062                                     drm_radeon_private_t * dev_priv)
 
2065         for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
 
2066                 if (dev_priv->virt_surfaces[i].filp == filp)
 
2067                         free_surface(filp, dev_priv,
 
2068                                      dev_priv->virt_surfaces[i].lower);
 
2072 /* ================================================================
 
/*
 * radeon_surface_alloc - ioctl handler that allocates a surface for the
 * calling client.
 *
 * Copies a drm_radeon_surface_alloc_t from the user pointer in data and
 * passes it to alloc_surface() together with the caller's filp.
 *
 * Returns DRM_ERR(EINVAL) when alloc_surface() yields -1.  The success
 * return is on a dropped line.
 */
2075 static int radeon_surface_alloc(DRM_IOCTL_ARGS)
 
2078         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2079         drm_radeon_surface_alloc_t alloc;
 
2081         DRM_COPY_FROM_USER_IOCTL(alloc,
 
2082                                  (drm_radeon_surface_alloc_t __user *) data,
 
2085         if (alloc_surface(&alloc, dev_priv, filp) == -1)
 
2086                 return DRM_ERR(EINVAL);
 
/*
 * radeon_surface_free - ioctl handler that frees a surface previously
 * allocated by the calling client.
 *
 * Copies a drm_radeon_surface_free_t from the user pointer in data and
 * calls free_surface() with the caller's filp and memfree.address.
 *
 * Returns DRM_ERR(EINVAL) when free_surface() returns non-zero.  The
 * success return is on a dropped line.
 */
2091 static int radeon_surface_free(DRM_IOCTL_ARGS)
 
2094         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2095         drm_radeon_surface_free_t memfree;
 
2097         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *) data,
 
2100         if (free_surface(filp, dev_priv, memfree.address))
 
2101                 return DRM_ERR(EINVAL);
 
/*
 * radeon_cp_clear - ioctl handler that clears buffers over the current
 * cliprects.
 *
 * After the lock test, a drm_radeon_clear_t is copied from the user pointer
 * in data and ring space is checked.  sarea_priv->nbox is clamped to
 * RADEON_NR_SAREA_CLIPRECTS so that the copy of nbox depth boxes from
 * clear.depth_boxes cannot overrun the on-stack depth_boxes array.  The
 * request is then passed to radeon_cp_dispatch_clear().
 *
 * Returns DRM_ERR(EFAULT) when the depth boxes cannot be copied.  The
 * success return is on a dropped line.
 */
2106 static int radeon_cp_clear(DRM_IOCTL_ARGS)
 
2109         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2110         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
2111         drm_radeon_clear_t clear;
 
2112         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
 
2115         LOCK_TEST_WITH_RETURN(dev, filp);
 
2117         DRM_COPY_FROM_USER_IOCTL(clear, (drm_radeon_clear_t __user *) data,
 
2120         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2122         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 
2123                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
 
2125         if (DRM_COPY_FROM_USER(&depth_boxes, clear.depth_boxes,
 
2126                                sarea_priv->nbox * sizeof(depth_boxes[0])))
 
2127                 return DRM_ERR(EFAULT);
 
2129         radeon_cp_dispatch_clear(dev, &clear, depth_boxes);
 
2135 /* Not sure why this isn't set all the time:
 
/*
 * radeon_do_init_pageflip - switch the device into page-flipping mode.
 *
 * dev: DRM device.
 *
 * After waiting for 3D idle, the current values of RADEON_CRTC_OFFSET_CNTL
 * and RADEON_CRTC2_OFFSET_CNTL are read back, OR-ed with
 * RADEON_CRTC_OFFSET_FLIP_CNTL and written through the ring.
 * dev_priv->page_flipping is then set to 1, and sarea_priv->pfCurrentPage
 * is forced to 0 unless it is already 1.
 *
 * NOTE(review): this listing has dropped lines (braces, the ring
 * begin/advance calls and the return statement).
 */
2137 static int radeon_do_init_pageflip(struct drm_device * dev)
 
2139         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2145         RADEON_WAIT_UNTIL_3D_IDLE();
 
2146         OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
 
2147         OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
 
2148                  RADEON_CRTC_OFFSET_FLIP_CNTL);
 
2149         OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
 
2150         OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
 
2151                  RADEON_CRTC_OFFSET_FLIP_CNTL);
 
2154         dev_priv->page_flipping = 1;
 
2156         if (dev_priv->sarea_priv->pfCurrentPage != 1)
 
2157                 dev_priv->sarea_priv->pfCurrentPage = 0;
 
2162 /* Swapping and flipping are different operations, need different ioctls.
 
2163  * They can & should be intermixed to support multiple 3d windows.
 
/*
 * radeon_cp_flip - ioctl handler that performs a page flip.
 *
 * After the lock and ring-space tests, page flipping is initialised through
 * radeon_do_init_pageflip() if dev_priv->page_flipping is not yet set, and
 * the flip is queued with radeon_cp_dispatch_flip().
 *
 * The return statement is on a dropped line.
 */
2165 static int radeon_cp_flip(DRM_IOCTL_ARGS)
 
2168         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2171         LOCK_TEST_WITH_RETURN(dev, filp);
 
2173         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2175         if (!dev_priv->page_flipping)
 
2176                 radeon_do_init_pageflip(dev);
 
2178         radeon_cp_dispatch_flip(dev);
 
/*
 * radeon_cp_swap - ioctl handler that performs a buffer swap.
 *
 * After the lock and ring-space tests, sarea_priv->nbox is clamped to
 * RADEON_NR_SAREA_CLIPRECTS, the swap is queued with
 * radeon_cp_dispatch_swap(), and sarea_priv->ctx_owner is reset to 0.
 *
 * The return statement is on a dropped line.
 */
2184 static int radeon_cp_swap(DRM_IOCTL_ARGS)
 
2187         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2188         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
2191         LOCK_TEST_WITH_RETURN(dev, filp);
 
2193         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2195         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 
2196                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
 
2198         radeon_cp_dispatch_swap(dev);
 
2199         dev_priv->sarea_priv->ctx_owner = 0;
 
/*
 * radeon_cp_vertex - ioctl handler that dispatches a vertex buffer.
 *
 * A drm_radeon_vertex_t is copied from the user pointer in data and
 * validated before anything is queued:
 *  - vertex.idx must lie in [0, dma->buf_count);
 *  - vertex.prim must lie in [0, RADEON_PRIM_TYPE_3VRT_LINE_LIST];
 *  - the selected buffer must be owned by the calling filp;
 *  - a further check logs "sending pending buffer" (its condition is on a
 *    dropped line).
 * Each failure returns DRM_ERR(EINVAL).
 *
 * For a dispatch, buf->used is set to vertex.count.  If any dirty bit other
 * than RADEON_UPLOAD_CLIPRECTS is set in the SAREA, state is emitted with
 * radeon_emit_state() (failure returns EINVAL) and the TEXnIMAGES and
 * REQUIRE_QUIESCENCE dirty bits are cleared.  A drm_radeon_tcl_prim_t is
 * filled from the request and the SAREA vc_format and passed to
 * radeon_cp_dispatch_vertex().  When vertex.discard is set the buffer is
 * released with radeon_cp_discard_buffer().
 *
 * NOTE(review): this listing has dropped lines (braces, several conditions,
 * part of the prim record and the final return).
 */
2205 static int radeon_cp_vertex(DRM_IOCTL_ARGS)
 
2208         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2209         struct drm_file *filp_priv;
 
2210         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
2211         struct drm_device_dma *dma = dev->dma;
 
2212         struct drm_buf *buf;
 
2213         drm_radeon_vertex_t vertex;
 
2214         drm_radeon_tcl_prim_t prim;
 
2216         LOCK_TEST_WITH_RETURN(dev, filp);
 
2218         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
 
2220         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex_t __user *) data,
 
2223         DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
 
2224                   DRM_CURRENTPID, vertex.idx, vertex.count, vertex.discard);
 
2226         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
 
2227                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2228                           vertex.idx, dma->buf_count - 1);
 
2229                 return DRM_ERR(EINVAL);
 
2231         if (vertex.prim < 0 || vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
 
2232                 DRM_ERROR("buffer prim %d\n", vertex.prim);
 
2233                 return DRM_ERR(EINVAL);
 
2236         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2237         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2239         buf = dma->buflist[vertex.idx];
 
2241         if (buf->filp != filp) {
 
2242                 DRM_ERROR("process %d using buffer owned by %p\n",
 
2243                           DRM_CURRENTPID, buf->filp);
 
2244                 return DRM_ERR(EINVAL);
 
2247                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
 
2248                 return DRM_ERR(EINVAL);
 
2251         /* Build up a prim_t record:
 
2254                 buf->used = vertex.count;       /* not used? */
 
2256                 if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
 
2257                         if (radeon_emit_state(dev_priv, filp_priv,
 
2258                                               &sarea_priv->context_state,
 
2259                                               sarea_priv->tex_state,
 
2260                                               sarea_priv->dirty)) {
 
2261                                 DRM_ERROR("radeon_emit_state failed\n");
 
2262                                 return DRM_ERR(EINVAL);
 
2265                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
 
2266                                                RADEON_UPLOAD_TEX1IMAGES |
 
2267                                                RADEON_UPLOAD_TEX2IMAGES |
 
2268                                                RADEON_REQUIRE_QUIESCENCE);
 
2272                 prim.finish = vertex.count;     /* unused */
 
2273                 prim.prim = vertex.prim;
 
2274                 prim.numverts = vertex.count;
 
2275                 prim.vc_format = dev_priv->sarea_priv->vc_format;
 
2277                 radeon_cp_dispatch_vertex(dev, buf, &prim);
 
2280         if (vertex.discard) {
 
2281                 radeon_cp_discard_buffer(dev, buf);
 
/*
 * radeon_cp_indices - ioctl handler that dispatches an indexed primitive
 * from an element buffer.
 *
 * A drm_radeon_indices_t is copied from the user pointer in data and
 * validated before anything is queued:
 *  - elts.idx must lie in [0, dma->buf_count);
 *  - elts.prim must lie in [0, RADEON_PRIM_TYPE_3VRT_LINE_LIST];
 *  - the selected buffer must be owned by the calling filp;
 *  - a further check logs "sending pending buffer" (its condition is on a
 *    dropped line);
 *  - after elts.start is moved back by RADEON_INDEX_PRIM_OFFSET it must be
 *    8-byte aligned and must not fall below buf->used.
 * Each failure returns DRM_ERR(EINVAL).
 *
 * buf->used is then advanced to elts.end.  If any dirty bit other than
 * RADEON_UPLOAD_CLIPRECTS is set in the SAREA, state is emitted with
 * radeon_emit_state() (failure returns EINVAL) and the TEXnIMAGES and
 * REQUIRE_QUIESCENCE dirty bits are cleared.  A drm_radeon_tcl_prim_t is
 * built with offset 0 and numverts RADEON_MAX_VB_VERTS and handed to
 * radeon_cp_dispatch_indices(); the buffer may afterwards be released with
 * radeon_cp_discard_buffer().
 *
 * NOTE(review): this listing has dropped lines (braces, the declaration of
 * count, the condition guarding the discard and the final return).
 */
2288 static int radeon_cp_indices(DRM_IOCTL_ARGS)
 
2291         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2292         struct drm_file *filp_priv;
 
2293         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
2294         struct drm_device_dma *dma = dev->dma;
 
2295         struct drm_buf *buf;
 
2296         drm_radeon_indices_t elts;
 
2297         drm_radeon_tcl_prim_t prim;
 
2300         LOCK_TEST_WITH_RETURN(dev, filp);
 
2302         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
 
2304         DRM_COPY_FROM_USER_IOCTL(elts, (drm_radeon_indices_t __user *) data,
 
2307         DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
 
2308                   DRM_CURRENTPID, elts.idx, elts.start, elts.end, elts.discard);
 
2310         if (elts.idx < 0 || elts.idx >= dma->buf_count) {
 
2311                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2312                           elts.idx, dma->buf_count - 1);
 
2313                 return DRM_ERR(EINVAL);
 
2315         if (elts.prim < 0 || elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
 
2316                 DRM_ERROR("buffer prim %d\n", elts.prim);
 
2317                 return DRM_ERR(EINVAL);
 
2320         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2321         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2323         buf = dma->buflist[elts.idx];
 
2325         if (buf->filp != filp) {
 
2326                 DRM_ERROR("process %d using buffer owned by %p\n",
 
2327                           DRM_CURRENTPID, buf->filp);
 
2328                 return DRM_ERR(EINVAL);
 
2331                 DRM_ERROR("sending pending buffer %d\n", elts.idx);
 
2332                 return DRM_ERR(EINVAL);
 
2335         count = (elts.end - elts.start) / sizeof(u16);
 
2336         elts.start -= RADEON_INDEX_PRIM_OFFSET;
 
2338         if (elts.start & 0x7) {
 
2339                 DRM_ERROR("misaligned buffer 0x%x\n", elts.start);
 
2340                 return DRM_ERR(EINVAL);
 
2342         if (elts.start < buf->used) {
 
2343                 DRM_ERROR("no header 0x%x - 0x%x\n", elts.start, buf->used);
 
2344                 return DRM_ERR(EINVAL);
 
2347         buf->used = elts.end;
 
2349         if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
 
2350                 if (radeon_emit_state(dev_priv, filp_priv,
 
2351                                       &sarea_priv->context_state,
 
2352                                       sarea_priv->tex_state,
 
2353                                       sarea_priv->dirty)) {
 
2354                         DRM_ERROR("radeon_emit_state failed\n");
 
2355                         return DRM_ERR(EINVAL);
 
2358                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
 
2359                                        RADEON_UPLOAD_TEX1IMAGES |
 
2360                                        RADEON_UPLOAD_TEX2IMAGES |
 
2361                                        RADEON_REQUIRE_QUIESCENCE);
 
2364         /* Build up a prim_t record:
 
2366         prim.start = elts.start;
 
2367         prim.finish = elts.end;
 
2368         prim.prim = elts.prim;
 
2369         prim.offset = 0;        /* offset from start of dma buffers */
 
2370         prim.numverts = RADEON_MAX_VB_VERTS;    /* duh */
 
2371         prim.vc_format = dev_priv->sarea_priv->vc_format;
 
2373         radeon_cp_dispatch_indices(dev, buf, &prim);
 
2375                 radeon_cp_discard_buffer(dev, buf);
 
/*
 * radeon_cp_texture - ioctl handler that uploads a texture image.
 *
 * A drm_radeon_texture_t is copied from the user pointer in data.  A NULL
 * tex.image is rejected with DRM_ERR(EINVAL); otherwise the image record it
 * points to is copied in as well, returning DRM_ERR(EFAULT) on failure.
 * After the ring-space and buffer-age tests the upload is carried out by
 * radeon_cp_dispatch_texture(), whose result is stored in ret.
 *
 * NOTE(review): this listing has dropped lines (braces, the declaration of
 * ret and the statements after the dispatch call, presumably including
 * "return ret" - TODO confirm).
 */
2382 static int radeon_cp_texture(DRM_IOCTL_ARGS)
 
2385         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2386         drm_radeon_texture_t tex;
 
2387         drm_radeon_tex_image_t image;
 
2390         LOCK_TEST_WITH_RETURN(dev, filp);
 
2392         DRM_COPY_FROM_USER_IOCTL(tex, (drm_radeon_texture_t __user *) data,
 
2395         if (tex.image == NULL) {
 
2396                 DRM_ERROR("null texture image!\n");
 
2397                 return DRM_ERR(EINVAL);
 
2400         if (DRM_COPY_FROM_USER(&image,
 
2401                                (drm_radeon_tex_image_t __user *) tex.image,
 
2403                 return DRM_ERR(EFAULT);
 
2405         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2406         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2408         ret = radeon_cp_dispatch_texture(filp, dev, &tex, &image);
 
/*
 * radeon_cp_stipple - ioctl handler that loads a polygon stipple pattern.
 *
 * A drm_radeon_stipple_t is copied from the user pointer in data, then
 * 32 u32 values are copied from stipple.mask into the local mask buffer
 * (DRM_ERR(EFAULT) on failure).  After the ring-space test the pattern is
 * queued with radeon_cp_dispatch_stipple().
 *
 * NOTE(review): this listing has dropped lines (braces, the declaration of
 * mask and the final return).
 */
2414 static int radeon_cp_stipple(DRM_IOCTL_ARGS)
 
2417         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2418         drm_radeon_stipple_t stipple;
 
2421         LOCK_TEST_WITH_RETURN(dev, filp);
 
2423         DRM_COPY_FROM_USER_IOCTL(stipple, (drm_radeon_stipple_t __user *) data,
 
2426         if (DRM_COPY_FROM_USER(&mask, stipple.mask, 32 * sizeof(u32)))
 
2427                 return DRM_ERR(EFAULT);
 
2429         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2431         radeon_cp_dispatch_stipple(dev, mask);
 
/*
 * radeon_cp_indirect - ioctl handler that dispatches a client-filled
 * indirect command buffer.
 *
 * A drm_radeon_indirect_t is copied from the user pointer in data and
 * validated before anything is queued:
 *  - indirect.idx must lie in [0, dma->buf_count);
 *  - the selected buffer must be owned by the calling filp;
 *  - a further check logs "sending pending buffer" (its condition is on a
 *    dropped line);
 *  - indirect.start must not fall below buf->used, so that a part of the
 *    buffer already submitted is not reused.
 * Each failure returns DRM_ERR(EINVAL).
 *
 * After the ring-space and buffer-age tests, buf->used is advanced to
 * indirect.end, the ring waits for 3D idle, and the range
 * [indirect.start, indirect.end) is handed to
 * radeon_cp_dispatch_indirect().  When indirect.discard is set the buffer
 * is released with radeon_cp_discard_buffer().
 *
 * As the in-body comment notes, the buffer contents are not verified here,
 * which is why the call is meant for privileged clients only.
 *
 * NOTE(review): this listing has dropped lines (braces, the ring
 * begin/advance calls and the final return).
 */
2437 static int radeon_cp_indirect(DRM_IOCTL_ARGS)
 
2440         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2441         struct drm_device_dma *dma = dev->dma;
 
2442         struct drm_buf *buf;
 
2443         drm_radeon_indirect_t indirect;
 
2446         LOCK_TEST_WITH_RETURN(dev, filp);
 
2448         DRM_COPY_FROM_USER_IOCTL(indirect,
 
2449                                  (drm_radeon_indirect_t __user *) data,
 
2452         DRM_DEBUG("indirect: idx=%d s=%d e=%d d=%d\n",
 
2453                   indirect.idx, indirect.start, indirect.end, indirect.discard);
 
2455         if (indirect.idx < 0 || indirect.idx >= dma->buf_count) {
 
2456                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2457                           indirect.idx, dma->buf_count - 1);
 
2458                 return DRM_ERR(EINVAL);
 
2461         buf = dma->buflist[indirect.idx];
 
2463         if (buf->filp != filp) {
 
2464                 DRM_ERROR("process %d using buffer owned by %p\n",
 
2465                           DRM_CURRENTPID, buf->filp);
 
2466                 return DRM_ERR(EINVAL);
 
2469                 DRM_ERROR("sending pending buffer %d\n", indirect.idx);
 
2470                 return DRM_ERR(EINVAL);
 
2473         if (indirect.start < buf->used) {
 
2474                 DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
 
2475                           indirect.start, buf->used);
 
2476                 return DRM_ERR(EINVAL);
 
2479         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2480         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2482         buf->used = indirect.end;
 
2484         /* Wait for the 3D stream to idle before the indirect buffer
 
2485          * containing 2D acceleration commands is processed.
 
2489         RADEON_WAIT_UNTIL_3D_IDLE();
 
2493         /* Dispatch the indirect buffer full of commands from the
 
2494          * X server.  This is insecure and is thus only available to
 
2495          * privileged clients.
 
2497         radeon_cp_dispatch_indirect(dev, buf, indirect.start, indirect.end);
 
2498         if (indirect.discard) {
 
2499                 radeon_cp_discard_buffer(dev, buf);
 
// radeon_cp_vertex2 - ioctl handler that dispatches a list of primitives
// taken from one client DMA buffer.
//
// Copies a drm_radeon_vertex2_t request in from user space, validates the
// buffer index and that the buffer belongs to the calling file, then walks
// vertex.nr_prims primitive descriptors.  State is emitted through
// radeon_emit_state2() whenever the stateidx of a primitive differs from
// the previous one, and each primitive is handed to
// radeon_cp_dispatch_indices() or radeon_cp_dispatch_vertex().  The buffer
// is discarded at the end when vertex.discard is set.
//
// Error returns visible here: EINVAL for a bad index, a buffer owned by
// another file, a buffer reported as pending, too many cliprects or a
// failed state emit; EFAULT when a copy from user space fails.
//
// NOTE(review): the embedded line numbers skip, so some original lines
// (braces, short statements) are absent from this listing - confirm the
// exact control flow against the complete file.
2506 static int radeon_cp_vertex2(DRM_IOCTL_ARGS)
 
2509         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2510         struct drm_file *filp_priv;
 
2511         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 
2512         struct drm_device_dma *dma = dev->dma;
 
2513         struct drm_buf *buf;
 
2514         drm_radeon_vertex2_t vertex;
 
2516         unsigned char laststate;
 
2518         LOCK_TEST_WITH_RETURN(dev, filp);
 
2520         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
 
2522         DRM_COPY_FROM_USER_IOCTL(vertex, (drm_radeon_vertex2_t __user *) data,
 
2525         DRM_DEBUG("pid=%d index=%d discard=%d\n",
 
2526                   DRM_CURRENTPID, vertex.idx, vertex.discard);
 
             // Reject indices outside the range 0 .. dma->buf_count - 1.
2528         if (vertex.idx < 0 || vertex.idx >= dma->buf_count) {
 
2529                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2530                           vertex.idx, dma->buf_count - 1);
 
2531                 return DRM_ERR(EINVAL);
 
2534         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2535         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
2537         buf = dma->buflist[vertex.idx];
 
             // The buffer must belong to the calling file handle.
2539         if (buf->filp != filp) {
 
2540                 DRM_ERROR("process %d using buffer owned by %p\n",
 
2541                           DRM_CURRENTPID, buf->filp);
 
2542                 return DRM_ERR(EINVAL);
 
2546                 DRM_ERROR("sending pending buffer %d\n", vertex.idx);
 
2547                 return DRM_ERR(EINVAL);
 
             // More cliprects than RADEON_NR_SAREA_CLIPRECTS is rejected
             // as an invalid request.
2550         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
 
2551                 return DRM_ERR(EINVAL);
 
             // laststate starts at 0xff; state is emitted again only when
             // prim.stateidx differs from the last emitted index.
2553         for (laststate = 0xff, i = 0; i < vertex.nr_prims; i++) {
 
2554                 drm_radeon_prim_t prim;
 
2555                 drm_radeon_tcl_prim_t tclprim;
 
                     // Each descriptor is copied in from the user array
                     // vertex.prim before it is used.
2557                 if (DRM_COPY_FROM_USER(&prim, &vertex.prim[i], sizeof(prim)))
 
2558                         return DRM_ERR(EFAULT);
 
2560                 if (prim.stateidx != laststate) {
 
2561                         drm_radeon_state_t state;
 
2563                         if (DRM_COPY_FROM_USER(&state,
 
2564                                                &vertex.state[prim.stateidx],
 
2566                                 return DRM_ERR(EFAULT);
 
2568                         if (radeon_emit_state2(dev_priv, filp_priv, &state)) {
 
2569                                 DRM_ERROR("radeon_emit_state2 failed\n");
 
2570                                 return DRM_ERR(EINVAL);
 
2573                         laststate = prim.stateidx;
 
2576                 tclprim.start = prim.start;
 
2577                 tclprim.finish = prim.finish;
 
2578                 tclprim.prim = prim.prim;
 
2579                 tclprim.vc_format = prim.vc_format;
 
                     // Indexed primitives: numverts * 64 becomes the offset
                     // and the vertex count is set to RADEON_MAX_VB_VERTS.
                     // The second dispatch call below is presumably the
                     // else branch (its opening line is not shown).
2581                 if (prim.prim & RADEON_PRIM_WALK_IND) {
 
2582                         tclprim.offset = prim.numverts * 64;
 
2583                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
 
2585                         radeon_cp_dispatch_indices(dev, buf, &tclprim);
 
2587                         tclprim.numverts = prim.numverts;
 
2588                         tclprim.offset = 0;     /* not used */
 
2590                         radeon_cp_dispatch_vertex(dev, buf, &tclprim);
 
                     // A cliprect count of exactly one is reset to zero
                     // after a primitive has been dispatched.
2593                 if (sarea_priv->nbox == 1)
 
2594                         sarea_priv->nbox = 0;
 
2597         if (vertex.discard) {
 
2598                 radeon_cp_discard_buffer(dev, buf);
 
// radeon_emit_packets - emit one register-state packet from the command
// buffer to the ring.
//
// header.packet.packet_id selects an entry of the packet[] table, which
// supplies the start register and the length in dwords.  Ids at or above
// RADEON_MAX_STATE_PACKETS, and packets longer than the data remaining in
// cmdbuf, are rejected with EINVAL.  The payload must also pass
// radeon_check_and_fixup_packets().  On success a CP_PACKET0 header and sz
// dwords are written and cmdbuf->buf / cmdbuf->bufsz advance past the
// consumed payload.
//
// NOTE(review): only the upper bound of id is tested here; presumably
// packet_id is an unsigned field so the int cast cannot go negative -
// confirm against the header definition.
2605 static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
 
2606                                struct drm_file * filp_priv,
 
2607                                drm_radeon_cmd_header_t header,
 
2608                                drm_radeon_kcmd_buffer_t *cmdbuf)
 
2610         int id = (int)header.packet.packet_id;
 
2612         int *data = (int *)cmdbuf->buf;
 
2615         if (id >= RADEON_MAX_STATE_PACKETS)
 
2616                 return DRM_ERR(EINVAL);
 
2618         sz = packet[id].len;
 
2619         reg = packet[id].start;
 
             // The table length is in dwords; compare it in bytes against
             // what is left in the command buffer.
2621         if (sz * sizeof(int) > cmdbuf->bufsz) {
 
2622                 DRM_ERROR("Packet size provided larger than data provided\n");
 
2623                 return DRM_ERR(EINVAL);
 
2626         if (radeon_check_and_fixup_packets(dev_priv, filp_priv, id, data)) {
 
2627                 DRM_ERROR("Packet verification failed\n");
 
2628                 return DRM_ERR(EINVAL);
 
2632         OUT_RING(CP_PACKET0(reg, (sz - 1)));
 
2633         OUT_RING_TABLE(data, sz);
 
             // Step past the payload that was just emitted.
2636         cmdbuf->buf += sz * sizeof(int);
 
2637         cmdbuf->bufsz -= sz * sizeof(int);
 
// radeon_emit_scalars - emit a block of TCL scalar values from the command
// buffer.
//
// Takes count, offset and stride from header.scalars, writes the index
// register (offset combined with the shifted stride) followed by sz dwords
// of data read from cmdbuf->buf, then advances cmdbuf->buf and
// cmdbuf->bufsz by sz * sizeof(int).
//
// NOTE(review): none of the lines shown here checks that sz * sizeof(int)
// fits within cmdbuf->bufsz before OUT_RING_TABLE reads sz dwords, whereas
// radeon_emit_packets and radeon_emit_veclinear do compare against bufsz.
// The listing omits some lines, so confirm against the complete file.
2641 static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
 
2642                                           drm_radeon_cmd_header_t header,
 
2643                                           drm_radeon_kcmd_buffer_t *cmdbuf)
 
2645         int sz = header.scalars.count;
 
2646         int start = header.scalars.offset;
 
2647         int stride = header.scalars.stride;
 
2651         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
 
2652         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
 
2653         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
 
2654         OUT_RING_TABLE(cmdbuf->buf, sz);
 
             // Step past the payload that was just emitted.
2656         cmdbuf->buf += sz * sizeof(int);
 
2657         cmdbuf->bufsz -= sz * sizeof(int);
 
// radeon_emit_scalars2 - variant of radeon_emit_scalars whose start index
// is header.scalars.offset (taken as unsigned) plus 0x100.
//
// Apart from the adjusted start value the visible sequence is the same:
// write the scalar index register, emit sz dwords from cmdbuf->buf, then
// advance cmdbuf->buf and cmdbuf->bufsz by sz * sizeof(int).
//
// NOTE(review): no line shown here checks sz * sizeof(int) against
// cmdbuf->bufsz before the table is read - confirm against the complete
// file, since this listing omits some lines.
2663 static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
 
2664                                            drm_radeon_cmd_header_t header,
 
2665                                            drm_radeon_kcmd_buffer_t *cmdbuf)
 
2667         int sz = header.scalars.count;
 
             // Same offset field as radeon_emit_scalars, biased by 0x100.
2668         int start = ((unsigned int)header.scalars.offset) + 0x100;
 
2669         int stride = header.scalars.stride;
 
2673         OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
 
2674         OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
 
2675         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
 
2676         OUT_RING_TABLE(cmdbuf->buf, sz);
 
2678         cmdbuf->buf += sz * sizeof(int);
 
2679         cmdbuf->bufsz -= sz * sizeof(int);
 
// radeon_emit_vectors - emit a block of TCL vector values from the command
// buffer.
//
// Takes count, offset and stride from header.vectors.  Writes 0 to
// RADEON_SE_TCL_STATE_FLUSH, then the vector index register (offset
// combined with the shifted stride), then sz dwords of data read from
// cmdbuf->buf.  cmdbuf->buf and cmdbuf->bufsz advance by sz * sizeof(int).
//
// NOTE(review): no line shown here checks sz * sizeof(int) against
// cmdbuf->bufsz before the table is read, unlike radeon_emit_veclinear -
// confirm against the complete file, since this listing omits some lines.
2683 static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
 
2684                                           drm_radeon_cmd_header_t header,
 
2685                                           drm_radeon_kcmd_buffer_t *cmdbuf)
 
2687         int sz = header.vectors.count;
 
2688         int start = header.vectors.offset;
 
2689         int stride = header.vectors.stride;
 
2693         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
 
2694         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
 
2695         OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
 
2696         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
 
2697         OUT_RING_TABLE(cmdbuf->buf, sz);
 
2700         cmdbuf->buf += sz * sizeof(int);
 
2701         cmdbuf->bufsz -= sz * sizeof(int);
 
// radeon_emit_veclinear - emit a linear run of TCL vector data from the
// command buffer.
//
// The dword count is header.veclinear.count * 4 and the start index is
// built from addr_lo with addr_hi shifted left by 8.  The request is
// rejected with EINVAL when sz * 4 bytes exceeds cmdbuf->bufsz.  The index
// register is written with a stride of 1, sz dwords are emitted from
// cmdbuf->buf, and cmdbuf->buf / cmdbuf->bufsz advance past them.
2705 static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
 
2706                                           drm_radeon_cmd_header_t header,
 
2707                                           drm_radeon_kcmd_buffer_t *cmdbuf)
 
2709         int sz = header.veclinear.count * 4;
 
2710         int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
 
             // sz is in dwords, bufsz in bytes.
2715         if (sz * 4 > cmdbuf->bufsz)
 
2716                 return DRM_ERR(EINVAL);
 
2719         OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
 
2720         OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
 
2721         OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
 
2722         OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
 
2723         OUT_RING_TABLE(cmdbuf->buf, sz);
 
2726         cmdbuf->buf += sz * sizeof(int);
 
2727         cmdbuf->bufsz -= sz * sizeof(int);
 
// radeon_emit_packet3 - verify and emit one packet from the command buffer
// without cliprect handling.
//
// The packet is passed through radeon_check_and_fixup_packet3(); a failure
// is logged.  cmdsz dwords are then written to the ring from cmdbuf->buf
// and the buffer is advanced by cmdsz * 4 bytes.
//
// NOTE(review): the declaration of cmdsz and the rest of the verification
// call are on lines absent from this listing; presumably cmdsz is filled
// in by that call - confirm against the complete file.
2731 static int radeon_emit_packet3(struct drm_device * dev,
 
2732                                struct drm_file * filp_priv,
 
2733                                drm_radeon_kcmd_buffer_t *cmdbuf)
 
2735         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2742         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
 
2744                 DRM_ERROR("Packet verification failed\n");
 
2749         OUT_RING_TABLE(cmdbuf->buf, cmdsz);
 
             // cmdsz counts dwords, so the byte advance is cmdsz * 4.
2752         cmdbuf->buf += cmdsz * 4;
 
2753         cmdbuf->bufsz -= cmdsz * 4;
 
// radeon_emit_packet3_cliprect - verify one packet and emit it once per
// cliprect.
//
// The packet is passed through radeon_check_and_fixup_packet3(); a failure
// is logged.  In a loop that runs while ++i < cmdbuf->nbox, each cliprect
// is copied in from the user array cmdbuf->boxes (EFAULT on failure),
// handed to radeon_emit_clip_rect(), and cmdsz dwords are written to the
// ring from cmdbuf->buf.  A wait for 3D idle is emitted inside the loop as
// the workaround described in the FIXME below.  After the loop there is a
// special case for cmdbuf->nbox == 1 (its body is not shown) and the
// command buffer is advanced by cmdsz * 4 bytes.
//
// NOTE(review): several lines of this function, including the remaining
// parameter and the loop opening, are absent from this listing - confirm
// details against the complete file.
2757 static int radeon_emit_packet3_cliprect(struct drm_device *dev,
 
2758                                         struct drm_file *filp_priv,
 
2759                                         drm_radeon_kcmd_buffer_t *cmdbuf,
 
2762         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2763         struct drm_clip_rect box;
 
2766         struct drm_clip_rect __user *boxes = cmdbuf->boxes;
 
2772         if ((ret = radeon_check_and_fixup_packet3(dev_priv, filp_priv,
 
2774                 DRM_ERROR("Packet verification failed\n");
 
2782                 if (i < cmdbuf->nbox) {
 
2783                         if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
 
2784                                 return DRM_ERR(EFAULT);
 
2785                         /* FIXME The second and subsequent times round
 
2786                          * this loop, send a WAIT_UNTIL_3D_IDLE before
 
2787                          * calling emit_clip_rect(). This fixes a
 
2788                          * lockup on fast machines when sending
 
2789                          * several cliprects with a cmdbuf, as when
 
2790                          * waving a 2D window over a 3D
 
2791                          * window. Something in the commands from user
 
2792                          * space seems to hang the card when they're
 
2793                          * sent several times in a row. That would be
 
2794                          * the correct place to fix it but this works
 
2795                          * around it until I can figure that out - Tim
 
2799                                 RADEON_WAIT_UNTIL_3D_IDLE();
 
2802                         radeon_emit_clip_rect(dev_priv, &box);
 
2806                 OUT_RING_TABLE(cmdbuf->buf, cmdsz);
 
2809         } while (++i < cmdbuf->nbox);
 
2810         if (cmdbuf->nbox == 1)
 
2814         cmdbuf->buf += cmdsz * 4;
 
2815         cmdbuf->bufsz -= cmdsz * 4;
 
// radeon_emit_wait - emit a wait-until-idle command selected by flags.
//
// RADEON_WAIT_2D emits a 2D idle wait, RADEON_WAIT_3D a 3D idle wait, and
// both flags together a full idle wait.  The EINVAL return is presumably
// the default case for any other flag value (the switch and default lines
// are absent from this listing).
2819 static int radeon_emit_wait(struct drm_device * dev, int flags)
 
2821         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2824         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
 
2826         case RADEON_WAIT_2D:
 
2828                 RADEON_WAIT_UNTIL_2D_IDLE();
 
2831         case RADEON_WAIT_3D:
 
2833                 RADEON_WAIT_UNTIL_3D_IDLE();
 
2836         case RADEON_WAIT_2D | RADEON_WAIT_3D:
 
2838                 RADEON_WAIT_UNTIL_IDLE();
 
2842                 return DRM_ERR(EINVAL);
 
// radeon_cp_cmdbuf - ioctl handler that parses and executes a client
// command buffer.
//
// Copies the command buffer descriptor in from user space, rejects a bufsz
// that is negative or larger than 64 * 1024, and copies the command data
// into a kernel allocation (kbuf) so that values cannot change between
// validation and use.  When the microcode version is UCODE_R300 the buffer
// is handed to r300_do_cp_cmdbuf().  Otherwise the handler loops while at
// least one header remains, consuming the header and dispatching on
// header.header.cmd_type to the matching radeon_emit_* helper or to the
// DMA discard handling.  kbuf is freed on every exit path visible here.
//
// Error returns visible here: EINVAL for a bad size and on the final error
// path, ENOMEM for a failed allocation, EFAULT for a failed copy-in.
//
// NOTE(review): the embedded line numbers skip, so short lines such as
// break statements, jumps, labels and the assignment that presumably
// points cmdbuf.buf at kbuf are absent from this listing.
2848 static int radeon_cp_cmdbuf(DRM_IOCTL_ARGS)
 
2851         drm_radeon_private_t *dev_priv = dev->dev_private;
 
2852         struct drm_file *filp_priv;
 
2853         struct drm_device_dma *dma = dev->dma;
 
2854         struct drm_buf *buf = NULL;
 
2856         drm_radeon_kcmd_buffer_t cmdbuf;
 
2857         drm_radeon_cmd_header_t header;
 
2858         int orig_nbox, orig_bufsz;
 
2861         LOCK_TEST_WITH_RETURN(dev, filp);
 
2863         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
 
2865         DRM_COPY_FROM_USER_IOCTL(cmdbuf,
 
2866                                  (drm_radeon_cmd_buffer_t __user *) data,
 
2869         RING_SPACE_TEST_WITH_RETURN(dev_priv);
 
2870         VB_AGE_TEST_WITH_RETURN(dev_priv);
 
             // Only sizes from 0 up to 64 * 1024 bytes are accepted.
2872         if (cmdbuf.bufsz > 64 * 1024 || cmdbuf.bufsz < 0) {
 
2873                 return DRM_ERR(EINVAL);
 
2876         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
 
2877          * races between checking values and using those values in other code,
 
2878          * and simply to avoid a lot of function calls to copy in data.
 
2880         orig_bufsz = cmdbuf.bufsz;
 
2881         if (orig_bufsz != 0) {
 
2882                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
 
2884                         return DRM_ERR(ENOMEM);
 
2885                 if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf.buf,
 
2887                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
2888                         return DRM_ERR(EFAULT);
 
             // Remember the original cliprect count; it is passed to the
             // PACKET3_CLIP helper below.
2893         orig_nbox = cmdbuf.nbox;
 
2895         if (dev_priv->microcode_version == UCODE_R300) {
 
2897                 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
 
2899                 if (orig_bufsz != 0)
 
2900                         drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
2905         /* microcode_version != r300 */
             // Each iteration consumes one header, then the selected helper
             // consumes the payload of that command.
2906         while (cmdbuf.bufsz >= sizeof(header)) {
 
2908                 header.i = *(int *)cmdbuf.buf;
 
2909                 cmdbuf.buf += sizeof(header);
 
2910                 cmdbuf.bufsz -= sizeof(header);
 
2912                 switch (header.header.cmd_type) {
 
2913                 case RADEON_CMD_PACKET:
 
2914                         DRM_DEBUG("RADEON_CMD_PACKET\n");
 
2915                         if (radeon_emit_packets
 
2916                             (dev_priv, filp_priv, header, &cmdbuf)) {
 
2917                                 DRM_ERROR("radeon_emit_packets failed\n");
 
2922                 case RADEON_CMD_SCALARS:
 
2923                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
 
2924                         if (radeon_emit_scalars(dev_priv, header, &cmdbuf)) {
 
2925                                 DRM_ERROR("radeon_emit_scalars failed\n");
 
2930                 case RADEON_CMD_VECTORS:
 
2931                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
 
2932                         if (radeon_emit_vectors(dev_priv, header, &cmdbuf)) {
 
2933                                 DRM_ERROR("radeon_emit_vectors failed\n");
 
2938                 case RADEON_CMD_DMA_DISCARD:
 
2939                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
 
                             // Validate the buffer index, ownership and
                             // pending state before discarding.
2940                         idx = header.dma.buf_idx;
 
2941                         if (idx < 0 || idx >= dma->buf_count) {
 
2942                                 DRM_ERROR("buffer index %d (of %d max)\n",
 
2943                                           idx, dma->buf_count - 1);
 
2947                         buf = dma->buflist[idx];
 
2948                         if (buf->filp != filp || buf->pending) {
 
2949                                 DRM_ERROR("bad buffer %p %p %d\n",
 
2950                                           buf->filp, filp, buf->pending);
 
2954                         radeon_cp_discard_buffer(dev, buf);
 
2957                 case RADEON_CMD_PACKET3:
 
2958                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
 
2959                         if (radeon_emit_packet3(dev, filp_priv, &cmdbuf)) {
 
2960                                 DRM_ERROR("radeon_emit_packet3 failed\n");
 
2965                 case RADEON_CMD_PACKET3_CLIP:
 
2966                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
 
2967                         if (radeon_emit_packet3_cliprect
 
2968                             (dev, filp_priv, &cmdbuf, orig_nbox)) {
 
2969                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
 
2974                 case RADEON_CMD_SCALARS2:
 
2975                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
 
2976                         if (radeon_emit_scalars2(dev_priv, header, &cmdbuf)) {
 
2977                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
 
2982                 case RADEON_CMD_WAIT:
 
2983                         DRM_DEBUG("RADEON_CMD_WAIT\n");
 
2984                         if (radeon_emit_wait(dev, header.wait.flags)) {
 
2985                                 DRM_ERROR("radeon_emit_wait failed\n");
 
2989                 case RADEON_CMD_VECLINEAR:
 
2990                         DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
 
2991                         if (radeon_emit_veclinear(dev_priv, header, &cmdbuf)) {
 
2992                                 DRM_ERROR("radeon_emit_veclinear failed\n");
 
2998                         DRM_ERROR("bad cmd_type %d at %p\n",
 
2999                                   header.header.cmd_type,
 
3000                                   cmdbuf.buf - sizeof(header));
 
             // Completion path: release the kernel copy of the commands.
3005         if (orig_bufsz != 0)
 
3006                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
3008         DRM_DEBUG("DONE\n");
 
             // Error path: release the kernel copy and report EINVAL.
             // Presumably reached from the failure branches above; the
             // label and the jumps to it are not shown in this listing.
3013         if (orig_bufsz != 0)
 
3014                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
3015         return DRM_ERR(EINVAL);
 
// radeon_cp_getparam - ioctl handler that reports one driver parameter to
// user space.
//
// Copies a drm_radeon_getparam_t request in, selects a value according to
// param.param, and copies sizeof(int) bytes of that value out to the user
// pointer param.value.  The LAST_FRAME and LAST_CLEAR queries also
// increment a read counter in dev_priv->stats.  SCRATCH_OFFSET returns
// EINVAL when dev_priv->writeback_works is false.  CARD_TYPE reports PCIE
// or AGP according to dev_priv->flags, otherwise PCI.  SAREA_HANDLE is
// only compiled when BITS_PER_LONG is 32.
//
// Error returns visible here: EINVAL for an unrecognised parameter, EFAULT
// when the copy to user space fails.
//
// NOTE(review): the embedded line numbers skip, so break statements and
// some assignments (for example the RADEON_PARAM_IRQ_NR value) are absent
// from this listing.
3018 static int radeon_cp_getparam(DRM_IOCTL_ARGS)
 
3021         drm_radeon_private_t *dev_priv = dev->dev_private;
 
3022         drm_radeon_getparam_t param;
 
3025         DRM_COPY_FROM_USER_IOCTL(param, (drm_radeon_getparam_t __user *) data,
 
3028         DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
 
3030         switch (param.param) {
 
3031         case RADEON_PARAM_GART_BUFFER_OFFSET:
 
3032                 value = dev_priv->gart_buffers_offset;
 
3034         case RADEON_PARAM_LAST_FRAME:
 
3035                 dev_priv->stats.last_frame_reads++;
 
3036                 value = GET_SCRATCH(0);
 
3038         case RADEON_PARAM_LAST_DISPATCH:
 
3039                 value = GET_SCRATCH(1);
 
3041         case RADEON_PARAM_LAST_CLEAR:
 
3042                 dev_priv->stats.last_clear_reads++;
 
3043                 value = GET_SCRATCH(2);
 
3045         case RADEON_PARAM_IRQ_NR:
 
3048         case RADEON_PARAM_GART_BASE:
 
3049                 value = dev_priv->gart_vm_start;
 
3051         case RADEON_PARAM_REGISTER_HANDLE:
 
3052                 value = dev_priv->mmio->offset;
 
3054         case RADEON_PARAM_STATUS_HANDLE:
 
3055                 value = dev_priv->ring_rptr_offset;
 
3057 #if BITS_PER_LONG == 32
 
3059                  * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
 
3060                  * pointer which can't fit into an int-sized variable.  According to
 
3061                  * Michel Dänzer, the ioctl() is only used on embedded platforms, so
 
3062                  * not supporting it shouldn't be a problem.  If the same functionality
 
3063                  * is needed on 64-bit platforms, a new ioctl() would have to be added,
 
3064                  * so backwards-compatibility for the embedded platforms can be
 
3065                  * maintained.  --davidm 4-Feb-2004.
 
3067         case RADEON_PARAM_SAREA_HANDLE:
 
3068                 /* The lock is the first dword in the sarea. */
 
3069                 value = (long)dev->lock.hw_lock;
 
3072         case RADEON_PARAM_GART_TEX_HANDLE:
 
3073                 value = dev_priv->gart_textures_offset;
 
3075         case RADEON_PARAM_SCRATCH_OFFSET:
 
3076                 if (!dev_priv->writeback_works)
 
3077                         return DRM_ERR(EINVAL);
 
3078                 value = RADEON_SCRATCH_REG_OFFSET;
 
3080         case RADEON_PARAM_CARD_TYPE:
 
3081                 if (dev_priv->flags & RADEON_IS_PCIE)
 
3082                         value = RADEON_CARD_PCIE;
 
3083                 else if (dev_priv->flags & RADEON_IS_AGP)
 
3084                         value = RADEON_CARD_AGP;
 
3086                         value = RADEON_CARD_PCI;
 
3088         case RADEON_PARAM_VBLANK_CRTC:
 
3089                 value = radeon_vblank_crtc_get(dev);
 
3092                 DRM_DEBUG("Invalid parameter %d\n", param.param);
 
3093                 return DRM_ERR(EINVAL);
 
3096         if (DRM_COPY_TO_USER(param.value, &value, sizeof(int))) {
 
3097                 DRM_ERROR("copy_to_user\n");
 
3098                 return DRM_ERR(EFAULT);
 
// radeon_cp_setparam - ioctl handler that applies one parameter supplied by
// user space.
//
// Copies a drm_radeon_setparam_t request in and acts on it per case:
//   FB_LOCATION        - stores dev_priv->fb_location minus sp.value as the
//                        per-file radeon_fb_delta.
//   SWITCH_TILING      - value 0 clears, value 1 sets RADEON_DST_TILE_MACRO
//                        in the front and back pitch offsets and updates
//                        sarea_priv->tiling_enabled to match.
//   PCIGART_LOCATION   - records the offset and marks it as set.
//   NEW_MEMMAP         - stores sp.value in dev_priv->new_memmap.
//   PCIGART_TABLE_SIZE - stores sp.value, raised to at least
//                        RADEON_PCIGART_TABLE_SIZE.
//   VBLANK_CRTC        - returns the result of radeon_vblank_crtc_set().
// An unrecognised parameter is logged and rejected with EINVAL.
//
// NOTE(review): the switch line, break statements and final return are
// absent from this listing (the embedded line numbers skip).
3104 static int radeon_cp_setparam(DRM_IOCTL_ARGS)
 
3107         drm_radeon_private_t *dev_priv = dev->dev_private;
 
3108         struct drm_file *filp_priv;
 
3109         drm_radeon_setparam_t sp;
 
3110         struct drm_radeon_driver_file_fields *radeon_priv;
 
3112         DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
 
3114         DRM_COPY_FROM_USER_IOCTL(sp, (drm_radeon_setparam_t __user *) data,
 
3118         case RADEON_SETPARAM_FB_LOCATION:
 
3119                 radeon_priv = filp_priv->driver_priv;
 
3120                 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
 
3122         case RADEON_SETPARAM_SWITCH_TILING:
 
                     // Values other than 0 and 1 match neither branch shown.
3123                 if (sp.value == 0) {
 
3124                         DRM_DEBUG("color tiling disabled\n");
 
3125                         dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
 
3126                         dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
 
3127                         dev_priv->sarea_priv->tiling_enabled = 0;
 
3128                 } else if (sp.value == 1) {
 
3129                         DRM_DEBUG("color tiling enabled\n");
 
3130                         dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
 
3131                         dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
 
3132                         dev_priv->sarea_priv->tiling_enabled = 1;
 
3135         case RADEON_SETPARAM_PCIGART_LOCATION:
 
3136                 dev_priv->pcigart_offset = sp.value;
 
3137                 dev_priv->pcigart_offset_set = 1;
 
3139         case RADEON_SETPARAM_NEW_MEMMAP:
 
3140                 dev_priv->new_memmap = sp.value;
 
3142         case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
 
                     // Clamp the requested size up to the minimum table size.
3143                 dev_priv->gart_info.table_size = sp.value;
 
3144                 if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
 
3145                         dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
 
3147         case RADEON_SETPARAM_VBLANK_CRTC:
 
3148                 return radeon_vblank_crtc_set(dev, sp.value);
 
3151                 DRM_DEBUG("Invalid parameter %d\n", sp.param);
 
3152                 return DRM_ERR(EINVAL);
 
3158 /* When a client dies:
 
3159  *    - Check for and clean up flipped page state
 
3160  *    - Free any alloced GART memory.
 
3161  *    - Free any alloced radeon surfaces.
 
3163  * DRM infrastructure takes care of reclaiming dma buffers.
 
3165 void radeon_driver_preclose(struct drm_device *dev, DRMFILE filp)
 
             // All cleanup is skipped when the device has no private state.
3167         if (dev->dev_private) {
 
3168                 drm_radeon_private_t *dev_priv = dev->dev_private;
 
                     // Page flipping is switched off, then memory held by
                     // this file in the GART and framebuffer heaps and any
                     // surfaces it holds are released.
3169                 dev_priv->page_flipping = 0;
 
3170                 radeon_mem_release(filp, dev_priv->gart_heap);
 
3171                 radeon_mem_release(filp, dev_priv->fb_heap);
 
3172                 radeon_surfaces_release(filp, dev_priv);
 
// radeon_driver_lastclose - driver hook for the last close of the device.
//
// When private state exists and the shared area reports a non-zero
// pfCurrentPage, a page flip is dispatched.  radeon_do_release() is then
// called for the device.
3176 void radeon_driver_lastclose(struct drm_device *dev)
 
3178         if (dev->dev_private) {
 
3179                 drm_radeon_private_t *dev_priv = dev->dev_private;
 
                     // sarea_priv may be unset, so it is tested before use.
3181                 if (dev_priv->sarea_priv &&
 
3182                     dev_priv->sarea_priv->pfCurrentPage != 0)
 
3183                         radeon_cp_dispatch_flip(dev);
 
3186         radeon_do_release(dev);
 
// radeon_driver_open - driver hook run when a file handle opens the device.
//
// Allocates a struct drm_radeon_driver_file_fields with drm_alloc(),
// stores it in filp_priv->driver_priv, and initialises radeon_fb_delta to
// either dev_priv->fb_location or 0.
//
// NOTE(review): the allocation failure handling, the condition choosing
// between the two radeon_fb_delta values and the return statement are on
// lines absent from this listing - confirm against the complete file.
3189 int radeon_driver_open(struct drm_device *dev, struct drm_file *filp_priv)
 
3191         drm_radeon_private_t *dev_priv = dev->dev_private;
 
3192         struct drm_radeon_driver_file_fields *radeon_priv;
 
3196             (struct drm_radeon_driver_file_fields *)
 
3197             drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
 
3202         filp_priv->driver_priv = radeon_priv;
 
3205                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
 
3207                 radeon_priv->radeon_fb_delta = 0;
 
// radeon_driver_postclose - driver hook run after a file handle is closed.
//
// Frees the per-file struct drm_radeon_driver_file_fields stored in
// filp_priv->driver_priv, using the same DRM_MEM_FILES area that
// radeon_driver_open allocates from.
3211 void radeon_driver_postclose(struct drm_device *dev, struct drm_file *filp_priv)
 
3213         struct drm_radeon_driver_file_fields *radeon_priv =
 
3214             filp_priv->driver_priv;
 
3216         drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
 
// radeon_ioctls - ioctl dispatch table for the radeon driver.
//
// Entries are placed with designated initialisers at the index given by
// DRM_IOCTL_NR() of each DRM_RADEON_* request, and pair a handler function
// with a set of permission flags.  The CP init, start, stop and reset
// entries, the INDIRECT entry and the INIT_HEAP entry carry DRM_MASTER and
// DRM_ROOT_ONLY in addition to DRM_AUTH; every other entry listed here
// carries DRM_AUTH alone.
3219 drm_ioctl_desc_t radeon_ioctls[] = {
 
3220         [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = {radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
 
3221         [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = {radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
 
3222         [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = {radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
 
3223         [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = {radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
 
3224         [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = {radeon_cp_idle, DRM_AUTH},
 
3225         [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = {radeon_cp_resume, DRM_AUTH},
 
3226         [DRM_IOCTL_NR(DRM_RADEON_RESET)] = {radeon_engine_reset, DRM_AUTH},
 
3227         [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = {radeon_fullscreen, DRM_AUTH},
 
3228         [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = {radeon_cp_swap, DRM_AUTH},
 
3229         [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = {radeon_cp_clear, DRM_AUTH},
 
3230         [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = {radeon_cp_vertex, DRM_AUTH},
 
3231         [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = {radeon_cp_indices, DRM_AUTH},
 
3232         [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = {radeon_cp_texture, DRM_AUTH},
 
3233         [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = {radeon_cp_stipple, DRM_AUTH},
 
3234         [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = {radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
 
3235         [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = {radeon_cp_vertex2, DRM_AUTH},
 
3236         [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = {radeon_cp_cmdbuf, DRM_AUTH},
 
3237         [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = {radeon_cp_getparam, DRM_AUTH},
 
3238         [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = {radeon_cp_flip, DRM_AUTH},
 
3239         [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = {radeon_mem_alloc, DRM_AUTH},
 
3240         [DRM_IOCTL_NR(DRM_RADEON_FREE)] = {radeon_mem_free, DRM_AUTH},
 
3241         [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = {radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY},
 
3242         [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = {radeon_irq_emit, DRM_AUTH},
 
3243         [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = {radeon_irq_wait, DRM_AUTH},
 
3244         [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = {radeon_cp_setparam, DRM_AUTH},
 
3245         [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = {radeon_surface_alloc, DRM_AUTH},
 
3246         [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = {radeon_surface_free, DRM_AUTH}
 
// Number of slots in the radeon_ioctls table, computed with DRM_ARRAY_SIZE.
3249 int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);