1 /* savage_state.c -- State and drawing support for Savage
 
   3  * Copyright 2004  Felix Kuehling
 
   6  * Permission is hereby granted, free of charge, to any person obtaining a
 
   7  * copy of this software and associated documentation files (the "Software"),
 
   8  * to deal in the Software without restriction, including without limitation
 
   9  * the rights to use, copy, modify, merge, publish, distribute, sub license,
 
  10  * and/or sell copies of the Software, and to permit persons to whom the
 
  11  * Software is furnished to do so, subject to the following conditions:
 
  13  * The above copyright notice and this permission notice (including the
 
  14  * next paragraph) shall be included in all copies or substantial portions
 
  17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
  18  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
  19  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 
  20  * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
 
  21  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 
  22  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 
  23  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
  26 #include "savage_drm.h"
 
  27 #include "savage_drv.h"
 
  /*
   * Emit the scissor rectangle for Savage3D-series chips.
   *
   * Starts from the pending register values state.s3d.new_scstart and
   * state.s3d.new_scend, replaces the bits selected by
   * SAVAGE_SCISSOR_MASK_S3D with the corners of *pbox (x in bits 0-10,
   * y in bits 16-26; the end corner is programmed as x2 - 1 / y2 - 1) and
   * compares the result with the cached state.s3d.scstart / scend.
   * Only when either value differs does it write a
   * BCI_CMD_WAIT | BCI_CMD_WAIT_3D command followed by a two-register
   * write starting at SAVAGE_SCSTART_S3D, update the cached values and
   * set dev_priv->waiting.
   *
   * NOTE(review): this listing omits some original lines (braces, the
   * DMA begin/commit calls and the register value writes are not
   * visible here).
   */
  29 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,

  30                                const struct drm_clip_rect * pbox)

  32         uint32_t scstart = dev_priv->state.s3d.new_scstart;

  33         uint32_t scend = dev_priv->state.s3d.new_scend;

  34         scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |

  35             ((uint32_t) pbox->x1 & 0x000007ff) |

  36             (((uint32_t) pbox->y1 << 16) & 0x07ff0000);

  37         scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |

  38             (((uint32_t) pbox->x2 - 1) & 0x000007ff) |

  39             ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);

             /* Skip the hardware update when the scissor is unchanged. */
  40         if (scstart != dev_priv->state.s3d.scstart ||

  41             scend != dev_priv->state.s3d.scend) {

  44                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);

  45                 DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);

  48                 dev_priv->state.s3d.scstart = scstart;

  49                 dev_priv->state.s3d.scend = scend;

                     /* Remember that a 3D wait was emitted; the indexed
                      * DMA dispatchers test this flag on Savage3D. */
  50                 dev_priv->waiting = 1;
 
  /*
   * Emit the scissor rectangle for the non-Savage3D ("S4") register
   * layout.
   *
   * Starts from state.s4.new_drawctrl0 / new_drawctrl1, replaces the bits
   * selected by SAVAGE_SCISSOR_MASK_S4 with the corners of *pbox (x in
   * bits 0-10, y in bits 12-23; the end corner is programmed as
   * x2 - 1 / y2 - 1) and compares the result with the cached
   * state.s4.drawctrl0 / drawctrl1.  Only when either value differs does
   * it write a BCI_CMD_WAIT | BCI_CMD_WAIT_3D command followed by a
   * two-register write starting at SAVAGE_DRAWCTRL0_S4, update the
   * cached values and set dev_priv->waiting.
   *
   * NOTE(review): this listing omits some original lines (braces, the
   * DMA begin/commit calls and the register value writes are not
   * visible here).
   */
  55 void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,

  56                               const struct drm_clip_rect * pbox)

  58         uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;

  59         uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;

  60         drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |

  61             ((uint32_t) pbox->x1 & 0x000007ff) |

  62             (((uint32_t) pbox->y1 << 12) & 0x00fff000);

  63         drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |

  64             (((uint32_t) pbox->x2 - 1) & 0x000007ff) |

  65             ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);

             /* Skip the hardware update when the scissor is unchanged. */
  66         if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||

  67             drawctrl1 != dev_priv->state.s4.drawctrl1) {

  70                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);

  71                 DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);

  74                 dev_priv->state.s4.drawctrl0 = drawctrl0;

  75                 dev_priv->state.s4.drawctrl1 = drawctrl1;

  76                 dev_priv->waiting = 1;
 
  /*
   * Validate a texture address register value for texture unit `unit`.
   *
   * Visible checks:
   *  - bits 1-2 of addr (mask 6) must equal 2, otherwise the value is
   *    rejected as having bad reserved bits;
   *  - bit 0 clear selects local memory: addr must lie in
   *    [texture_offset, texture_offset + texture_size);
   *  - otherwise (the AGP case, per the error messages) an AGP texture
   *    region must exist in dev_priv->agp_textures and addr must lie in
   *    [agp_textures->offset, agp_textures->offset + agp_textures->size).
   * Each failure is reported through DRM_ERROR.
   *
   * NOTE(review): the rest of the parameter list (where `addr` is
   * declared), the return statements and any masking of addr before the
   * range checks are not visible in this listing -- confirm against the
   * full source.
   */
  81 static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,

  84         if ((addr & 6) != 2) {  /* reserved bits */

  85                 DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);

  88         if (!(addr & 1)) {      /* local */

  90                 if (addr < dev_priv->texture_offset ||

  91                     addr >= dev_priv->texture_offset + dev_priv->texture_size) {

  93                             ("bad texAddr%d %08x (local addr out of range)\n",

  98                 if (!dev_priv->agp_textures) {

  99                         DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",

 104                 if (addr < dev_priv->agp_textures->offset ||

 105                     addr >= (dev_priv->agp_textures->offset +

 106                              dev_priv->agp_textures->size)) {

 108                             ("bad texAddr%d %08x (AGP addr out of range)\n",
 
 /*
  * Helpers for the savage_verify_state_* functions.  Both expand in a
  * scope that provides `dev_priv`, `start`, `count` and `regs`.
  *
  * SAVE_STATE(reg, where): if register `reg` lies inside the written
  * range [start, start + count), copy its new value regs[reg - start]
  * into dev_priv->state.where.  This expands to a bare `if` statement
  * without a do/while(0) wrapper, so it must not be followed directly by
  * an `else`.
  *
  * SAVE_STATE_MASK(reg, where, mask): same range test, but only the bits
  * set in `mask` are taken from the new value; the remaining bits of
  * dev_priv->state.where are preserved.
  *
  * NOTE(review): the declaration of `tmp` and the closing lines of
  * SAVE_STATE_MASK are not visible in this listing.
  */
 116 #define SAVE_STATE(reg,where)                   \

 117         if(start <= reg && start+count > reg)   \

 118                 dev_priv->state.where = regs[reg - start]

 119 #define SAVE_STATE_MASK(reg,where,mask) do {                    \

 120         if(start <= reg && start+count > reg) {                 \

 122                 tmp = regs[reg - start];                        \

 123                 dev_priv->state.where = (tmp & (mask)) |        \

 124                         (dev_priv->state.where & ~(mask));      \
 
 /*
  * Verify a Savage3D state update of `count` registers starting at
  * register `start`, with the new values in `regs`.
  *
  * The range must lie within SAVAGE_TEXPALADDR_S3D ..
  * SAVAGE_DESTTEXRWWATERMARK_S3D; otherwise an error is logged.
  * The non-scissor bits of the scissor registers are recorded in
  * state.s3d.new_scstart / new_scend (the mask passed is
  * ~SAVAGE_SCISSOR_MASK_S3D, so the scissor bits of the saved values are
  * left untouched).  If the range overlaps the texture registers
  * (SAVAGE_TEXPALADDR_S3D .. SAVAGE_TEXCTRL_S3D), texctrl and texaddr
  * are recorded and, when texturing is enabled in texctrl, the texture
  * address is validated with savage_verify_texaddr() and that result is
  * returned.
  *
  * NOTE(review): the remaining return statements are not visible in
  * this listing.
  */
 128 static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,

 129                                    unsigned int start, unsigned int count,

 130                                    const uint32_t *regs)

 132         if (start < SAVAGE_TEXPALADDR_S3D ||

 133             start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {

 134                 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",

 135                           start, start + count - 1);

 139         SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,

 140                         ~SAVAGE_SCISSOR_MASK_S3D);

 141         SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,

 142                         ~SAVAGE_SCISSOR_MASK_S3D);

 144         /* if any texture regs were changed ... */

 145         if (start <= SAVAGE_TEXCTRL_S3D &&

 146             start + count > SAVAGE_TEXPALADDR_S3D) {

 147                 /* ... check texture state */

 148                 SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);

 149                 SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);

 150                 if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)

 151                         return savage_verify_texaddr(dev_priv, 0,

 152                                                 dev_priv->state.s3d.texaddr);
 
 /*
  * Verify a state update for the non-Savage3D ("S4") register layout:
  * `count` registers starting at register `start`, new values in `regs`.
  *
  * The range must lie within SAVAGE_DRAWLOCALCTRL_S4 ..
  * SAVAGE_TEXBLENDCOLOR_S4; otherwise an error is logged.
  * The non-scissor bits of DRAWCTRL0/1 are recorded in
  * state.s4.new_drawctrl0 / new_drawctrl1 (the mask passed is
  * ~SAVAGE_SCISSOR_MASK_S4, so the scissor bits of the saved values are
  * left untouched).  If the range overlaps the texture registers
  * (SAVAGE_TEXPALADDR_S4 .. SAVAGE_TEXDESCR_S4), texdescr and both
  * texture addresses are recorded, and the address of every texture
  * unit enabled in texdescr is validated with savage_verify_texaddr();
  * the results of both units are OR-ed into `ret`.
  *
  * NOTE(review): the declaration of `ret` and the return statements are
  * not visible in this listing.
  */
 158 static int savage_verify_state_s4(drm_savage_private_t * dev_priv,

 159                                   unsigned int start, unsigned int count,

 160                                   const uint32_t *regs)

 164         if (start < SAVAGE_DRAWLOCALCTRL_S4 ||

 165             start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {

 166                 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",

 167                           start, start + count - 1);

 171         SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,

 172                         ~SAVAGE_SCISSOR_MASK_S4);

 173         SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,

 174                         ~SAVAGE_SCISSOR_MASK_S4);

 176         /* if any texture regs were changed ... */

 177         if (start <= SAVAGE_TEXDESCR_S4 &&

 178             start + count > SAVAGE_TEXPALADDR_S4) {

 179                 /* ... check texture state */

 180                 SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);

 181                 SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);

 182                 SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);

 183                 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)

 184                         ret |= savage_verify_texaddr(dev_priv, 0,

 185                                                 dev_priv->state.s4.texaddr0);

 186                 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)

 187                         ret |= savage_verify_texaddr(dev_priv, 1,

 188                                                 dev_priv->state.s4.texaddr1);
 
 195 #undef SAVE_STATE_MASK
 
 /*
  * Handle a state command: verify the register range and emit the
  * register writes, leaving out the scissor registers.
  *
  * `count` and `start` come from cmd_header->state and `regs` holds the
  * new register values.  The chip-specific verify function is called
  * first.  Because the scissor registers are emitted separately (see
  * the in-line comments), the range is then trimmed around them:
  *  - if it starts below the first scissor register, `count` is cut to
  *    the part before it and `count2` receives the number of registers
  *    after the second scissor register;
  *  - if it starts on a scissor register, start/count are advanced to
  *    the first register after them.
  * bci_size is the number of dwords needed: one dword per register plus
  * one header dword for every run of up to 255 registers, for both
  * parts.  When cmd_header->state.global is set, one extra dword is
  * reserved for a BCI_CMD_WAIT | BCI_CMD_WAIT_3D command and
  * dev_priv->waiting is set.  Registers are then emitted in runs of at
  * most 255.
  *
  * NOTE(review): several original lines are missing from this listing
  * (declaration of `ret`, error returns, the non-global BEGIN_DMA path,
  * the emit loops and the final return); the description above covers
  * only what is visible.
  */
 197 static int savage_dispatch_state(drm_savage_private_t * dev_priv,

 198                                  const drm_savage_cmd_header_t * cmd_header,

 199                                  const uint32_t *regs)

 201         unsigned int count = cmd_header->state.count;

 202         unsigned int start = cmd_header->state.start;

 203         unsigned int count2 = 0;

 204         unsigned int bci_size;

 211         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {

 212                 ret = savage_verify_state_s3d(dev_priv, start, count, regs);

 215                 /* scissor regs are emitted in savage_dispatch_draw */

 216                 if (start < SAVAGE_SCSTART_S3D) {

 217                         if (start + count > SAVAGE_SCEND_S3D + 1)

 218                                 count2 = count - (SAVAGE_SCEND_S3D + 1 - start);

 219                         if (start + count > SAVAGE_SCSTART_S3D)

 220                                 count = SAVAGE_SCSTART_S3D - start;

 221                 } else if (start <= SAVAGE_SCEND_S3D) {

 222                         if (start + count > SAVAGE_SCEND_S3D + 1) {

 223                                 count -= SAVAGE_SCEND_S3D + 1 - start;

 224                                 start = SAVAGE_SCEND_S3D + 1;

 229                 ret = savage_verify_state_s4(dev_priv, start, count, regs);

 232                 /* scissor regs are emitted in savage_dispatch_draw */

 233                 if (start < SAVAGE_DRAWCTRL0_S4) {

 234                         if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)

 236                                          (SAVAGE_DRAWCTRL1_S4 + 1 - start);

 237                         if (start + count > SAVAGE_DRAWCTRL0_S4)

 238                                 count = SAVAGE_DRAWCTRL0_S4 - start;

 239                 } else if (start <= SAVAGE_DRAWCTRL1_S4) {

 240                         if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {

 241                                 count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;

 242                                 start = SAVAGE_DRAWCTRL1_S4 + 1;

             /* One dword per register plus one header dword per run of up
              * to 255 registers, for both parts of the split range. */
 248         bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;

 250         if (cmd_header->state.global) {

 251                 BEGIN_DMA(bci_size + 1);

 252                 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);

 253                 dev_priv->waiting = 1;

 260                         unsigned int n = count < 255 ? count : 255;

 261                         DMA_SET_REGISTERS(start, n);
 
 /*
  * Dispatch a non-indexed primitive whose vertices live in the DMA
  * vertex buffer `dmabuf`.
  *
  * Primitive type, skip flags, vertex count (n) and first vertex
  * (start) are read from cmd_header->prim.  Visible validation:
  *  - the primitive must be TRILIST_201, TRILIST, TRISTRIP or TRIFAN
  *    (TRILIST_201 is rewritten to TRILIST before falling into the
  *    TRILIST case);
  *  - on non-Savage3D chips the vertex size implied by the skip flags
  *    (10 dwords minus one per set bit among skip bits 0-7) must be
  *    exactly 8, and TRILIST_201 is rejected;
  *  - start + n must not exceed dmabuf->total / 32.
  * The vertex buffer address register is reprogrammed only when
  * dmabuf->bus_address differs from the cached state.common.vbaddr.
  * Vertices are then referenced by consecutive indices, emitted in
  * batches of at most 255, two 16-bit indices per dword.
  *
  * NOTE(review): many original lines are missing from this listing
  * (declarations of `i`, the vertex-count checks, the `if (reorder)`
  * test, DMA_FLUSH, the batch loop header and all return statements).
  * The local array `reorder` declared inside the batch loop shadows the
  * `unsigned char reorder` flag declared at the top of the function.
  */
 278 static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,

 279                                     const drm_savage_cmd_header_t * cmd_header,

 280                                     const struct drm_buf * dmabuf)

 282         unsigned char reorder = 0;

 283         unsigned int prim = cmd_header->prim.prim;

 284         unsigned int skip = cmd_header->prim.skip;

 285         unsigned int n = cmd_header->prim.count;

 286         unsigned int start = cmd_header->prim.start;

 291                 DRM_ERROR("called without dma buffers!\n");

 299         case SAVAGE_PRIM_TRILIST_201:

 301                 prim = SAVAGE_PRIM_TRILIST;

 302         case SAVAGE_PRIM_TRILIST:

 304                         DRM_ERROR("wrong number of vertices %u in TRILIST\n",

 309         case SAVAGE_PRIM_TRISTRIP:

 310         case SAVAGE_PRIM_TRIFAN:

 313                             ("wrong number of vertices %u in TRIFAN/STRIP\n",

 319                 DRM_ERROR("invalid primitive type %u\n", prim);

 323         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {

 325                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);

                     /* Vertex size in dwords: 10 minus one for each set
                      * skip bit; vertex DMA needs exactly 8 here. */
 329                 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -

 330                     (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -

 331                     (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);

 332                 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {

 333                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);

 337                         DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");

             /* dmabuf->total / 32 is the number of 32-byte vertex slots. */
 342         if (start + n > dmabuf->total / 32) {

 343                 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",

 344                           start, start + n - 1, dmabuf->total / 32);

 348         /* Vertex DMA doesn't work with command DMA at the same time,

 349          * so we use BCI_... to submit commands here. Flush buffered

 350          * faked DMA first. */

 353         if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {

 355                 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);

 356                 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);

 357                 dev_priv->state.common.vbaddr = dmabuf->bus_address;

 359         if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {

 360                 /* Workaround for what looks like a hardware bug. If a

 361                  * WAIT_3D_IDLE was emitted some time before the

 362                  * indexed drawing command then the engine will lock

 363                  * up. There are two known workarounds:

 364                  * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */

 366                 for (i = 0; i < 63; ++i)

 367                         BCI_WRITE(BCI_CMD_WAIT);

 368                 dev_priv->waiting = 0;

 373                 /* Can emit up to 255 indices (85 triangles) at once. */

 374                 unsigned int count = n > 255 ? 255 : n;

 376                         /* Need to reorder indices for correct flat

 377                          * shading while preserving the clock sense

 378                          * for correct culling. Only on Savage3D. */

 379                         int reorder[3] = { -1, -1, -1 };

 380                         reorder[start % 3] = 2;

 382                         BEGIN_BCI((count + 1 + 1) / 2);

 383                         BCI_DRAW_INDICES_S3D(count, prim, start + 2);

 385                         for (i = start + 1; i + 1 < start + count; i += 2)

 386                                 BCI_WRITE((i + reorder[i % 3]) |

 388                                             reorder[(i + 1) % 3]) << 16));

 389                         if (i < start + count)

 390                                 BCI_WRITE(i + reorder[i % 3]);

 391                 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {

 392                         BEGIN_BCI((count + 1 + 1) / 2);

 393                         BCI_DRAW_INDICES_S3D(count, prim, start);

 395                         for (i = start + 1; i + 1 < start + count; i += 2)

 396                                 BCI_WRITE(i | ((i + 1) << 16));

 397                         if (i < start + count)

 400                         BEGIN_BCI((count + 2 + 1) / 2);

 401                         BCI_DRAW_INDICES_S4(count, prim, skip);

 403                         for (i = start; i + 1 < start + count; i += 2)

 404                                 BCI_WRITE(i | ((i + 1) << 16));

 405                         if (i < start + count)

                     /* Presumably marks the following batches as a
                      * continuation of the same primitive -- confirm. */
 412                 prim |= BCI_CMD_DRAW_CONT;
 
 /*
  * Dispatch a non-indexed primitive whose vertices are copied from the
  * client vertex buffer `vtxbuf` into the command stream.
  *
  * Primitive type, skip flags, vertex count (n) and first vertex
  * (start) are read from cmd_header->prim.  `vtxbuf` is indexed in
  * 32-bit words with a per-vertex stride of `vb_stride` words; the
  * bounds check divides `vb_size` by (vb_stride * 4), so vb_size is
  * apparently in bytes.  Visible validation:
  *  - the primitive must be TRILIST_201, TRILIST, TRISTRIP or TRIFAN
  *    (TRILIST_201 is rewritten to TRILIST);
  *  - skip must not exceed SAVAGE_SKIP_ALL_S3D / SAVAGE_SKIP_ALL_S4;
  *  - the vertex size (8 dwords on Savage3D, 10 otherwise, minus one per
  *    set bit among skip bits 0-7) must not exceed vb_stride;
  *  - start + n must not exceed vb_size / (vb_stride * 4).
  * Vertices are emitted in batches of at most 255; each batch reserves
  * count * vtx_size + 1 dwords (one command dword plus the vertex data).
  * When stride and vertex size are equal the non-reordered path starts
  * its copy at the first vertex instead of looping per vertex.
  *
  * NOTE(review): many original lines are missing from this listing
  * (declarations of `i`, vertex-count checks, the `if (reorder)` test,
  * the batch loop header, DMA_COMMIT and all return statements).  The
  * local array `reorder` shadows the `unsigned char reorder` flag.
  */
 418 static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,

 419                                    const drm_savage_cmd_header_t * cmd_header,

 420                                    const uint32_t *vtxbuf, unsigned int vb_size,

 421                                    unsigned int vb_stride)

 423         unsigned char reorder = 0;

 424         unsigned int prim = cmd_header->prim.prim;

 425         unsigned int skip = cmd_header->prim.skip;

 426         unsigned int n = cmd_header->prim.count;

 427         unsigned int start = cmd_header->prim.start;

 428         unsigned int vtx_size;

 436         case SAVAGE_PRIM_TRILIST_201:

 438                 prim = SAVAGE_PRIM_TRILIST;

 439         case SAVAGE_PRIM_TRILIST:

 441                         DRM_ERROR("wrong number of vertices %u in TRILIST\n",

 446         case SAVAGE_PRIM_TRISTRIP:

 447         case SAVAGE_PRIM_TRIFAN:

 450                             ("wrong number of vertices %u in TRIFAN/STRIP\n",

 456                 DRM_ERROR("invalid primitive type %u\n", prim);

 460         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {

 461                 if (skip > SAVAGE_SKIP_ALL_S3D) {

 462                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);

 465                 vtx_size = 8;   /* full vertex */

 467                 if (skip > SAVAGE_SKIP_ALL_S4) {

 468                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);

 471                 vtx_size = 10;  /* full vertex */

             /* Each set skip bit removes one dword from the vertex. */
 474         vtx_size -= (skip & 1) + (skip >> 1 & 1) +

 475             (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +

 476             (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

 478         if (vtx_size > vb_stride) {

 479                 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",

 480                           vtx_size, vb_stride);

 484         if (start + n > vb_size / (vb_stride * 4)) {

 485                 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",

 486                           start, start + n - 1, vb_size / (vb_stride * 4));

 492                 /* Can emit up to 255 vertices (85 triangles) at once. */

 493                 unsigned int count = n > 255 ? 255 : n;

 495                         /* Need to reorder vertices for correct flat

 496                          * shading while preserving the clock sense

 497                          * for correct culling. Only on Savage3D. */

 498                         int reorder[3] = { -1, -1, -1 };

 499                         reorder[start % 3] = 2;

 501                         BEGIN_DMA(count * vtx_size + 1);

 502                         DMA_DRAW_PRIMITIVE(count, prim, skip);

 504                         for (i = start; i < start + count; ++i) {

 505                                 unsigned int j = i + reorder[i % 3];

 506                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);

 511                         BEGIN_DMA(count * vtx_size + 1);

 512                         DMA_DRAW_PRIMITIVE(count, prim, skip);

 514                         if (vb_stride == vtx_size) {

 515                                 DMA_COPY(&vtxbuf[vb_stride * start],

 518                                 for (i = start; i < start + count; ++i) {

 519                                         DMA_COPY(&vtxbuf [vb_stride * i],

 530                 prim |= BCI_CMD_DRAW_CONT;
 
 /*
  * Dispatch an indexed primitive whose vertices live in the DMA vertex
  * buffer `dmabuf`; the indices are read from the array `idx`.
  *
  * Primitive type, skip flags and index count (n) are read from
  * cmd_header->idx.  Visible validation:
  *  - the primitive must be TRILIST_201, TRILIST, TRISTRIP or TRIFAN
  *    (TRILIST_201 is rewritten to TRILIST);
  *  - on non-Savage3D chips the vertex size implied by the skip flags
  *    (10 dwords minus one per set bit among skip bits 0-7) must be
  *    exactly 8, and TRILIST_201 is rejected;
  *  - every index of a batch is compared against dmabuf->total / 32.
  * The vertex buffer address register is reprogrammed only when
  * dmabuf->bus_address differs from the cached state.common.vbaddr.
  * Indices are emitted in batches of at most 255, two 16-bit indices
  * per dword.
  *
  * NOTE(review): many original lines are missing from this listing
  * (the `idx` parameter declaration, declarations of `i`, index-count
  * checks, the `if (reorder)` test, DMA_FLUSH, the batch loop header,
  * the odd-count tail writes and all return statements).  The local
  * array `reorder` shadows the `unsigned char reorder` flag.
  */
 536 static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,

 537                                    const drm_savage_cmd_header_t * cmd_header,

 539                                    const struct drm_buf * dmabuf)

 541         unsigned char reorder = 0;

 542         unsigned int prim = cmd_header->idx.prim;

 543         unsigned int skip = cmd_header->idx.skip;

 544         unsigned int n = cmd_header->idx.count;

 549                 DRM_ERROR("called without dma buffers!\n");

 557         case SAVAGE_PRIM_TRILIST_201:

 559                 prim = SAVAGE_PRIM_TRILIST;

 560         case SAVAGE_PRIM_TRILIST:

 562                         DRM_ERROR("wrong number of indices %u in TRILIST\n", n);

 566         case SAVAGE_PRIM_TRISTRIP:

 567         case SAVAGE_PRIM_TRIFAN:

 570                             ("wrong number of indices %u in TRIFAN/STRIP\n", n);

 575                 DRM_ERROR("invalid primitive type %u\n", prim);

 579         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {

 581                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);

                     /* Vertex size in dwords: 10 minus one for each set
                      * skip bit; vertex DMA needs exactly 8 here. */
 585                 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -

 586                     (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -

 587                     (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);

 588                 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {

 589                         DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);

 593                         DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");

 598         /* Vertex DMA doesn't work with command DMA at the same time,

 599          * so we use BCI_... to submit commands here. Flush buffered

 600          * faked DMA first. */

 603         if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {

 605                 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);

 606                 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);

 607                 dev_priv->state.common.vbaddr = dmabuf->bus_address;

 609         if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {

 610                 /* Workaround for what looks like a hardware bug. If a

 611                  * WAIT_3D_IDLE was emitted some time before the

 612                  * indexed drawing command then the engine will lock

 613                  * up. There are two known workarounds:

 614                  * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */

 616                 for (i = 0; i < 63; ++i)

 617                         BCI_WRITE(BCI_CMD_WAIT);

 618                 dev_priv->waiting = 0;

 623                 /* Can emit up to 255 indices (85 triangles) at once. */

 624                 unsigned int count = n > 255 ? 255 : n;

 627                 for (i = 0; i < count; ++i) {

                             /* NOTE(review): `>` accepts an index equal to
                              * dmabuf->total / 32, whereas the non-indexed
                              * path rejects start + n > total / 32 (last
                              * valid vertex total / 32 - 1).  Confirm
                              * whether `>=` is intended here. */
 628                         if (idx[i] > dmabuf->total / 32) {

 629                                 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",

 630                                           i, idx[i], dmabuf->total / 32);

 636                         /* Need to reorder indices for correct flat

 637                          * shading while preserving the clock sense

 638                          * for correct culling. Only on Savage3D. */

 639                         int reorder[3] = { 2, -1, -1 };

 641                         BEGIN_BCI((count + 1 + 1) / 2);

 642                         BCI_DRAW_INDICES_S3D(count, prim, idx[2]);

 644                         for (i = 1; i + 1 < count; i += 2)

 645                                 BCI_WRITE(idx[i + reorder[i % 3]] |

 647                                            reorder[(i + 1) % 3]] << 16));

 649                                 BCI_WRITE(idx[i + reorder[i % 3]]);

 650                 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {

 651                         BEGIN_BCI((count + 1 + 1) / 2);

 652                         BCI_DRAW_INDICES_S3D(count, prim, idx[0]);

 654                         for (i = 1; i + 1 < count; i += 2)

 655                                 BCI_WRITE(idx[i] | (idx[i + 1] << 16));

 659                         BEGIN_BCI((count + 2 + 1) / 2);

 660                         BCI_DRAW_INDICES_S4(count, prim, skip);

 662                         for (i = 0; i + 1 < count; i += 2)

 663                                 BCI_WRITE(idx[i] | (idx[i + 1] << 16));

 671                 prim |= BCI_CMD_DRAW_CONT;
 
 /*
  * Dispatch an indexed primitive whose vertices are copied from the
  * client vertex buffer `vtxbuf` into the command stream; the indices
  * are read from the array `idx`.
  *
  * Primitive type, skip flags and index count (n) are read from
  * cmd_header->idx.  `vtxbuf` is indexed in 32-bit words with a
  * per-vertex stride of `vb_stride` words; the bounds check divides
  * `vb_size` by (vb_stride * 4), so vb_size is apparently in bytes.
  * Visible validation:
  *  - the primitive must be TRILIST_201, TRILIST, TRISTRIP or TRIFAN
  *    (TRILIST_201 is rewritten to TRILIST);
  *  - skip must not exceed SAVAGE_SKIP_ALL_S3D / SAVAGE_SKIP_ALL_S4;
  *  - the vertex size (8 dwords on Savage3D, 10 otherwise, minus one per
  *    set bit among skip bits 0-7) must not exceed vb_stride;
  *  - every index of a batch is compared against
  *    vb_size / (vb_stride * 4).
  * Vertices are emitted in batches of at most 255; each batch reserves
  * count * vtx_size + 1 dwords (one command dword plus the vertex data)
  * and copies vtx_size words per referenced vertex.
  *
  * NOTE(review): many original lines are missing from this listing
  * (the `idx` parameter declaration, declarations of `i`, index-count
  * checks, the `if (reorder)` test, the batch loop header, DMA_COMMIT
  * and all return statements).  The local array `reorder` shadows the
  * `unsigned char reorder` flag.
  */
 677 static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,

 678                                   const drm_savage_cmd_header_t * cmd_header,

 680                                   const uint32_t *vtxbuf,

 681                                   unsigned int vb_size, unsigned int vb_stride)

 683         unsigned char reorder = 0;

 684         unsigned int prim = cmd_header->idx.prim;

 685         unsigned int skip = cmd_header->idx.skip;

 686         unsigned int n = cmd_header->idx.count;

 687         unsigned int vtx_size;

 695         case SAVAGE_PRIM_TRILIST_201:

 697                 prim = SAVAGE_PRIM_TRILIST;

 698         case SAVAGE_PRIM_TRILIST:

 700                         DRM_ERROR("wrong number of indices %u in TRILIST\n", n);

 704         case SAVAGE_PRIM_TRISTRIP:

 705         case SAVAGE_PRIM_TRIFAN:

 708                             ("wrong number of indices %u in TRIFAN/STRIP\n", n);

 713                 DRM_ERROR("invalid primitive type %u\n", prim);

 717         if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {

 718                 if (skip > SAVAGE_SKIP_ALL_S3D) {

 719                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);

 722                 vtx_size = 8;   /* full vertex */

 724                 if (skip > SAVAGE_SKIP_ALL_S4) {

 725                         DRM_ERROR("invalid skip flags 0x%04x\n", skip);

 728                 vtx_size = 10;  /* full vertex */

             /* Each set skip bit removes one dword from the vertex. */
 731         vtx_size -= (skip & 1) + (skip >> 1 & 1) +

 732             (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +

 733             (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);

 735         if (vtx_size > vb_stride) {

 736                 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",

 737                           vtx_size, vb_stride);

 743                 /* Can emit up to 255 vertices (85 triangles) at once. */

 744                 unsigned int count = n > 255 ? 255 : n;

 747                 for (i = 0; i < count; ++i) {

                             /* NOTE(review): `>` accepts an index equal to
                              * vb_size / (vb_stride * 4), i.e. one past the
                              * last complete vertex in the buffer.  Confirm
                              * whether `>=` is intended here. */
 748                         if (idx[i] > vb_size / (vb_stride * 4)) {

 749                                 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",

 750                                           i, idx[i], vb_size / (vb_stride * 4));

 756                         /* Need to reorder vertices for correct flat

 757                          * shading while preserving the clock sense

 758                          * for correct culling. Only on Savage3D. */

 759                         int reorder[3] = { 2, -1, -1 };

 761                         BEGIN_DMA(count * vtx_size + 1);

 762                         DMA_DRAW_PRIMITIVE(count, prim, skip);

 764                         for (i = 0; i < count; ++i) {

 765                                 unsigned int j = idx[i + reorder[i % 3]];

 766                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);

 771                         BEGIN_DMA(count * vtx_size + 1);

 772                         DMA_DRAW_PRIMITIVE(count, prim, skip);

 774                         for (i = 0; i < count; ++i) {

 775                                 unsigned int j = idx[i];

 776                                 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);

 785                 prim |= BCI_CMD_DRAW_CONT;
 
 /*
  * Clear the buffers selected in cmd_header->clear0.flags (any of
  * SAVAGE_FRONT, SAVAGE_BACK, SAVAGE_DEPTH) inside every clip rectangle
  * in `boxes`.
  *
  * The clear is a rectangle command with ROP 0xCC that sends the colour
  * data->clear1.value to a newly specified destination buffer.  If
  * data->clear1.mask is not 0xffffffff, it is written to
  * SAVAGE_BITPLANEWTMASK before the clears and the register is reset to
  * 0xffffffff afterwards.  For each box, six dwords are reserved per
  * selected buffer: the command, the buffer offset, the buffer
  * descriptor, the clear value, the x/y position and the width/height.
  *
  * NOTE(review): some original lines are missing from this listing
  * (the `nbox` parameter, the declaration of `buf`, the per-buffer
  * flag test / switch, DMA begin/commit calls and return statements).
  */
 791 static int savage_dispatch_clear(drm_savage_private_t * dev_priv,

 792                                  const drm_savage_cmd_header_t * cmd_header,

 793                                  const drm_savage_cmd_header_t *data,

 795                                  const struct drm_clip_rect *boxes)

 797         unsigned int flags = cmd_header->clear0.flags;

 798         unsigned int clear_cmd;

 799         unsigned int i, nbufs;

 805         clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |

 806             BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;

 807         BCI_CMD_SET_ROP(clear_cmd, 0xCC);

             /* Number of buffers selected by the flags. */
 809         nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +

 810             ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);

 814         if (data->clear1.mask != 0xffffffff) {

 817                 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);

 818                 DMA_WRITE(data->clear1.mask);

 821         for (i = 0; i < nbox; ++i) {

 822                 unsigned int x, y, w, h;

 824                 x = boxes[i].x1, y = boxes[i].y1;

 825                 w = boxes[i].x2 - boxes[i].x1;

 826                 h = boxes[i].y2 - boxes[i].y1;

 827                 BEGIN_DMA(nbufs * 6);

                     /* Walks the buffer flag bits from SAVAGE_FRONT up to
                      * SAVAGE_DEPTH by shifting left. */
 828                 for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {

 831                         DMA_WRITE(clear_cmd);

 834                                 DMA_WRITE(dev_priv->front_offset);

 835                                 DMA_WRITE(dev_priv->front_bd);

 838                                 DMA_WRITE(dev_priv->back_offset);

 839                                 DMA_WRITE(dev_priv->back_bd);

 842                                 DMA_WRITE(dev_priv->depth_offset);

 843                                 DMA_WRITE(dev_priv->depth_bd);

 846                         DMA_WRITE(data->clear1.value);

 847                         DMA_WRITE(BCI_X_Y(x, y));

 848                         DMA_WRITE(BCI_W_H(w, h));

             /* Restore the default write mask if it was changed above. */
 852         if (data->clear1.mask != 0xffffffff) {

 855                 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);

 856                 DMA_WRITE(0xffffffff);
 
 /*
  * Copy the back buffer to the destination selected by BCI_CMD_DEST_GBD
  * for every clip rectangle in `boxes`.
  *
  * The copy is a rectangle command with ROP 0xCC whose source is given
  * by the back buffer offset and descriptor written after the command.
  * For each box the same x1/y1 position is written twice (presumably
  * once as source and once as destination position -- confirm),
  * followed by the box width and height.
  *
  * NOTE(review): some original lines are missing from this listing
  * (declaration of `i`, the nbox check, DMA begin/commit calls, the
  * write of swap_cmd itself and return statements).
  */
 863 static int savage_dispatch_swap(drm_savage_private_t * dev_priv,

 864                                 unsigned int nbox, const struct drm_clip_rect *boxes)

 866         unsigned int swap_cmd;

 873         swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |

 874             BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;

 875         BCI_CMD_SET_ROP(swap_cmd, 0xCC);

 877         for (i = 0; i < nbox; ++i) {

 880                 DMA_WRITE(dev_priv->back_offset);

 881                 DMA_WRITE(dev_priv->back_bd);

 882                 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));

 883                 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));

 884                 DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,

 885                                   boxes[i].y2 - boxes[i].y1));
 
 /*
  * Replay a run of drawing commands once per clip rectangle.
  *
  * For every box in `boxes` the chip-specific emit_clip_rect hook is
  * called, then the commands between `start` and `end` are walked and
  * each one is handed to the dispatcher matching its type:
  *   SAVAGE_CMD_DMA_PRIM -> savage_dispatch_dma_prim
  *   SAVAGE_CMD_VB_PRIM  -> savage_dispatch_vb_prim
  *   SAVAGE_CMD_DMA_IDX  -> savage_dispatch_dma_idx
  *   SAVAGE_CMD_VB_IDX   -> savage_dispatch_vb_idx
  * For the indexed commands the 16-bit index array is read from the
  * command stream itself (cmdbuf cast to const uint16_t *), and j is
  * the number of command-header-sized slots it occupies (four indices
  * per slot).  Any other command type is logged as an implementation
  * error.
  *
  * NOTE(review): some original lines are missing from this listing
  * (the `nbox` parameter, declarations of `i`, `j` and `ret`, the
  * initialisation and advancing of `cmdbuf`, `break` statements, error
  * handling and return statements).
  */
 892 static int savage_dispatch_draw(drm_savage_private_t * dev_priv,

 893                                 const drm_savage_cmd_header_t *start,

 894                                 const drm_savage_cmd_header_t *end,

 895                                 const struct drm_buf * dmabuf,

 896                                 const unsigned int *vtxbuf,

 897                                 unsigned int vb_size, unsigned int vb_stride,

 899                                 const struct drm_clip_rect *boxes)

 904         for (i = 0; i < nbox; ++i) {

 905                 const drm_savage_cmd_header_t *cmdbuf;

 906                 dev_priv->emit_clip_rect(dev_priv, &boxes[i]);

 909                 while (cmdbuf < end) {

 910                         drm_savage_cmd_header_t cmd_header;

 911                         cmd_header = *cmdbuf;

 913                         switch (cmd_header.cmd.cmd) {

 914                         case SAVAGE_CMD_DMA_PRIM:

 915                                 ret = savage_dispatch_dma_prim(

 916                                         dev_priv, &cmd_header, dmabuf);

 918                         case SAVAGE_CMD_VB_PRIM:

 919                                 ret = savage_dispatch_vb_prim(

 920                                         dev_priv, &cmd_header,

 921                                         vtxbuf, vb_size, vb_stride);

 923                         case SAVAGE_CMD_DMA_IDX:

 924                                 j = (cmd_header.idx.count + 3) / 4;

 925                                 /* j was checked in savage_bci_cmdbuf */

 926                                 ret = savage_dispatch_dma_idx(dev_priv,

 927                                         &cmd_header, (const uint16_t *)cmdbuf,

 931                         case SAVAGE_CMD_VB_IDX:

 932                                 j = (cmd_header.idx.count + 3) / 4;

 933                                 /* j was checked in savage_bci_cmdbuf */

 934                                 ret = savage_dispatch_vb_idx(dev_priv,

 935                                         &cmd_header, (const uint16_t *)cmdbuf,

 936                                         (const uint32_t *)vtxbuf, vb_size,

 941                                 /* What's the best return code? EFAULT? */

 942                                 DRM_ERROR("IMPLEMENTATION ERROR: "

 943                                           "non-drawing-command %d\n",
 
 956 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
 
 958         drm_savage_private_t *dev_priv = dev->dev_private;
 
 959         struct drm_device_dma *dma = dev->dma;
 
 960         struct drm_buf *dmabuf;
 
 961         drm_savage_cmdbuf_t *cmdbuf = data;
 
 962         drm_savage_cmd_header_t *kcmd_addr = NULL;
 
 963         drm_savage_cmd_header_t *first_draw_cmd;
 
 964         unsigned int *kvb_addr = NULL;
 
 965         struct drm_clip_rect *kbox_addr = NULL;
 
 971         LOCK_TEST_WITH_RETURN(dev, file_priv);
 
 973         if (dma && dma->buflist) {
 
 974                 if (cmdbuf->dma_idx > dma->buf_count) {
 
 976                             ("vertex buffer index %u out of range (0-%u)\n",
 
 977                              cmdbuf->dma_idx, dma->buf_count - 1);
 
 980                 dmabuf = dma->buflist[cmdbuf->dma_idx];
 
 985         /* Copy the user buffers into kernel temporary areas.  This hasn't been
 
 986          * a performance loss compared to VERIFYAREA_READ/
 
 987          * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
 
 988          * for locking on FreeBSD.
 
 991                 kcmd_addr = drm_alloc(cmdbuf->size * 8, DRM_MEM_DRIVER);
 
 992                 if (kcmd_addr == NULL)
 
 995                 if (DRM_COPY_FROM_USER(kcmd_addr, cmdbuf->cmd_addr,
 
 998                         drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
 
1001                 cmdbuf->cmd_addr = kcmd_addr;
 
1003         if (cmdbuf->vb_size) {
 
1004                 kvb_addr = drm_alloc(cmdbuf->vb_size, DRM_MEM_DRIVER);
 
1005                 if (kvb_addr == NULL) {
 
1010                 if (DRM_COPY_FROM_USER(kvb_addr, cmdbuf->vb_addr,
 
1015                 cmdbuf->vb_addr = kvb_addr;
 
1018                 kbox_addr = drm_alloc(cmdbuf->nbox * sizeof(struct drm_clip_rect),
 
1020                 if (kbox_addr == NULL) {
 
1025                 if (DRM_COPY_FROM_USER(kbox_addr, cmdbuf->box_addr,
 
1026                                        cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
 
1030         cmdbuf->box_addr = kbox_addr;
 
1033         /* Make sure writes to DMA buffers are finished before sending
 
1034          * DMA commands to the graphics hardware. */
 
1035         DRM_MEMORYBARRIER();
 
1037         /* Coming from user space. Don't know if the Xserver has
 
1038          * emitted wait commands. Assuming the worst. */
 
1039         dev_priv->waiting = 1;
 
1042         first_draw_cmd = NULL;
 
1043         while (i < cmdbuf->size) {
 
1044                 drm_savage_cmd_header_t cmd_header;
 
1045                 cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
 
1049                 /* Group drawing commands with same state to minimize
 
1050                  * iterations over clip rects. */
 
1052                 switch (cmd_header.cmd.cmd) {
 
1053                 case SAVAGE_CMD_DMA_IDX:
 
1054                 case SAVAGE_CMD_VB_IDX:
 
1055                         j = (cmd_header.idx.count + 3) / 4;
 
1056                         if (i + j > cmdbuf->size) {
 
1057                                 DRM_ERROR("indexed drawing command extends "
 
1058                                           "beyond end of command buffer\n");
 
1063                 case SAVAGE_CMD_DMA_PRIM:
 
1064                 case SAVAGE_CMD_VB_PRIM:
 
1065                         if (!first_draw_cmd)
 
1066                                 first_draw_cmd = cmdbuf->cmd_addr - 1;
 
1067                         cmdbuf->cmd_addr += j;
 
1071                         if (first_draw_cmd) {
 
1072                                 ret = savage_dispatch_draw(
 
1073                                       dev_priv, first_draw_cmd,
 
1074                                       cmdbuf->cmd_addr - 1,
 
1075                                       dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
 
1077                                       cmdbuf->nbox, cmdbuf->box_addr);
 
1080                                 first_draw_cmd = NULL;
 
1086                 switch (cmd_header.cmd.cmd) {
 
1087                 case SAVAGE_CMD_STATE:
 
1088                         j = (cmd_header.state.count + 1) / 2;
 
1089                         if (i + j > cmdbuf->size) {
 
1090                                 DRM_ERROR("command SAVAGE_CMD_STATE extends "
 
1091                                           "beyond end of command buffer\n");
 
1096                         ret = savage_dispatch_state(dev_priv, &cmd_header,
 
1097                                 (const uint32_t *)cmdbuf->cmd_addr);
 
1098                         cmdbuf->cmd_addr += j;
 
1101                 case SAVAGE_CMD_CLEAR:
 
1102                         if (i + 1 > cmdbuf->size) {
 
1103                                 DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
 
1104                                           "beyond end of command buffer\n");
 
1109                         ret = savage_dispatch_clear(dev_priv, &cmd_header,
 
1116                 case SAVAGE_CMD_SWAP:
 
1117                         ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
 
1121                         DRM_ERROR("invalid command 0x%x\n",
 
1122                                   cmd_header.cmd.cmd);
 
1134         if (first_draw_cmd) {
 
1135                 ret = savage_dispatch_draw (
 
1136                         dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
 
1137                         cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
 
1138                         cmdbuf->nbox, cmdbuf->box_addr);
 
1147         if (dmabuf && cmdbuf->discard) {
 
1148                 drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
 
1150                 event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
 
1151                 SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
 
1152                 savage_freelist_put(dev, dmabuf);
 
1156         /* If we didn't need to allocate them, these'll be NULL */
 
1157         drm_free(kcmd_addr, cmdbuf->size * 8, DRM_MEM_DRIVER);
 
1158         drm_free(kvb_addr, cmdbuf->vb_size, DRM_MEM_DRIVER);
 
1159         drm_free(kbox_addr, cmdbuf->nbox * sizeof(struct drm_clip_rect),