1 /* via_dma.c -- DMA support for the VIA Unichrome/Pro
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Copyright 2004 Digeo, Inc., Palo Alto, CA, U.S.A.
9 * Copyright 2004 The Unichrome project.
10 * All Rights Reserved.
12 * Permission is hereby granted, free of charge, to any person obtaining a
13 * copy of this software and associated documentation files (the "Software"),
14 * to deal in the Software without restriction, including without limitation
15 * the rights to use, copy, modify, merge, publish, distribute, sub license,
16 * and/or sell copies of the Software, and to permit persons to whom the
17 * Software is furnished to do so, subject to the following conditions:
19 * The above copyright notice and this permission notice (including the
20 * next paragraph) shall be included in all copies or substantial portions
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 * USE OR OTHER DEALINGS IN THE SOFTWARE.
41 #include "via_3d_reg.h"
/*
 * Command-buffer alignment: 0x100 bytes, with 0x0ff as the matching
 * low-bits mask. via_align_cmd() uses both to work out how many
 * quadwords of padding are needed to reach the next boundary.
 */
43 #define CMDBUF_ALIGNMENT_SIZE (0x100)
44 #define CMDBUF_ALIGNMENT_MASK (0x0ff)
46 /* defines for VIA 3D registers */
47 #define VIA_REG_STATUS 0x400
48 #define VIA_REG_TRANSET 0x43C
49 #define VIA_REG_TRANSPACE 0x440
51 /* VIA_REG_STATUS(0x400): Engine Status */
52 #define VIA_CMD_RGTR_BUSY 0x00000080 /* Command Regulator is busy */
53 #define VIA_2D_ENG_BUSY 0x00000001 /* 2D Engine is busy */
54 #define VIA_3D_ENG_BUSY 0x00000002 /* 3D Engine is busy */
55 #define VIA_VR_QUEUE_BUSY 0x00020000 /* Virtual Queue is busy */
/*
 * Ring-buffer emit helpers.
 *
 * SetReg2DAGP(nReg, nData): relies on locals named vb and dev_priv in
 * the calling scope. Stores two 32-bit words at vb, first
 * ((nReg) >> 2) | HALCYON_HEADER1 and then nData, advances vb by two
 * words and dev_priv->dma_low by 8 bytes.
 *
 * via_flush_write_combine(): expands to DRM_MEMORYBARRIER().
 *
 * VIA_OUT_RING_QW(w1, w2): the visible part of the macro advances
 * dev_priv->dma_low by 8 bytes. The stores of w1 and w2 are not
 * visible in this excerpt (presumably written through vb; confirm
 * against the full source).
 */
57 #define SetReg2DAGP(nReg, nData) { \
58 *((uint32_t *)(vb)) = ((nReg) >> 2) | HALCYON_HEADER1; \
59 *((uint32_t *)(vb) + 1) = (nData); \
60 vb = ((uint32_t *)vb) + 2; \
61 dev_priv->dma_low +=8; \
64 #define via_flush_write_combine() DRM_MEMORYBARRIER()
66 #define VIA_OUT_RING_QW(w1,w2) \
69 dev_priv->dma_low += 8;
/*
 * Forward declarations for static helpers that are called earlier in
 * this file than they are defined.
 */
71 static void via_cmdbuf_start(drm_via_private_t * dev_priv);
72 static void via_cmdbuf_pause(drm_via_private_t * dev_priv);
73 static void via_cmdbuf_reset(drm_via_private_t * dev_priv);
74 static void via_cmdbuf_rewind(drm_via_private_t * dev_priv);
75 static int via_wait_idle(drm_via_private_t * dev_priv);
76 static void via_pad_cache(drm_via_private_t * dev_priv, int qwords);
79 * Free space in command buffer.
/*
 * Returns the number of bytes currently free in the ring.
 *
 * hw_addr is the address read through dev_priv->hw_addr_ptr minus the
 * AGP base of the ring (dma_offset + agpAddr), so it is an offset that
 * can be compared with the software write offset dma_low. When hw_addr
 * is at or behind dma_low the free region wraps around the end of the
 * buffer (dma_high + hw_addr - dma_low); otherwise it is the plain
 * distance hw_addr - dma_low.
 */
82 static uint32_t via_cmdbuf_space(drm_via_private_t * dev_priv)
84 uint32_t agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
85 uint32_t hw_addr = *(dev_priv->hw_addr_ptr) - agp_base;
87 return ((hw_addr <= dev_priv->dma_low) ?
88 (dev_priv->dma_high + hw_addr - dev_priv->dma_low) :
89 (hw_addr - dev_priv->dma_low));
93 * How much does the command regulator lag behind?
/*
 * Returns, in bytes, how far the address read through hw_addr_ptr
 * trails the software write offset dma_low.
 *
 * When hw_addr is at or behind dma_low the lag is dma_low - hw_addr.
 * Otherwise the reader is still in the part of the ring written before
 * the last wrap, so dma_wrap is added: dma_wrap + dma_low - hw_addr.
 */
96 static uint32_t via_cmdbuf_lag(drm_via_private_t * dev_priv)
98 uint32_t agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
99 uint32_t hw_addr = *(dev_priv->hw_addr_ptr) - agp_base;
101 return ((hw_addr <= dev_priv->dma_low) ?
102 (dev_priv->dma_low - hw_addr) :
103 (dev_priv->dma_wrap + dev_priv->dma_low - hw_addr));
107 * Check that the given size fits in the buffer, otherwise wait.
/*
 * Waits until size bytes can be written at the current write offset.
 *
 * dev_priv: driver private state (supplies dma_low, hw_addr_ptr and
 *           the AGP base of the ring).
 * size:     number of bytes the caller is about to write.
 *
 * The loop re-reads the hardware address and keeps going while that
 * address lies ahead of cur_addr and at or below next_addr. A message
 * is logged on timeout. The return type, the timeout counter and the
 * return statements are not visible in this excerpt; callers in this
 * file treat a nonzero result as failure.
 */
111 via_cmdbuf_wait(drm_via_private_t * dev_priv, unsigned int size)
113 uint32_t agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
114 uint32_t cur_addr, hw_addr, next_addr;
115 volatile uint32_t *hw_addr_ptr;
117 hw_addr_ptr = dev_priv->hw_addr_ptr;
118 cur_addr = dev_priv->dma_low;
/* Upper edge of the guarded window: requested size plus a 512 KiB margin. */
119 next_addr = cur_addr + size + 512 * 1024;
122 hw_addr = *hw_addr_ptr - agp_base;
125 ("via_cmdbuf_wait timed out hw %x cur_addr %x next_addr %x\n",
126 hw_addr, cur_addr, next_addr);
129 } while ((cur_addr < hw_addr) && (next_addr >= hw_addr));
134 * Checks whether buffer head has reached the end. Rewind the ring buffer
137 * Returns virtual pointer to ring buffer.
/*
 * Reserves room for size bytes in the ring and returns the virtual
 * address at which the caller may start writing.
 *
 * If dma_low + size plus four alignment units would pass dma_high, the
 * ring is rewound first via via_cmdbuf_rewind(). The function then
 * waits for size bytes with via_cmdbuf_wait(); the handling of a
 * failed wait is on lines not visible in this excerpt. On the visible
 * success path the result is dma_ptr + dma_low.
 */
140 static inline uint32_t *via_check_dma(drm_via_private_t * dev_priv,
143 if ((dev_priv->dma_low + size + 4 * CMDBUF_ALIGNMENT_SIZE) >
144 dev_priv->dma_high) {
145 via_cmdbuf_rewind(dev_priv);
147 if (via_cmdbuf_wait(dev_priv, size) != 0) {
151 return (uint32_t *) (dev_priv->dma_ptr + dev_priv->dma_low);
/*
 * Tears down the AGP ring buffer if one has been set up.
 *
 * Does nothing when dev->dev_private is NULL or when
 * ring.virtual_start is NULL. Otherwise it issues via_cmdbuf_reset(),
 * releases the ring mapping with drm_core_ioremapfree() and clears
 * ring.virtual_start so a later initialisation is accepted again.
 * The return statement is not visible in this excerpt.
 */
154 int via_dma_cleanup(struct drm_device * dev)
156 if (dev->dev_private) {
157 drm_via_private_t *dev_priv =
158 (drm_via_private_t *) dev->dev_private;
160 if (dev_priv->ring.virtual_start) {
161 via_cmdbuf_reset(dev_priv);
163 drm_core_ioremapfree(&dev_priv->ring.map, dev);
164 dev_priv->ring.virtual_start = NULL;
/*
 * Sets up the AGP ring buffer described by init and starts the
 * command regulator.
 *
 * dev:      DRM device; dev->agp->base supplies the AGP aperture base.
 * dev_priv: driver private state; must already have its mmio map.
 * init:     user-supplied offset, size and reg_pause_addr.
 *
 * An error is logged when dev_priv or its mmio map is missing, when
 * the ring is already initialised, when no AGP memory is available,
 * or when the chipset is VIA_DX9_0. The corresponding return
 * statements and error codes are not visible in this excerpt.
 */
172 static int via_initialize(struct drm_device * dev,
173 drm_via_private_t * dev_priv,
174 drm_via_dma_init_t * init)
176 if (!dev_priv || !dev_priv->mmio) {
177 DRM_ERROR("via_dma_init called before via_map_init\n");
181 if (dev_priv->ring.virtual_start != NULL) {
182 DRM_ERROR("called again without calling cleanup\n");
186 if (!dev->agp || !dev->agp->base) {
187 DRM_ERROR("called with no agp memory available\n");
191 if (dev_priv->chipset == VIA_DX9_0) {
192 DRM_ERROR("AGP DMA is not supported on this chip\n");
/* Describe the ring as a map at AGP base + requested offset, then map it. */
196 dev_priv->ring.map.offset = dev->agp->base + init->offset;
197 dev_priv->ring.map.size = init->size;
198 dev_priv->ring.map.type = 0;
199 dev_priv->ring.map.flags = 0;
200 dev_priv->ring.map.mtrr = 0;
202 drm_core_ioremap(&dev_priv->ring.map, dev);
204 if (dev_priv->ring.map.handle == NULL) {
205 via_dma_cleanup(dev);
206 DRM_ERROR("can not ioremap virtual address for"
211 dev_priv->ring.virtual_start = dev_priv->ring.map.handle;
/* Software ring state: empty ring, high-water and wrap marks at init->size. */
213 dev_priv->dma_ptr = dev_priv->ring.virtual_start;
214 dev_priv->dma_low = 0;
215 dev_priv->dma_high = init->size;
216 dev_priv->dma_wrap = init->size;
217 dev_priv->dma_offset = init->offset;
218 dev_priv->last_pause_ptr = NULL;
/* hw_addr_ptr points into the MMIO map at the offset given by reg_pause_addr. */
219 dev_priv->hw_addr_ptr =
220 (volatile uint32_t *)((char *)dev_priv->mmio->handle +
221 init->reg_pause_addr);
223 via_cmdbuf_start(dev_priv);
/*
 * Ioctl handler that dispatches on init->func.
 *
 * Visible cases:
 *  - a case whose label is not visible in this excerpt: requires
 *    DRM_SUSER(DRM_CURPROC), then calls via_initialize();
 *  - VIA_CLEANUP_DMA: requires DRM_SUSER(DRM_CURPROC), then calls
 *    via_dma_cleanup();
 *  - VIA_DMA_INITIALIZED: result depends on whether
 *    ring.virtual_start is non-NULL.
 *
 * The values returned for a failed privilege check, for
 * VIA_DMA_INITIALIZED and for unknown functions are not visible here.
 */
228 static int via_dma_init(struct drm_device *dev, void *data, struct drm_file *file_priv)
230 drm_via_private_t *dev_priv = (drm_via_private_t *) dev->dev_private;
231 drm_via_dma_init_t *init = data;
234 switch (init->func) {
236 if (!DRM_SUSER(DRM_CURPROC))
239 retcode = via_initialize(dev, dev_priv, init);
241 case VIA_CLEANUP_DMA:
242 if (!DRM_SUSER(DRM_CURPROC))
245 retcode = via_dma_cleanup(dev);
247 case VIA_DMA_INITIALIZED:
248 retcode = (dev_priv->ring.virtual_start != NULL) ?
/*
 * Validates a user command buffer and queues it on the AGP ring.
 *
 * dev: DRM device.
 * cmd: user buffer pointer (cmd->buf) and byte count (cmd->size).
 *
 * Visible steps: reject the call when the ring is not initialised or
 * when cmd->size exceeds VIA_PCI_BUF_SIZE; copy the data from user
 * space into dev_priv->pci_buf; run via_verify_command_stream() on
 * that copy with a final argument of 1; reserve ring space with
 * via_check_dma(); memcpy the verified copy into the ring and advance
 * dma_low; pad short submissions; finish with via_cmdbuf_pause().
 * The return statements are not visible in this excerpt.
 */
259 static int via_dispatch_cmdbuffer(struct drm_device * dev, drm_via_cmdbuffer_t * cmd)
261 drm_via_private_t *dev_priv;
265 dev_priv = (drm_via_private_t *) dev->dev_private;
267 if (dev_priv->ring.virtual_start == NULL) {
268 DRM_ERROR("called without initializing AGP ring buffer.\n");
272 if (cmd->size > VIA_PCI_BUF_SIZE) {
276 if (DRM_COPY_FROM_USER(dev_priv->pci_buf, cmd->buf, cmd->size))
280 * Running this function on AGP memory is dead slow. Therefore
281 * we run it on a temporary cacheable system memory buffer and
282 * copy it to AGP memory when ready.
286 via_verify_command_stream((uint32_t *) dev_priv->pci_buf,
287 cmd->size, dev, 1))) {
/* Submissions under 0x100 bytes reserve 0x102 bytes, since padding is appended below. */
291 vb = via_check_dma(dev_priv, (cmd->size < 0x100) ? 0x102 : cmd->size);
296 memcpy(vb, dev_priv->pci_buf, cmd->size);
298 dev_priv->dma_low += cmd->size;
301 * Small submissions somehow stalls the CPU. (AGP cache effects?)
302 * pad to greater size.
/* Pad count is the shortfall to 0x100 bytes expressed in quadwords (>> 3). */
305 if (cmd->size < 0x100)
306 via_pad_cache(dev_priv, (0x100 - cmd->size) >> 3);
307 via_cmdbuf_pause(dev_priv);
/*
 * Waits for the hardware to go idle by calling via_wait_idle().
 * A zero result from via_wait_idle() takes the branch below; what that
 * branch does and the values returned are not visible in this excerpt.
 */
312 int via_driver_dma_quiescent(struct drm_device * dev)
314 drm_via_private_t *dev_priv = dev->dev_private;
316 if (!via_wait_idle(dev_priv)) {
/*
 * Flush ioctl: after the LOCK_TEST_WITH_RETURN check it returns the
 * result of via_driver_dma_quiescent(). The data argument is not used
 * in the visible lines.
 */
322 static int via_flush_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
325 LOCK_TEST_WITH_RETURN(dev, file_priv);
327 return via_driver_dma_quiescent(dev);
/*
 * Ioctl entry point for AGP command submission. data is interpreted as
 * a drm_via_cmdbuffer_t; after the LOCK_TEST_WITH_RETURN check and a
 * debug print of the buffer pointer and size it hands the request to
 * via_dispatch_cmdbuffer(). How ret is reported back is not visible in
 * this excerpt.
 */
330 static int via_cmdbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv)
332 drm_via_cmdbuffer_t *cmdbuf = data;
335 LOCK_TEST_WITH_RETURN(dev, file_priv);
337 DRM_DEBUG("buf %p size %lu\n", cmdbuf->buf, cmdbuf->size);
339 ret = via_dispatch_cmdbuffer(dev, cmdbuf);
/*
 * Validates a user command buffer and executes it without going
 * through the AGP ring.
 *
 * Visible steps: reject sizes above VIA_PCI_BUF_SIZE; copy the data
 * from user space into dev_priv->pci_buf; verify it with
 * via_verify_command_stream() using a final argument of 0; pass the
 * verified copy to via_parse_command_stream(). The return statements
 * and the remaining arguments of the parse call are not visible in
 * this excerpt.
 */
347 static int via_dispatch_pci_cmdbuffer(struct drm_device * dev,
348 drm_via_cmdbuffer_t * cmd)
350 drm_via_private_t *dev_priv = dev->dev_private;
353 if (cmd->size > VIA_PCI_BUF_SIZE) {
356 if (DRM_COPY_FROM_USER(dev_priv->pci_buf, cmd->buf, cmd->size))
360 via_verify_command_stream((uint32_t *) dev_priv->pci_buf,
361 cmd->size, dev, 0))) {
366 via_parse_command_stream(dev, (const uint32_t *)dev_priv->pci_buf,
/*
 * Ioctl entry point for the PCI command path. data is interpreted as a
 * drm_via_cmdbuffer_t; after the LOCK_TEST_WITH_RETURN check and a
 * debug print of the buffer pointer and size it hands the request to
 * via_dispatch_pci_cmdbuffer(). How ret is reported back is not
 * visible in this excerpt.
 */
371 static int via_pci_cmdbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv)
373 drm_via_cmdbuffer_t *cmdbuf = data;
376 LOCK_TEST_WITH_RETURN(dev, file_priv);
378 DRM_DEBUG("buf %p size %lu\n", cmdbuf->buf, cmdbuf->size);
380 ret = via_dispatch_pci_cmdbuffer(dev, cmdbuf);
/*
 * Emits qw_count padding quadwords, each made of two HC_DUMMY words,
 * through VIA_OUT_RING_QW. A qw_count of zero or less emits nothing.
 * The return statement is not visible in this excerpt; callers in this
 * file use the result as the updated write cursor.
 */
388 static inline uint32_t *via_align_buffer(drm_via_private_t * dev_priv,
389 uint32_t * vb, int qw_count)
391 for (; qw_count > 0; --qw_count) {
392 VIA_OUT_RING_QW(HC_DUMMY, HC_DUMMY);
398 * This function is used internally by ring buffer management code.
400 * Returns virtual pointer to ring buffer.
/*
 * Returns the current write position in the ring as a virtual
 * address: dma_ptr advanced by dma_low. Unlike via_check_dma() it
 * performs no space check and no rewind.
 */
402 static inline uint32_t *via_get_dma(drm_via_private_t * dev_priv)
404 return (uint32_t *) (dev_priv->dma_ptr + dev_priv->dma_low);
408 * Hooks a segment of data into the tail of the ring-buffer by
409 * modifying the pause address stored in the buffer itself. If
410 * the regulator has already paused, restart it.
/*
 * Publishes a new pause address in the ring and, when the regulator is
 * found paused, restarts it through MMIO.
 *
 * pause_addr_hi, pause_addr_lo: encoded pause address words, as
 *     produced by via_align_cmd().
 * A further parameter declared on a line not visible in this excerpt
 * is used below as no_pci_fire; when it is nonzero the MMIO restart is
 * skipped.
 *
 * Visible steps:
 *  1. Flush write-combining, read back the last ring word written
 *     (via_get_dma() - 1), store pause_addr_lo into the previous pause
 *     slot (last_pause_ptr) and flush again. The read-back presumably
 *     forces the preceding ring writes out; confirm.
 *  2. Sample the reader address, compute ptr as the AGP address four
 *     bytes past the old pause slot, and record the new
 *     last_pause_ptr.
 *  3. If ptr - reader <= dma_diff, poll bit 31 of register 0x41c until
 *     it is set or count runs out (count setup is not visible).
 *  4. If paused and no_pci_fire is zero, re-read the reader address;
 *     when ptr - reader equals dma_diff, write HC_ParaType_PreCR << 16
 *     to VIA_REG_TRANSET and both pause words to VIA_REG_TRANSPACE,
 *     followed by a read of VIA_REG_TRANSPACE.
 *
 * The statement order is significant for the hardware handshake.
 * Return statements are not visible in this excerpt.
 */
412 static int via_hook_segment(drm_via_private_t * dev_priv,
413 uint32_t pause_addr_hi, uint32_t pause_addr_lo,
417 volatile uint32_t *paused_at = dev_priv->last_pause_ptr;
421 via_flush_write_combine();
422 (void) *(volatile uint32_t *)(via_get_dma(dev_priv) -1);
423 *paused_at = pause_addr_lo;
424 via_flush_write_combine();
426 reader = *(dev_priv->hw_addr_ptr);
427 ptr = ((volatile char *)paused_at - dev_priv->dma_ptr) +
428 dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr + 4;
429 dev_priv->last_pause_ptr = via_get_dma(dev_priv) - 1;
431 if ((ptr - reader) <= dev_priv->dma_diff ) {
433 while (!(paused = (VIA_READ(0x41c) & 0x80000000)) && count--);
436 if (paused && !no_pci_fire) {
437 reader = *(dev_priv->hw_addr_ptr);
438 if ((ptr - reader) == dev_priv->dma_diff) {
441 * There is a concern that these writes may stall the PCI bus
442 * if the GPU is not idle. However, idling the GPU first
443 * doesn't make a difference.
446 VIA_WRITE(VIA_REG_TRANSET, (HC_ParaType_PreCR << 16));
447 VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_hi);
448 VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_lo);
449 VIA_READ(VIA_REG_TRANSPACE);
/*
 * Busy-waits on VIA_REG_STATUS with a shared budget of 10000000
 * iterations.
 *
 * The first loop spins while the VIA_VR_QUEUE_BUSY bit reads as clear.
 * The second loop spins while any of the listed engine-busy bits is
 * set (VIA_CMD_RGTR_BUSY, VIA_2D_ENG_BUSY and a further mask on a line
 * not visible in this excerpt). The return statement is not visible;
 * via_driver_dma_quiescent() treats a zero result as the special case.
 *
 * NOTE(review): the define comment describes VIA_VR_QUEUE_BUSY as
 * meaning busy, yet the first loop waits for the bit to become set.
 * Confirm the bit polarity against the hardware documentation.
 */
455 static int via_wait_idle(drm_via_private_t * dev_priv)
457 int count = 10000000;
459 while (!(VIA_READ(VIA_REG_STATUS) & VIA_VR_QUEUE_BUSY) && count--);
461 while (count-- && (VIA_READ(VIA_REG_STATUS) &
462 (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY |
/*
 * Emits a regulator command (pause, jump or stop, selected by
 * cmd_type) padded so that it ends on a CMDBUF_ALIGNMENT_SIZE
 * boundary.
 *
 * cmd_type:    command id, masked with HC_HAGPBpID_MASK.
 * addr:        target address for the command; when zero, the address
 *              of the final quadword emitted here is used instead.
 * cmd_addr_hi, cmd_addr_lo: out-parameters receiving the two encoded
 *              address words that are also written to the ring.
 * skip_wait:   not referenced in the visible lines; presumably guards
 *              the via_cmdbuf_wait() call below (confirm).
 *
 * Sequence: wait for two alignment units of space, emit a header
 * quadword selecting the PreCR parameter type, compute how many
 * quadwords remain up to the next boundary, emit that many minus one
 * as dummies, then emit the command quadword itself. The return
 * statement is not visible in this excerpt; via_cmdbuf_start() uses
 * the result minus one as the pause slot pointer.
 */
467 static uint32_t *via_align_cmd(drm_via_private_t * dev_priv, uint32_t cmd_type,
468 uint32_t addr, uint32_t * cmd_addr_hi,
469 uint32_t * cmd_addr_lo, int skip_wait)
472 uint32_t cmd_addr, addr_lo, addr_hi;
474 uint32_t qw_pad_count;
477 via_cmdbuf_wait(dev_priv, 2 * CMDBUF_ALIGNMENT_SIZE);
479 vb = via_get_dma(dev_priv);
480 VIA_OUT_RING_QW(HC_HEADER2 | ((VIA_REG_TRANSET >> 2) << 12) |
481 (VIA_REG_TRANSPACE >> 2), HC_ParaType_PreCR << 16);
482 agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
/* Quadwords left before the next boundary, counted after the header above. */
483 qw_pad_count = (CMDBUF_ALIGNMENT_SIZE >> 3) -
484 ((dev_priv->dma_low & CMDBUF_ALIGNMENT_MASK) >> 3);
/* Default target: dma_low - 8 + padding bytes, i.e. the last quadword emitted below. */
486 cmd_addr = (addr) ? addr :
487 agp_base + dev_priv->dma_low - 8 + (qw_pad_count << 3);
488 addr_lo = ((HC_SubA_HAGPBpL << 24) | (cmd_type & HC_HAGPBpID_MASK) |
489 (cmd_addr & HC_HAGPBpL_MASK));
490 addr_hi = ((HC_SubA_HAGPBpH << 24) | (cmd_addr >> 24));
492 vb = via_align_buffer(dev_priv, vb, qw_pad_count - 1);
493 VIA_OUT_RING_QW(*cmd_addr_hi = addr_hi, *cmd_addr_lo = addr_lo);
/*
 * Programs the command regulator with the ring boundaries, starts it
 * and measures dma_diff.
 *
 * Visible steps:
 *  - Reset dma_low to 0 and derive the start and end AGP addresses of
 *    the ring (agp_base and agp_base + dma_high).
 *  - Encode the low 24 bits of both addresses and a combined command
 *    word carrying their top bytes.
 *  - Emit an initial PAUSE command with via_align_cmd() (skip_wait set
 *    to 1) and remember its slot in last_pause_ptr.
 *  - After a write-combine flush and a read-back of that slot, write
 *    the command, start, end and pause words to VIA_REG_TRANSPACE,
 *    issue a write barrier, then write the command word again with
 *    HC_HAGPCMNT_MASK set and read VIA_REG_TRANSPACE back.
 *  - Poll bit 31 of register 0x41c (count setup is not visible), then
 *    store ptr - reader in dma_diff.
 */
497 static void via_cmdbuf_start(drm_via_private_t * dev_priv)
499 uint32_t pause_addr_lo, pause_addr_hi;
500 uint32_t start_addr, start_addr_lo;
501 uint32_t end_addr, end_addr_lo;
508 dev_priv->dma_low = 0;
510 agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
511 start_addr = agp_base;
512 end_addr = agp_base + dev_priv->dma_high;
514 start_addr_lo = ((HC_SubA_HAGPBstL << 24) | (start_addr & 0xFFFFFF));
515 end_addr_lo = ((HC_SubA_HAGPBendL << 24) | (end_addr & 0xFFFFFF));
516 command = ((HC_SubA_HAGPCMNT << 24) | (start_addr >> 24) |
517 ((end_addr & 0xff000000) >> 16));
519 dev_priv->last_pause_ptr =
520 via_align_cmd(dev_priv, HC_HAGPBpID_PAUSE, 0,
521 &pause_addr_hi, &pause_addr_lo, 1) - 1;
523 via_flush_write_combine();
524 (void) *(volatile uint32_t *)dev_priv->last_pause_ptr;
526 VIA_WRITE(VIA_REG_TRANSET, (HC_ParaType_PreCR << 16));
527 VIA_WRITE(VIA_REG_TRANSPACE, command);
528 VIA_WRITE(VIA_REG_TRANSPACE, start_addr_lo);
529 VIA_WRITE(VIA_REG_TRANSPACE, end_addr_lo);
531 VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_hi);
532 VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_lo);
533 DRM_WRITEMEMORYBARRIER();
534 VIA_WRITE(VIA_REG_TRANSPACE, command | HC_HAGPCMNT_MASK);
535 VIA_READ(VIA_REG_TRANSPACE);
537 dev_priv->dma_diff = 0;
540 while (!(VIA_READ(0x41c) & 0x80000000) && count--);
542 reader = *(dev_priv->hw_addr_ptr);
543 ptr = ((volatile char *)dev_priv->last_pause_ptr - dev_priv->dma_ptr) +
544 dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr + 4;
547 * This is the difference between where we tell the
548 * command reader to pause and where it actually pauses.
549 * This differs between hw implementation so we need to
553 dev_priv->dma_diff = ptr - reader;
/*
 * Appends padding to the ring: one header quadword (HC_HEADER2 with
 * HC_ParaType_NotTex << 16) followed by qwords dummy quadwords emitted
 * by via_align_buffer().
 *
 * NOTE(review): via_cmdbuf_wait() adds its size argument to byte
 * offsets, whereas qwords + 2 is a quadword count. Confirm the
 * intended unit.
 */
556 static void via_pad_cache(drm_via_private_t * dev_priv, int qwords)
560 via_cmdbuf_wait(dev_priv, qwords + 2);
561 vb = via_get_dma(dev_priv);
562 VIA_OUT_RING_QW(HC_HEADER2, HC_ParaType_NotTex << 16);
563 via_align_buffer(dev_priv, vb, qwords);
/*
 * Queues three 2D register writes at the current ring position using
 * SetReg2DAGP: zero to registers 0x0C and 0x10, then
 * 0x1 | 0x2000 | 0xAA000000 to register 0x0. Each write advances
 * dma_low by 8 bytes. Judging by the function name this forms a no-op
 * blit; the register meanings are not stated in this file.
 */
566 static inline void via_dummy_bitblt(drm_via_private_t * dev_priv)
568 uint32_t *vb = via_get_dma(dev_priv);
569 SetReg2DAGP(0x0C, (0 | (0 << 16)));
570 SetReg2DAGP(0x10, 0 | (0 << 16));
571 SetReg2DAGP(0x0, 0x1 | 0x2000 | 0xAA000000);
/*
 * Wraps the ring: emits a JUMP command at the current tail, restarts
 * writing at offset 0 and hooks the jump into the command stream.
 *
 * Visible steps:
 *  - via_align_cmd() with HC_HAGPBpID_JUMP fills jump_addr_hi; its
 *    remaining arguments are on a line not visible in this excerpt.
 *  - dma_wrap takes the current dma_low, then dma_low is reset to 0.
 *  - via_cmdbuf_wait() is called for CMDBUF_ALIGNMENT_SIZE bytes; in
 *    the visible lines a failure is only logged.
 *  - Two dummy blits and two PAUSE commands are emitted. The result of
 *    the first PAUSE call minus one is presumably assigned to
 *    last_pause_ptr (the assignment target is not visible), and that
 *    slot is then overwritten with the latest pause_addr_lo.
 *  - via_hook_segment() is called with the jump address words and a
 *    final argument of 0.
 */
574 static void via_cmdbuf_jump(drm_via_private_t * dev_priv)
577 uint32_t pause_addr_lo, pause_addr_hi;
578 uint32_t jump_addr_lo, jump_addr_hi;
579 volatile uint32_t *last_pause_ptr;
581 agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
582 via_align_cmd(dev_priv, HC_HAGPBpID_JUMP, 0, &jump_addr_hi,
585 dev_priv->dma_wrap = dev_priv->dma_low;
588 * Wrap command buffer to the beginning.
591 dev_priv->dma_low = 0;
592 if (via_cmdbuf_wait(dev_priv, CMDBUF_ALIGNMENT_SIZE) != 0) {
593 DRM_ERROR("via_cmdbuf_jump failed\n");
596 via_dummy_bitblt(dev_priv);
597 via_dummy_bitblt(dev_priv);
600 via_align_cmd(dev_priv, HC_HAGPBpID_PAUSE, 0, &pause_addr_hi,
601 &pause_addr_lo, 0) - 1;
602 via_align_cmd(dev_priv, HC_HAGPBpID_PAUSE, 0, &pause_addr_hi,
605 *last_pause_ptr = pause_addr_lo;
607 via_hook_segment( dev_priv, jump_addr_hi, jump_addr_lo, 0);
/*
 * Rewinds the ring to its beginning. In the visible lines this is a
 * plain wrapper around via_cmdbuf_jump().
 */
611 static void via_cmdbuf_rewind(drm_via_private_t * dev_priv)
613 via_cmdbuf_jump(dev_priv);
/*
 * Emits an aligned regulator command of the given cmd_type at the ring
 * tail with via_align_cmd() (addr 0, skip_wait 0), then passes the
 * resulting address words to via_hook_segment() with a final argument
 * of 0.
 */
616 static void via_cmdbuf_flush(drm_via_private_t * dev_priv, uint32_t cmd_type)
618 uint32_t pause_addr_lo, pause_addr_hi;
620 via_align_cmd(dev_priv, cmd_type, 0, &pause_addr_hi, &pause_addr_lo, 0);
621 via_hook_segment(dev_priv, pause_addr_hi, pause_addr_lo, 0);
/*
 * Terminates the commands queued so far with a PAUSE command by
 * calling via_cmdbuf_flush() with HC_HAGPBpID_PAUSE.
 */
624 static void via_cmdbuf_pause(drm_via_private_t * dev_priv)
626 via_cmdbuf_flush(dev_priv, HC_HAGPBpID_PAUSE);
/*
 * Queues a STOP command via via_cmdbuf_flush() and then calls
 * via_wait_idle(). The result of via_wait_idle() is ignored here.
 */
629 static void via_cmdbuf_reset(drm_via_private_t * dev_priv)
631 via_cmdbuf_flush(dev_priv, HC_HAGPBpID_STOP);
632 via_wait_idle(dev_priv);
636 * User interface to the space and lag functions.
/*
 * Ioctl handler exposing via_cmdbuf_space() and via_cmdbuf_lag() to
 * user space. data is interpreted as a drm_via_cmdbuf_size_t.
 *
 * After the LOCK_TEST_WITH_RETURN check and a check that the ring is
 * initialised, it dispatches on d_siz->func:
 *  - VIA_CMDBUF_SPACE: loops while the free space is below
 *    d_siz->size;
 *  - a case whose label is not visible in this excerpt: loops while
 *    the lag is above d_siz->size.
 * Both loops have further conditions on lines not visible here and log
 * an error on timeout. The last measured value is written back to
 * d_siz->size. Return statements are not visible in this excerpt.
 */
639 static int via_cmdbuf_size(struct drm_device *dev, void *data, struct drm_file *file_priv)
641 drm_via_cmdbuf_size_t *d_siz = data;
643 uint32_t tmp_size, count;
644 drm_via_private_t *dev_priv;
647 LOCK_TEST_WITH_RETURN(dev, file_priv);
649 dev_priv = (drm_via_private_t *) dev->dev_private;
651 if (dev_priv->ring.virtual_start == NULL) {
652 DRM_ERROR("called without initializing AGP ring buffer.\n");
657 tmp_size = d_siz->size;
658 switch (d_siz->func) {
659 case VIA_CMDBUF_SPACE:
660 while (((tmp_size = via_cmdbuf_space(dev_priv)) < d_siz->size)
667 DRM_ERROR("VIA_CMDBUF_SPACE timed out.\n");
672 while (((tmp_size = via_cmdbuf_lag(dev_priv)) > d_siz->size)
679 DRM_ERROR("VIA_CMDBUF_LAG timed out.\n");
686 d_siz->size = tmp_size;
/*
 * Driver ioctl table: maps each DRM_VIA_* request to its handler and
 * permission flags. The three init entries (AGP, FB, MAP) carry
 * DRM_AUTH|DRM_MASTER; every other entry carries DRM_AUTH only.
 *
 * NOTE(review): DRM_VIA_DMA_INIT is flagged DRM_AUTH only. Its handler
 * via_dma_init() performs its own DRM_SUSER check for the privileged
 * sub-functions, so confirm that this combination is intended.
 */
691 struct drm_ioctl_desc via_ioctls[] = {
692 DRM_IOCTL_DEF(DRM_VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH),
693 DRM_IOCTL_DEF(DRM_VIA_FREEMEM, via_mem_free, DRM_AUTH),
694 DRM_IOCTL_DEF(DRM_VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER),
695 DRM_IOCTL_DEF(DRM_VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER),
696 DRM_IOCTL_DEF(DRM_VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER),
697 DRM_IOCTL_DEF(DRM_VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH),
698 DRM_IOCTL_DEF(DRM_VIA_DMA_INIT, via_dma_init, DRM_AUTH),
699 DRM_IOCTL_DEF(DRM_VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH),
700 DRM_IOCTL_DEF(DRM_VIA_FLUSH, via_flush_ioctl, DRM_AUTH),
701 DRM_IOCTL_DEF(DRM_VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH),
702 DRM_IOCTL_DEF(DRM_VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH),
703 DRM_IOCTL_DEF(DRM_VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH),
704 DRM_IOCTL_DEF(DRM_VIA_DMA_BLIT, via_dma_blit, DRM_AUTH),
705 DRM_IOCTL_DEF(DRM_VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH)
/* Number of entries in via_ioctls, computed with DRM_ARRAY_SIZE. */
708 int via_max_ioctl = DRM_ARRAY_SIZE(via_ioctls);