1 /* via_dma.c -- DMA support for the VIA Unichrome/Pro
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Copyright 2004 Digeo, Inc., Palo Alto, CA, U.S.A.
9 * Copyright 2004 The Unichrome project.
10 * All Rights Reserved.
12 * Permission is hereby granted, free of charge, to any person obtaining a
13 * copy of this software and associated documentation files (the "Software"),
14 * to deal in the Software without restriction, including without limitation
15 * the rights to use, copy, modify, merge, publish, distribute, sub license,
16 * and/or sell copies of the Software, and to permit persons to whom the
17 * Software is furnished to do so, subject to the following conditions:
19 * The above copyright notice and this permission notice (including the
20 * next paragraph) shall be included in all copies or substantial portions
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
27 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29 * USE OR OTHER DEALINGS IN THE SOFTWARE.
41 #include "via_3d_reg.h"
/*
 * Alignment granule used when padding the command buffer, and the mask that
 * selects the byte offset inside one granule (mask == size - 1).
 */
43 #define CMDBUF_ALIGNMENT_SIZE (0x100)
44 #define CMDBUF_ALIGNMENT_MASK (0x0ff)
46 /* defines for VIA 3D registers */
/* Register offsets passed to VIA_READ()/VIA_WRITE() further down in this file. */
47 #define VIA_REG_STATUS 0x400
48 #define VIA_REG_TRANSET 0x43C
49 #define VIA_REG_TRANSPACE 0x440
51 /* VIA_REG_STATUS(0x400): Engine Status */
/* Bit masks tested against the value read from VIA_REG_STATUS. */
52 #define VIA_CMD_RGTR_BUSY 0x00000080 /* Command Regulator is busy */
53 #define VIA_2D_ENG_BUSY 0x00000001 /* 2D Engine is busy */
54 #define VIA_3D_ENG_BUSY 0x00000002 /* 3D Engine is busy */
55 #define VIA_VR_QUEUE_BUSY 0x00020000 /* Virtual Queue is busy */
/*
 * Emit one register write into the ring buffer: a header dword
 * ((nReg >> 2) | HALCYON_HEADER1) followed by the data dword nData.
 * Advances the local cursor `vb` by two dwords and dev_priv->dma_low by
 * 8 bytes. Both `vb` and `dev_priv` must be in scope where the macro expands.
 * NOTE(review): the macro's closing line is not visible in this listing.
 */
57 #define SetReg2DAGP(nReg, nData) { \
58 *((uint32_t *)(vb)) = ((nReg) >> 2) | HALCYON_HEADER1; \
59 *((uint32_t *)(vb) + 1) = (nData); \
60 vb = ((uint32_t *)vb) + 2; \
61 dev_priv->dma_low +=8; \
/*
 * Alias for DRM_MEMORYBARRIER(). In this file it is issued before and after
 * the pause slot in the ring is rewritten.
 */
64 #define via_flush_write_combine() DRM_MEMORYBARRIER()
/*
 * Emit one quad-word (w1, w2) and account for it by adding 8 bytes to
 * dev_priv->dma_low.
 * NOTE(review): the lines between the header and the dma_low update are not
 * visible in this listing; presumably they store w1 and w2 through the
 * caller's `vb` cursor -- confirm against the full file.
 */
66 #define VIA_OUT_RING_QW(w1,w2) \
69 dev_priv->dma_low += 8;
/* Forward declarations for static helpers that are defined later in this file. */
71 static void via_cmdbuf_start(drm_via_private_t * dev_priv);
72 static void via_cmdbuf_pause(drm_via_private_t * dev_priv);
73 static void via_cmdbuf_reset(drm_via_private_t * dev_priv);
74 static void via_cmdbuf_rewind(drm_via_private_t * dev_priv);
75 static int via_wait_idle(drm_via_private_t * dev_priv);
76 static void via_pad_cache(drm_via_private_t * dev_priv, int qwords);
79 * Free space in command buffer.
/*
 * Returns the distance from the driver's write offset (dma_low) forward to
 * the hardware read position. The read position is the value fetched through
 * hw_addr_ptr, made ring-relative by subtracting agp_base. When the reader is
 * at or behind the writer, the distance wraps around the end of the buffer
 * (dma_high).
 */
82 static uint32_t via_cmdbuf_space(drm_via_private_t * dev_priv)
84 uint32_t agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
85 uint32_t hw_addr = *(dev_priv->hw_addr_ptr) - agp_base;
87 return ((hw_addr <= dev_priv->dma_low) ?
88 (dev_priv->dma_high + hw_addr - dev_priv->dma_low) :
89 (hw_addr - dev_priv->dma_low));
93 * How much does the command regulator lag behind?
/*
 * Returns the distance from the hardware read position forward to the
 * driver's write offset (dma_low). When the reader is ahead of the writer,
 * the distance is taken around the wrap point (dma_wrap), which
 * via_cmdbuf_jump() sets to the offset where the ring last wrapped.
 */
96 static uint32_t via_cmdbuf_lag(drm_via_private_t * dev_priv)
98 uint32_t agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
99 uint32_t hw_addr = *(dev_priv->hw_addr_ptr) - agp_base;
101 return ((hw_addr <= dev_priv->dma_low) ?
102 (dev_priv->dma_low - hw_addr) :
103 (dev_priv->dma_wrap + dev_priv->dma_low - hw_addr));
107 * Check that the given size fits in the buffer, otherwise wait.
/*
 * Poll the hardware read position until it is no longer inside the window
 * the driver is about to write: from the current write offset (dma_low) up
 * to dma_low + size + 512 KiB. The loop repeats while the reader is ahead of
 * the write offset and at or below the end of that window.
 * Callers treat a non-zero result as failure.
 * NOTE(review): the return type, the timeout counter and the return
 * statements are not visible in this listing; the error message below shows
 * that a timeout path exists.
 */
111 via_cmdbuf_wait(drm_via_private_t * dev_priv, unsigned int size)
113 uint32_t agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
114 uint32_t cur_addr, hw_addr, next_addr;
115 volatile uint32_t *hw_addr_ptr;
117 hw_addr_ptr = dev_priv->hw_addr_ptr;
118 cur_addr = dev_priv->dma_low;
/* size plus a fixed 512 KiB margin beyond the current write offset */
119 next_addr = cur_addr + size + 512 * 1024;
/* re-read the hardware position on every iteration, relative to ring start */
122 hw_addr = *hw_addr_ptr - agp_base;
125 ("via_cmdbuf_wait timed out hw %x cur_addr %x next_addr %x\n",
126 hw_addr, cur_addr, next_addr);
129 } while ((cur_addr < hw_addr) && (next_addr >= hw_addr));
134 * Checks whether the buffer head has reached the end. Rewind the ring buffer
137 * Returns virtual pointer to ring buffer.
/*
 * Make room for `size` bytes at the current write offset and return a
 * pointer to that position in the ring (dma_ptr + dma_low).
 * If size plus four alignment granules would run past dma_high, the ring is
 * rewound first; then via_cmdbuf_wait() is called for `size`.
 * NOTE(review): the rest of the parameter list and the statement executed
 * when via_cmdbuf_wait() fails are not visible in this listing.
 */
140 static inline uint32_t *via_check_dma(drm_via_private_t * dev_priv,
143 if ((dev_priv->dma_low + size + 4 * CMDBUF_ALIGNMENT_SIZE) >
144 dev_priv->dma_high) {
145 via_cmdbuf_rewind(dev_priv);
147 if (via_cmdbuf_wait(dev_priv, size) != 0) {
151 return (uint32_t *) (dev_priv->dma_ptr + dev_priv->dma_low);
/*
 * Tear down the AGP ring buffer if one was set up: issue a command-buffer
 * reset, release the ring mapping, and clear ring.virtual_start so that
 * later calls see the ring as uninitialized. Does nothing to the ring when
 * dev->dev_private is NULL or no ring is mapped.
 * NOTE(review): the return statement is not visible in this listing;
 * via_dma_init() uses the result as its return code.
 */
154 int via_dma_cleanup(struct drm_device * dev)
156 if (dev->dev_private) {
157 drm_via_private_t *dev_priv =
158 (drm_via_private_t *) dev->dev_private;
160 if (dev_priv->ring.virtual_start) {
161 via_cmdbuf_reset(dev_priv);
163 drm_core_ioremapfree(&dev_priv->ring.map, dev);
164 dev_priv->ring.virtual_start = NULL;
/*
 * Set up the AGP ring buffer described by `init` and start the command
 * regulator.
 *
 * Preconditions checked, in order: the MMIO map exists, no ring is already
 * mapped, AGP memory is available, and the chipset is not VIA_DX9_0.
 * The ring is then mapped at agp->base + init->offset for init->size bytes,
 * the driver's ring state is reset, hw_addr_ptr is pointed at
 * mmio + init->reg_pause_addr, and via_cmdbuf_start() is called.
 *
 * NOTE(review): the error-return statements and the final return are not
 * visible in this listing.
 * NOTE(review): no range check on init->offset, init->size or
 * init->reg_pause_addr is visible here, although all three come from the
 * caller-supplied init structure -- confirm they are validated elsewhere.
 */
172 static int via_initialize(struct drm_device * dev,
173 drm_via_private_t * dev_priv,
174 drm_via_dma_init_t * init)
176 if (!dev_priv || !dev_priv->mmio) {
177 DRM_ERROR("via_dma_init called before via_map_init\n");
181 if (dev_priv->ring.virtual_start != NULL) {
182 DRM_ERROR("%s called again without calling cleanup\n",
187 if (!dev->agp || !dev->agp->base) {
188 DRM_ERROR("%s called with no agp memory available\n",
193 if (dev_priv->chipset == VIA_DX9_0) {
194 DRM_ERROR("AGP DMA is not supported on this chip\n");
/* describe the ring as a map located inside the AGP aperture */
198 dev_priv->ring.map.offset = dev->agp->base + init->offset;
199 dev_priv->ring.map.size = init->size;
200 dev_priv->ring.map.type = 0;
201 dev_priv->ring.map.flags = 0;
202 dev_priv->ring.map.mtrr = 0;
204 drm_core_ioremap(&dev_priv->ring.map, dev);
/* a NULL handle means the mapping failed */
206 if (dev_priv->ring.map.handle == NULL) {
207 via_dma_cleanup(dev);
208 DRM_ERROR("can not ioremap virtual address for"
213 dev_priv->ring.virtual_start = dev_priv->ring.map.handle;
/* start with an empty ring: write offset 0, end and wrap point at full size */
215 dev_priv->dma_ptr = dev_priv->ring.virtual_start;
216 dev_priv->dma_low = 0;
217 dev_priv->dma_high = init->size;
218 dev_priv->dma_wrap = init->size;
219 dev_priv->dma_offset = init->offset;
220 dev_priv->last_pause_ptr = NULL;
/* location in MMIO space from which the hardware read address is fetched */
221 dev_priv->hw_addr_ptr =
222 (volatile uint32_t *)((char *)dev_priv->mmio->handle +
223 init->reg_pause_addr);
225 via_cmdbuf_start(dev_priv);
/*
 * Ioctl handler that dispatches on init->func:
 *  - a case whose label is not visible in this listing: requires
 *    DRM_SUSER, then calls via_initialize();
 *  - VIA_CLEANUP_DMA: requires DRM_SUSER, then calls via_dma_cleanup();
 *  - VIA_DMA_INITIALIZED: reports whether a ring is currently mapped.
 * `data` is the drm_via_dma_init_t request.
 * NOTE(review): the statements taken when DRM_SUSER fails, the values
 * chosen for VIA_DMA_INITIALIZED, any default case and the final return are
 * not visible in this listing.
 */
230 static int via_dma_init(struct drm_device *dev, void *data, struct drm_file *file_priv)
232 drm_via_private_t *dev_priv = (drm_via_private_t *) dev->dev_private;
233 drm_via_dma_init_t *init = data;
236 switch (init->func) {
238 if (!DRM_SUSER(DRM_CURPROC))
241 retcode = via_initialize(dev, dev_priv, init);
243 case VIA_CLEANUP_DMA:
244 if (!DRM_SUSER(DRM_CURPROC))
247 retcode = via_dma_cleanup(dev);
249 case VIA_DMA_INITIALIZED:
250 retcode = (dev_priv->ring.virtual_start != NULL) ?
/*
 * Copy a user command buffer into the AGP ring and hand it to the hardware.
 *
 * Steps visible here: require an initialized ring; reject sizes above
 * VIA_PCI_BUF_SIZE; copy the user data into the driver's pci_buf staging
 * buffer; verify it with via_verify_command_stream(); reserve ring space
 * with via_check_dma(); memcpy the staged data into the ring; advance
 * dma_low; pad short submissions; and finally call via_cmdbuf_pause().
 *
 * NOTE(review): the error-return statements, the NULL check on `vb` (if
 * any) and the final return are not visible in this listing.
 */
261 static int via_dispatch_cmdbuffer(struct drm_device * dev, drm_via_cmdbuffer_t * cmd)
263 drm_via_private_t *dev_priv;
267 dev_priv = (drm_via_private_t *) dev->dev_private;
269 if (dev_priv->ring.virtual_start == NULL) {
270 DRM_ERROR("%s called without initializing AGP ring buffer.\n",
/* the staging buffer holds at most VIA_PCI_BUF_SIZE bytes */
275 if (cmd->size > VIA_PCI_BUF_SIZE) {
279 if (DRM_COPY_FROM_USER(dev_priv->pci_buf, cmd->buf, cmd->size))
283 * Running this function on AGP memory is dead slow. Therefore
284 * we run it on a temporary cacheable system memory buffer and
285 * copy it to AGP memory when ready.
289 via_verify_command_stream((uint32_t *) dev_priv->pci_buf,
290 cmd->size, dev, 1))) {
/* submissions under 0x100 bytes reserve 0x102 so the padding below also fits */
294 vb = via_check_dma(dev_priv, (cmd->size < 0x100) ? 0x102 : cmd->size);
299 memcpy(vb, dev_priv->pci_buf, cmd->size);
301 dev_priv->dma_low += cmd->size;
304 * Small submissions somehow stalls the CPU. (AGP cache effects?)
305 * pad to greater size.
/* pad count is in quad-words: remaining bytes up to 0x100, divided by 8 */
308 if (cmd->size < 0x100)
309 via_pad_cache(dev_priv, (0x100 - cmd->size) >> 3);
310 via_cmdbuf_pause(dev_priv);
/*
 * Wait for the engines to go idle via via_wait_idle().
 * NOTE(review): the statement taken when via_wait_idle() returns 0, and the
 * final return, are not visible in this listing.
 */
315 int via_driver_dma_quiescent(struct drm_device * dev)
317 drm_via_private_t *dev_priv = dev->dev_private;
319 if (!via_wait_idle(dev_priv)) {
/*
 * Ioctl handler: after LOCK_TEST_WITH_RETURN() passes, returns the result
 * of via_driver_dma_quiescent(). `data` is not used in the visible lines.
 */
325 static int via_flush_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv)
328 LOCK_TEST_WITH_RETURN(dev, file_priv);
330 return via_driver_dma_quiescent(dev);
/*
 * Ioctl handler for AGP command submission: after LOCK_TEST_WITH_RETURN()
 * passes, logs the request and forwards the drm_via_cmdbuffer_t in `data`
 * to via_dispatch_cmdbuffer().
 * NOTE(review): the rest of the DRM_DEBUG argument list and the handling of
 * `ret` are not visible in this listing.
 */
333 static int via_cmdbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv)
335 drm_via_cmdbuffer_t *cmdbuf = data;
338 LOCK_TEST_WITH_RETURN(dev, file_priv);
340 DRM_DEBUG("via cmdbuffer, buf %p size %lu\n", cmdbuf->buf,
343 ret = via_dispatch_cmdbuffer(dev, cmdbuf);
/*
 * Submit a user command buffer without going through the AGP ring: reject
 * sizes above VIA_PCI_BUF_SIZE, copy the user data into pci_buf, verify it
 * with via_verify_command_stream() (last argument 0 here, versus 1 on the
 * AGP path), then pass it to via_parse_command_stream().
 * NOTE(review): the error-return statements, the remaining arguments to
 * via_parse_command_stream() and the final return are not visible in this
 * listing.
 */
351 static int via_dispatch_pci_cmdbuffer(struct drm_device * dev,
352 drm_via_cmdbuffer_t * cmd)
354 drm_via_private_t *dev_priv = dev->dev_private;
357 if (cmd->size > VIA_PCI_BUF_SIZE) {
360 if (DRM_COPY_FROM_USER(dev_priv->pci_buf, cmd->buf, cmd->size))
364 via_verify_command_stream((uint32_t *) dev_priv->pci_buf,
365 cmd->size, dev, 0))) {
370 via_parse_command_stream(dev, (const uint32_t *)dev_priv->pci_buf,
/*
 * Ioctl handler for PCI-path command submission: after
 * LOCK_TEST_WITH_RETURN() passes, logs the request and forwards the
 * drm_via_cmdbuffer_t in `data` to via_dispatch_pci_cmdbuffer().
 * NOTE(review): the rest of the DRM_DEBUG argument list and the handling of
 * `ret` are not visible in this listing.
 */
375 static int via_pci_cmdbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv)
377 drm_via_cmdbuffer_t *cmdbuf = data;
380 LOCK_TEST_WITH_RETURN(dev, file_priv);
382 DRM_DEBUG("via_pci_cmdbuffer, buf %p size %lu\n", cmdbuf->buf,
385 ret = via_dispatch_pci_cmdbuffer(dev, cmdbuf);
/*
 * Emit qw_count padding quad-words, each made of two HC_DUMMY dwords.
 * Nothing is emitted when qw_count is zero or negative.
 * NOTE(review): the return statement is not visible in this listing;
 * via_align_cmd() assigns the result back to its `vb` cursor.
 */
393 static inline uint32_t *via_align_buffer(drm_via_private_t * dev_priv,
394 uint32_t * vb, int qw_count)
396 for (; qw_count > 0; --qw_count) {
397 VIA_OUT_RING_QW(HC_DUMMY, HC_DUMMY);
403 * This function is used internally by ring buffer management code.
405 * Returns virtual pointer to ring buffer.
/*
 * Returns a pointer to the current write position in the ring
 * (dma_ptr + dma_low). Performs no space check and no waiting.
 */
407 static inline uint32_t *via_get_dma(drm_via_private_t * dev_priv)
409 return (uint32_t *) (dev_priv->dma_ptr + dev_priv->dma_low);
413 * Hooks a segment of data into the tail of the ring-buffer by
414 * modifying the pause address stored in the buffer itself. If
415 * the regulator has already paused, restart it.
/*
 * Link the newly written ring segment after the previous pause slot and, if
 * the regulator has already stopped at that slot, restart it by register
 * writes.
 *
 * Sequence visible here:
 *  1. barrier, then a dummy read of the last dword written to the ring;
 *  2. overwrite the previous pause slot (last_pause_ptr) with pause_addr_lo;
 *  3. barrier, then read the hardware address through hw_addr_ptr;
 *  4. compute `ptr`, the bus address just past the old pause slot, and
 *     remember the new pause slot (last dword before the write position);
 *  5. if ptr - reader is at most dma_diff, poll register 0x41c until bit 31
 *     is set or the counter runs out;
 *  6. if paused and no_pci_fire is zero, re-read the hardware address and,
 *     when ptr - reader equals dma_diff exactly, write the new pause
 *     address through VIA_REG_TRANSET / VIA_REG_TRANSPACE and read back.
 *
 * NOTE(review): the last parameter's declaration, the locals, the counter
 * initialization and the return statement are not visible in this listing.
 */
417 static int via_hook_segment(drm_via_private_t * dev_priv,
418 uint32_t pause_addr_hi, uint32_t pause_addr_lo,
422 volatile uint32_t *paused_at = dev_priv->last_pause_ptr;
426 via_flush_write_combine();
/* the value read is discarded; only the access itself is wanted */
427 (void) *(volatile uint32_t *)(via_get_dma(dev_priv) -1);
428 *paused_at = pause_addr_lo;
429 via_flush_write_combine();
431 reader = *(dev_priv->hw_addr_ptr);
/* bus address of the dword following the old pause slot */
432 ptr = ((volatile char *)paused_at - dev_priv->dma_ptr) +
433 dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr + 4;
434 dev_priv->last_pause_ptr = via_get_dma(dev_priv) - 1;
436 if ((ptr - reader) <= dev_priv->dma_diff ) {
/* empty-bodied poll loop; `paused` keeps the masked bit from the last read */
438 while (!(paused = (VIA_READ(0x41c) & 0x80000000)) && count--);
441 if (paused && !no_pci_fire) {
442 reader = *(dev_priv->hw_addr_ptr);
443 if ((ptr - reader) == dev_priv->dma_diff) {
446 * There is a concern that these writes may stall the PCI bus
447 * if the GPU is not idle. However, idling the GPU first
448 * doesn't make a difference.
451 VIA_WRITE(VIA_REG_TRANSET, (HC_ParaType_PreCR << 16));
452 VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_hi);
453 VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_lo);
/* read back after the writes; the value is discarded */
454 VIA_READ(VIA_REG_TRANSPACE);
/*
 * Poll VIA_REG_STATUS in two phases that share one iteration budget:
 * first until the VIA_VR_QUEUE_BUSY bit is set, then until the command
 * regulator and engine busy bits are clear.
 * Callers treat a zero result as failure.
 *
 * NOTE(review): the first loop uses `count--`, so when it stops because the
 * budget ran out, count is left at -1. The second loop tests `count--` as
 * well, and -1 is non-zero, so after a first-phase timeout the second phase
 * is no longer bounded by the budget. A pre-decrement in the first loop and
 * a plain `count` test in the second would avoid this -- confirm and fix in
 * the full file.
 * NOTE(review): the remaining busy-bit mask and the return statement are
 * not visible in this listing.
 */
460 static int via_wait_idle(drm_via_private_t * dev_priv)
462 int count = 10000000;
464 while (!(VIA_READ(VIA_REG_STATUS) & VIA_VR_QUEUE_BUSY) && count--);
466 while (count-- && (VIA_READ(VIA_REG_STATUS) &
467 (VIA_CMD_RGTR_BUSY | VIA_2D_ENG_BUSY |
/*
 * Write a regulator command (type cmd_type, e.g. pause/jump/stop) so that
 * it ends exactly on a CMDBUF_ALIGNMENT_SIZE boundary of the ring.
 *
 * Layout emitted: one header quad-word, then qw_pad_count - 1 dummy
 * quad-words, then the command quad-word (addr_hi, addr_lo). The two
 * command dwords are also returned through cmd_addr_hi / cmd_addr_lo.
 * If `addr` is non-zero it is used as the command's target address;
 * otherwise the target is the bus address of the command quad-word itself.
 *
 * NOTE(review): the condition guarding the via_cmdbuf_wait() call
 * (presumably on skip_wait), some local declarations and the return
 * statement are not visible in this listing. The result of
 * via_cmdbuf_wait() is not checked in the visible line.
 */
472 static uint32_t *via_align_cmd(drm_via_private_t * dev_priv, uint32_t cmd_type,
473 uint32_t addr, uint32_t * cmd_addr_hi,
474 uint32_t * cmd_addr_lo, int skip_wait)
477 uint32_t cmd_addr, addr_lo, addr_hi;
479 uint32_t qw_pad_count;
482 via_cmdbuf_wait(dev_priv, 2 * CMDBUF_ALIGNMENT_SIZE);
484 vb = via_get_dma(dev_priv);
485 VIA_OUT_RING_QW(HC_HEADER2 | ((VIA_REG_TRANSET >> 2) << 12) |
486 (VIA_REG_TRANSPACE >> 2), HC_ParaType_PreCR << 16);
487 agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
/* quad-words left until the next alignment boundary, counted after the header */
488 qw_pad_count = (CMDBUF_ALIGNMENT_SIZE >> 3) -
489 ((dev_priv->dma_low & CMDBUF_ALIGNMENT_MASK) >> 3);
/* default target: the last quad-word before that boundary */
491 cmd_addr = (addr) ? addr :
492 agp_base + dev_priv->dma_low - 8 + (qw_pad_count << 3);
493 addr_lo = ((HC_SubA_HAGPBpL << 24) | (cmd_type & HC_HAGPBpID_MASK) |
494 (cmd_addr & HC_HAGPBpL_MASK));
495 addr_hi = ((HC_SubA_HAGPBpH << 24) | (cmd_addr >> 24));
/* one slot is kept for the command quad-word written next */
497 vb = via_align_buffer(dev_priv, vb, qw_pad_count - 1);
498 VIA_OUT_RING_QW(*cmd_addr_hi = addr_hi, *cmd_addr_lo = addr_lo);
/*
 * Start the command regulator on an empty ring.
 *
 * Sequence visible here: reset the write offset; compute the ring's start
 * and end bus addresses and encode them as regulator commands; write an
 * initial pause command into the ring with via_align_cmd() and remember its
 * slot; program start, end and pause addresses through VIA_REG_TRANSET /
 * VIA_REG_TRANSPACE; then write the command word with HC_HAGPCMNT_MASK set.
 * Afterwards it polls register 0x41c bit 31 and records in dma_diff the
 * gap between the programmed pause address and the address the hardware
 * reports.
 *
 * NOTE(review): several local declarations and the initialization of
 * `count` are not visible in this listing. No error is reported in the
 * visible lines if the poll runs out of iterations.
 */
502 static void via_cmdbuf_start(drm_via_private_t * dev_priv)
504 uint32_t pause_addr_lo, pause_addr_hi;
505 uint32_t start_addr, start_addr_lo;
506 uint32_t end_addr, end_addr_lo;
513 dev_priv->dma_low = 0;
515 agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
516 start_addr = agp_base;
517 end_addr = agp_base + dev_priv->dma_high;
/* low 24 address bits go in separate commands; the top bytes share `command` */
519 start_addr_lo = ((HC_SubA_HAGPBstL << 24) | (start_addr & 0xFFFFFF));
520 end_addr_lo = ((HC_SubA_HAGPBendL << 24) | (end_addr & 0xFFFFFF));
521 command = ((HC_SubA_HAGPCMNT << 24) | (start_addr >> 24) |
522 ((end_addr & 0xff000000) >> 16));
/* skip_wait is 1 here; the pause slot is the dword just before the returned pointer */
524 dev_priv->last_pause_ptr =
525 via_align_cmd(dev_priv, HC_HAGPBpID_PAUSE, 0,
526 &pause_addr_hi, &pause_addr_lo, 1) - 1;
528 via_flush_write_combine();
/* the value read is discarded; only the access itself is wanted */
529 (void) *(volatile uint32_t *)dev_priv->last_pause_ptr;
531 VIA_WRITE(VIA_REG_TRANSET, (HC_ParaType_PreCR << 16));
532 VIA_WRITE(VIA_REG_TRANSPACE, command);
533 VIA_WRITE(VIA_REG_TRANSPACE, start_addr_lo);
534 VIA_WRITE(VIA_REG_TRANSPACE, end_addr_lo);
536 VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_hi);
537 VIA_WRITE(VIA_REG_TRANSPACE, pause_addr_lo);
538 DRM_WRITEMEMORYBARRIER();
539 VIA_WRITE(VIA_REG_TRANSPACE, command | HC_HAGPCMNT_MASK);
540 VIA_READ(VIA_REG_TRANSPACE);
542 dev_priv->dma_diff = 0;
/* empty-bodied poll loop on bit 31 of register 0x41c */
545 while (!(VIA_READ(0x41c) & 0x80000000) && count--);
547 reader = *(dev_priv->hw_addr_ptr);
548 ptr = ((volatile char *)dev_priv->last_pause_ptr - dev_priv->dma_ptr) +
549 dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr + 4;
552 * This is the difference between where we tell the
553 * command reader to pause and where it actually pauses.
554 * This differs between hw implementation so we need to
558 dev_priv->dma_diff = ptr - reader;
/*
 * Append one header quad-word followed by `qwords` dummy quad-words at the
 * current write position.
 * NOTE(review): via_cmdbuf_wait() adds its size argument to the byte offset
 * dma_low, but the value passed here (qwords + 2) is a quad-word count, so
 * the units look inconsistent -- confirm. The result of via_cmdbuf_wait()
 * is not checked in the visible line.
 * NOTE(review): the declaration of `vb` is not visible in this listing.
 */
561 static void via_pad_cache(drm_via_private_t * dev_priv, int qwords)
565 via_cmdbuf_wait(dev_priv, qwords + 2);
566 vb = via_get_dma(dev_priv);
567 VIA_OUT_RING_QW(HC_HEADER2, HC_ParaType_NotTex << 16);
568 via_align_buffer(dev_priv, vb, qwords);
/*
 * Write three 2D register updates (registers 0x0C, 0x10 and 0x0) at the
 * current write position with SetReg2DAGP, which advances dma_low by
 * 24 bytes in total. No space check is done here; via_cmdbuf_jump() waits
 * for room before calling this.
 */
571 static inline void via_dummy_bitblt(drm_via_private_t * dev_priv)
573 uint32_t *vb = via_get_dma(dev_priv);
574 SetReg2DAGP(0x0C, (0 | (0 << 16)));
575 SetReg2DAGP(0x10, 0 | (0 << 16));
576 SetReg2DAGP(0x0, 0x1 | 0x2000 | 0xAA000000);
/*
 * Wrap the ring: emit a jump command at the current position, record that
 * position as dma_wrap, restart writing at offset 0, emit two dummy blits
 * and two pause commands there, and finally hook the jump into the old
 * pause slot with via_hook_segment().
 *
 * NOTE(review): the rest of the first via_align_cmd() argument list, the
 * assignment target of the first PAUSE via_align_cmd() (presumably
 * last_pause_ptr), the tail of the second PAUSE call and the statement
 * after the DRM_ERROR are not visible in this listing.
 */
579 static void via_cmdbuf_jump(drm_via_private_t * dev_priv)
582 uint32_t pause_addr_lo, pause_addr_hi;
583 uint32_t jump_addr_lo, jump_addr_hi;
584 volatile uint32_t *last_pause_ptr;
586 agp_base = dev_priv->dma_offset + (uint32_t) dev_priv->agpAddr;
587 via_align_cmd(dev_priv, HC_HAGPBpID_JUMP, 0, &jump_addr_hi,
/* remember where the used part of the ring ends; via_cmdbuf_lag() reads this */
590 dev_priv->dma_wrap = dev_priv->dma_low;
593 * Wrap command buffer to the beginning.
596 dev_priv->dma_low = 0;
597 if (via_cmdbuf_wait(dev_priv, CMDBUF_ALIGNMENT_SIZE) != 0) {
598 DRM_ERROR("via_cmdbuf_jump failed\n");
601 via_dummy_bitblt(dev_priv);
602 via_dummy_bitblt(dev_priv);
605 via_align_cmd(dev_priv, HC_HAGPBpID_PAUSE, 0, &pause_addr_hi,
606 &pause_addr_lo, 0) - 1;
607 via_align_cmd(dev_priv, HC_HAGPBpID_PAUSE, 0, &pause_addr_hi,
/* overwrite the first pause slot with the second pause command's low dword */
610 *last_pause_ptr = pause_addr_lo;
612 via_hook_segment( dev_priv, jump_addr_hi, jump_addr_lo, 0);
/* Rewind the ring to its start; the visible body just calls via_cmdbuf_jump(). */
616 static void via_cmdbuf_rewind(drm_via_private_t * dev_priv)
618 via_cmdbuf_jump(dev_priv);
/*
 * Terminate the current ring segment with a regulator command of type
 * cmd_type (written by via_align_cmd()) and hook it to the previous pause
 * slot with via_hook_segment(), passing no_pci_fire as 0.
 */
621 static void via_cmdbuf_flush(drm_via_private_t * dev_priv, uint32_t cmd_type)
623 uint32_t pause_addr_lo, pause_addr_hi;
625 via_align_cmd(dev_priv, cmd_type, 0, &pause_addr_hi, &pause_addr_lo, 0);
626 via_hook_segment(dev_priv, pause_addr_hi, pause_addr_lo, 0);
/* Flush the current segment, ending it with an HC_HAGPBpID_PAUSE command. */
629 static void via_cmdbuf_pause(drm_via_private_t * dev_priv)
631 via_cmdbuf_flush(dev_priv, HC_HAGPBpID_PAUSE);
/*
 * Flush the current segment, ending it with an HC_HAGPBpID_STOP command,
 * then wait for idle. The result of via_wait_idle() is ignored here.
 */
634 static void via_cmdbuf_reset(drm_via_private_t * dev_priv)
636 via_cmdbuf_flush(dev_priv, HC_HAGPBpID_STOP);
637 via_wait_idle(dev_priv);
641 * User interface to the space and lag functions.
/*
 * Ioctl handler exposing via_cmdbuf_space() and via_cmdbuf_lag() to user
 * space. `data` is a drm_via_cmdbuf_size_t whose `func` selects the query
 * and whose `size` is the threshold:
 *  - VIA_CMDBUF_SPACE: loop while free space is below d_siz->size;
 *  - a second case (label not visible; the error text names
 *    VIA_CMDBUF_LAG): loop while the lag is above d_siz->size.
 * On exit d_siz->size is overwritten with the last measured value.
 * Requires an initialized ring and passes LOCK_TEST_WITH_RETURN() first.
 *
 * NOTE(review): the second operand of each loop condition, the loop
 * bodies, the timeout tests, any default case and the return statements
 * are not visible in this listing.
 */
644 static int via_cmdbuf_size(struct drm_device *dev, void *data, struct drm_file *file_priv)
646 drm_via_cmdbuf_size_t *d_siz = data;
648 uint32_t tmp_size, count;
649 drm_via_private_t *dev_priv;
651 DRM_DEBUG("via cmdbuf_size\n");
652 LOCK_TEST_WITH_RETURN(dev, file_priv);
654 dev_priv = (drm_via_private_t *) dev->dev_private;
656 if (dev_priv->ring.virtual_start == NULL) {
657 DRM_ERROR("%s called without initializing AGP ring buffer.\n",
663 tmp_size = d_siz->size;
664 switch (d_siz->func) {
665 case VIA_CMDBUF_SPACE:
666 while (((tmp_size = via_cmdbuf_space(dev_priv)) < d_siz->size)
673 DRM_ERROR("VIA_CMDBUF_SPACE timed out.\n");
678 while (((tmp_size = via_cmdbuf_lag(dev_priv)) > d_siz->size)
685 DRM_ERROR("VIA_CMDBUF_LAG timed out.\n");
/* hand the last measured value back to the caller */
692 d_siz->size = tmp_size;
/*
 * Driver ioctl table: each entry pairs an ioctl number with its handler and
 * the permission flags for the call.
 * The AGP/FB/MAP init entries carry DRM_AUTH|DRM_MASTER. DRM_VIA_DMA_INIT
 * carries DRM_AUTH only; via_dma_init() itself checks DRM_SUSER before its
 * initialize and cleanup functions.
 * NOTE(review): the closing line of the initializer is not visible in this
 * listing.
 */
697 struct drm_ioctl_desc via_ioctls[] = {
698 DRM_IOCTL_DEF(DRM_VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH),
699 DRM_IOCTL_DEF(DRM_VIA_FREEMEM, via_mem_free, DRM_AUTH),
700 DRM_IOCTL_DEF(DRM_VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER),
701 DRM_IOCTL_DEF(DRM_VIA_FB_INIT, via_fb_init, DRM_AUTH|DRM_MASTER),
702 DRM_IOCTL_DEF(DRM_VIA_MAP_INIT, via_map_init, DRM_AUTH|DRM_MASTER),
703 DRM_IOCTL_DEF(DRM_VIA_DEC_FUTEX, via_decoder_futex, DRM_AUTH),
704 DRM_IOCTL_DEF(DRM_VIA_DMA_INIT, via_dma_init, DRM_AUTH),
705 DRM_IOCTL_DEF(DRM_VIA_CMDBUFFER, via_cmdbuffer, DRM_AUTH),
706 DRM_IOCTL_DEF(DRM_VIA_FLUSH, via_flush_ioctl, DRM_AUTH),
707 DRM_IOCTL_DEF(DRM_VIA_PCICMD, via_pci_cmdbuffer, DRM_AUTH),
708 DRM_IOCTL_DEF(DRM_VIA_CMDBUF_SIZE, via_cmdbuf_size, DRM_AUTH),
709 DRM_IOCTL_DEF(DRM_VIA_WAIT_IRQ, via_wait_irq, DRM_AUTH),
710 DRM_IOCTL_DEF(DRM_VIA_DMA_BLIT, via_dma_blit, DRM_AUTH),
711 DRM_IOCTL_DEF(DRM_VIA_BLIT_SYNC, via_dma_blit_sync, DRM_AUTH)
/* Number of entries in via_ioctls[], computed with DRM_ARRAY_SIZE. */
714 int via_max_ioctl = DRM_ARRAY_SIZE(via_ioctls);