/*
 * Dynamic DMA mapping support for AMD Hammer.
 *
 * Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows the use of PCI devices that only support 32bit addresses on
 * systems with more than 4GB of memory.
 *
 * See Documentation/DMA-mapping.txt for the interface specification.
 * Copyright 2002 Andi Kleen, SuSE Labs.
 */
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/cacheflush.h>
#include <asm/kdebug.h>
#include <asm/swiotlb.h>
unsigned long iommu_bus_base;	/* GART remapping area (physical) */
static unsigned long iommu_size;	/* size of remapping area in bytes */
static unsigned long iommu_pages;	/* .. and in pages */

u32 *iommu_gatt_base;		/* Remapping table */
/* If this is disabled the IOMMU will use an optimized flushing strategy
   of only flushing when a mapping is reused. When it is set the GART is
   flushed for every mapping. Problem is that doing the lazy flush seems
   to trigger bugs with some popular PCI cards, in particular 3ware (but
   it has also been seen with Qlogic at least). */
int iommu_fullflush = 1;
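/* In lazy mode the GART TLB is only flushed when the allocator wraps
   around the aperture bitmap or has to reuse space (see alloc_iommu());
   with iommu_fullflush set, need_flush is forced on every allocation. */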
#define MAX_NB 8

/* Allocation bitmap for the remapping area */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */

static u32 gart_unmapped_entry;
#define GPTE_VALID    1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
	(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
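/* A GART PTE keeps physical address bits 12-31 in place and stores bits
   32-39 in PTE bits 4-11. Example: GPTE_ENCODE(0x123456000) =
   0x23456000 | (0x1 << 4) | 3 = 0x23456013, and GPTE_DECODE(0x23456013)
   recovers 0x123456000. */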
#define to_pages(addr, size) \
	(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
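/* to_pages() counts the 4K pages touched by a buffer, including partial
   pages at both ends: e.g. a 0x20 byte buffer starting at page offset
   0xff0 crosses a page boundary and needs 2 GART entries. */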
#define for_all_nb(dev) \
	dev = NULL;	\
	while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev)) != NULL)

static struct pci_dev *northbridges[MAX_NB];
static u32 northbridge_flush_word[MAX_NB];
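/* Each K8 northbridge (PCI device 1022:1103) exposes the GART cache
   control word at config offset 0x9c; flush_gart() below sets bit 0 to
   invalidate the GART TLB and waits for the hardware to clear it. */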
#define EMERGENCY_PAGES 32 /* = 128KB */

#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif

/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit;	/* protected by iommu_bitmap_lock */
static int need_flush;		/* global flush state. set for each gart wrap */
static unsigned long alloc_iommu(int size)
{
	unsigned long offset, flags;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	offset = find_next_zero_string(iommu_gart_bitmap, next_bit, iommu_pages, size);
	if (offset == -1) {
		need_flush = 1;
		offset = find_next_zero_string(iommu_gart_bitmap, 0, next_bit, size);
	}
	if (offset != -1) {
		set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset + size;
		if (next_bit >= iommu_pages) {
			next_bit = 0;
			need_flush = 1;
		}
	}
	if (iommu_fullflush)
		need_flush = 1;
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
	return offset;
}
static void free_iommu(unsigned long offset, int size)
{
	unsigned long flags;

	if (size == 1) {
		clear_bit(offset, iommu_gart_bitmap);
		return;
	}
	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	__clear_bit_string(iommu_gart_bitmap, offset, size);
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
/*
 * Use global flush state to avoid races with multiple flushers.
 */
static void flush_gart(struct device *dev)
{
	unsigned long flags;
	int flushed = 0;
	int i, max;

	spin_lock_irqsave(&iommu_bitmap_lock, flags);
	if (need_flush) {
		max = 0;
		for (i = 0; i < MAX_NB; i++) {
			if (!northbridges[i])
				continue;
			pci_write_config_dword(northbridges[i], 0x9c,
					       northbridge_flush_word[i] | 1);
			flushed++;
			max = i;
		}
		for (i = 0; i <= max; i++) {
			u32 w;
			if (!northbridges[i])
				continue;
			/* Make sure the hardware actually executed the flush. */
			do {
				pci_read_config_dword(northbridges[i], 0x9c, &w);
			} while (w & 1);
		}
		if (!flushed)
			printk("nothing to flush?\n");
		need_flush = 0;
	}
	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
#ifdef CONFIG_IOMMU_LEAK

#define SET_LEAK(x) if (iommu_leak_tab) \
			iommu_leak_tab[x] = __builtin_return_address(0);
#define CLEAR_LEAK(x) if (iommu_leak_tab) \
			iommu_leak_tab[x] = NULL;

/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_pages = 20;

static void dump_leak(void)
{
	int i;
	static int dump;

	if (dump || !iommu_leak_tab)
		return;
	dump = 1;
	show_stack(NULL, NULL);
	/* Very crude. dump some from the end of the table too */
	printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
	for (i = 0; i < iommu_leak_pages; i += 2) {
		printk("%lu: ", iommu_pages - i);
		printk_address((unsigned long) iommu_leak_tab[iommu_pages - i]);
		printk("%c", (i + 1) % 2 == 0 ? '\n' : ' ');
	}
	printk("\n");
}
#else
#define SET_LEAK(x)
#define CLEAR_LEAK(x)
#endif
static void iommu_full(struct device *dev, size_t size, int dir)
{
	/*
	 * Ran out of IOMMU space for this operation. This is very bad.
	 * Unfortunately the drivers cannot handle this operation properly.
	 * Return some non mapped prereserved space in the aperture and
	 * let the Northbridge deal with it. This will result in garbage
	 * in the IO operation. When the size exceeds the prereserved space
	 * memory corruption will occur or random memory will be DMAed
	 * out. Hopefully no network devices use single mappings that big.
	 */

	printk(KERN_ERR
	       "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n",
	       size, dev->bus_id);

	if (size > PAGE_SIZE*EMERGENCY_PAGES) {
		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic("PCI-DMA: Memory would be corrupted\n");
		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
			panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n");
	}

#ifdef CONFIG_IOMMU_LEAK
	dump_leak();
#endif
}
static inline int need_iommu(struct device *dev, unsigned long addr, size_t size)
{
	u64 mask = *dev->dma_mask;
	int high = addr + size >= mask;
	return high || force_iommu;
}

static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
{
	u64 mask = *dev->dma_mask;
	int high = addr + size >= mask;
	return high;
}
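/* need_iommu() also honours the global force_iommu override;
   nonforced_iommu() only checks the device DMA mask, so forced mappings
   can still fall back to the physical address on aperture overflow. */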
/* Map a single contiguous physical area into the IOMMU.
 * Caller needs to check if the iommu is needed and flush.
 */
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
			       size_t size, int dir)
{
	unsigned long npages = to_pages(phys_mem, size);
	unsigned long iommu_page = alloc_iommu(npages);
	int i;

	if (iommu_page == -1) {
		if (!nonforced_iommu(dev, phys_mem, size))
			return phys_mem;
		if (panic_on_overflow)
			panic("dma_map_area overflow %lu bytes\n", size);
		iommu_full(dev, size, dir);
		return bad_dma_address;
	}

	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
		SET_LEAK(iommu_page + i);
		phys_mem += PAGE_SIZE;
	}
	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
}
static dma_addr_t gart_map_simple(struct device *dev, char *buf,
				  size_t size, int dir)
{
	dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
	flush_gart(dev);
	return map;
}
/* Map a single area into the IOMMU */
dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
{
	unsigned long phys_mem, bus;

	BUG_ON(dir == DMA_NONE);

	phys_mem = virt_to_phys(addr);
	if (!need_iommu(dev, phys_mem, size))
		return phys_mem;

	bus = gart_map_simple(dev, addr, size, dir);
	return bus;
}
/*
 * Wrapper for pci_unmap_single working with scatterlists.
 */
void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
	int i;

	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		if (!s->dma_length || !s->length)
			break;
		dma_unmap_single(dev, s->dma_address, s->dma_length, dir);
	}
}
/* Fallback for dma_map_sg in case of overflow */
static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
			       int nents, int dir)
{
	int i;

#ifdef CONFIG_IOMMU_DEBUG
	printk(KERN_DEBUG "dma_map_sg overflow\n");
#endif

	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		unsigned long addr = page_to_phys(s->page) + s->offset;
		if (nonforced_iommu(dev, addr, s->length)) {
			addr = dma_map_area(dev, addr, s->length, dir);
			if (addr == bad_dma_address) {
				if (i > 0)
					gart_unmap_sg(dev, sg, i, dir);
				nents = 0;
				sg[0].dma_length = 0;
				break;
			}
		}
		s->dma_address = addr;
		s->dma_length = s->length;
	}
	flush_gart(dev);
	return nents;
}
/* Map multiple scatterlist entries contiguously into the first. */
static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
			  struct scatterlist *sout, unsigned long pages)
{
	unsigned long iommu_start = alloc_iommu(pages);
	unsigned long iommu_page = iommu_start;
	int i;

	if (iommu_start == -1)
		return -1;

	for (i = start; i < stopat; i++) {
		struct scatterlist *s = &sg[i];
		unsigned long pages, addr;
		unsigned long phys_addr = s->dma_address;

		BUG_ON(i > start && s->offset);
		if (i == start) {
			*sout = *s;
			sout->dma_address = iommu_bus_base;
			sout->dma_address += iommu_page*PAGE_SIZE + s->offset;
			sout->dma_length = s->length;
		} else {
			sout->dma_length += s->length;
		}

		addr = phys_addr;
		pages = to_pages(s->offset, s->length);
		while (pages--) {
			iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
			SET_LEAK(iommu_page);
			addr += PAGE_SIZE;
			iommu_page++;
		}
	}
	BUG_ON(iommu_page - iommu_start != pages);
	return 0;
}
static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
			       struct scatterlist *sout,
			       unsigned long pages, int need)
{
	if (!need) {
		BUG_ON(stopat - start != 1);
		*sout = sg[start];
		sout->dma_length = sg[start].length;
		return 0;
	}
	return __dma_map_cont(sg, start, stopat, sout, pages);
}
/*
 * DMA map all entries in a scatterlist.
 * Merge chunks that have page aligned sizes into a contiguous mapping.
 */
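/* For example, two physically discontiguous 4K chunks can become one 8K
   DMA segment: the merge below only happens when the previous chunk ends
   on a page boundary and the next one starts at offset 0, so the merged
   range maps onto consecutive GART aperture pages. */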
int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
	int i;
	int out;
	int start;
	unsigned long pages = 0;
	int need = 0, nextneed;

	BUG_ON(dir == DMA_NONE);
	if (nents == 0)
		return 0;

	out = 0;
	start = 0;
	for (i = 0; i < nents; i++) {
		struct scatterlist *s = &sg[i];
		dma_addr_t addr = page_to_phys(s->page) + s->offset;
		s->dma_address = addr;
		BUG_ON(s->length == 0);

		nextneed = need_iommu(dev, addr, s->length);

		/* Handle the previous not yet processed entries */
		if (i > start) {
			struct scatterlist *ps = &sg[i-1];
			/* Can only merge when the last chunk ends on a page
			   boundary and the new one doesn't have an offset. */
			if (!iommu_merge || !nextneed || !need || s->offset ||
			    (ps->offset + ps->length) % PAGE_SIZE) {
				if (dma_map_cont(sg, start, i, sg+out, pages,
						 need) < 0)
					goto error;
				out++;
				pages = 0;
				start = i;
			}
		}

		need = nextneed;
		pages += to_pages(s->offset, s->length);
	}
	if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
		goto error;
	out++;
	flush_gart(dev);
	if (out < nents)
		sg[out].dma_length = 0;
	return out;

error:
	flush_gart(NULL);
	gart_unmap_sg(dev, sg, nents, dir);
	/* When it was forced or merged try again in a dumb way */
	if (force_iommu || iommu_merge) {
		out = dma_map_sg_nonforce(dev, sg, nents, dir);
		if (out > 0)
			return out;
	}
	if (panic_on_overflow)
		panic("dma_map_sg: overflow on %lu pages\n", pages);
	iommu_full(dev, pages << PAGE_SHIFT, dir);
	for (i = 0; i < nents; i++)
		sg[i].dma_address = bad_dma_address;
	return 0;
}
/*
 * Free a DMA mapping.
 */
void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
		       size_t size, int direction)
{
	unsigned long iommu_page;
	int npages;
	int i;

	if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE ||
	    dma_addr >= iommu_bus_base + iommu_size)
		return;
	iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
	npages = to_pages(dma_addr, size);
	for (i = 0; i < npages; i++) {
		iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
		CLEAR_LEAK(iommu_page + i);
	}
	free_iommu(iommu_page, npages);
}
static int no_agp;

static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
{
	unsigned long a;

	if (!iommu_size) {
		iommu_size = aper_size;
		if (!no_agp)
			iommu_size /= 2;
	}
	a = aper + iommu_size;
	iommu_size -= round_up(a, LARGE_PAGE_SIZE) - a;

	if (iommu_size < 64*1024*1024)
		printk(KERN_WARNING
		       "PCI-DMA: Warning: Small IOMMU %luMB. Consider increasing the AGP aperture in BIOS\n",
		       iommu_size >> 20);
	return iommu_size;
}
static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
{
	unsigned aper_size = 0, aper_base_32;
	u64 aper_base;
	unsigned aper_order;

	pci_read_config_dword(dev, 0x94, &aper_base_32);
	pci_read_config_dword(dev, 0x90, &aper_order);
	aper_order = (aper_order >> 1) & 7;

	aper_base = aper_base_32 & 0x7fff;
	aper_base <<= 25;

	aper_size = (32 * 1024 * 1024) << aper_order;
	if (aper_base + aper_size >= 0xffffffff || !aper_size)
		aper_base = 0;

	*size = aper_size;
	return aper_base;
}
/*
 * Private Northbridge GATT initialization in case we cannot use the
 * AGP driver for some reason.
 */
static __init int init_k8_gatt(struct agp_kern_info *info)
{
	struct pci_dev *dev;
	void *gatt;
	unsigned aper_base, new_aper_base;
	unsigned aper_size, gatt_size, new_aper_size;

	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
	aper_size = aper_base = info->aper_size = 0;
	for_all_nb(dev) {
		new_aper_base = read_aperture(dev, &new_aper_size);
		if (!new_aper_base)
			goto nommu;

		if (!aper_base) {
			aper_size = new_aper_size;
			aper_base = new_aper_base;
		}
		if (aper_size != new_aper_size || aper_base != new_aper_base)
			goto nommu;
	}
	if (!aper_base)
		goto nommu;
	info->aper_base = aper_base;
	info->aper_size = aper_size >> 20;

	gatt_size = (aper_size >> PAGE_SHIFT) * sizeof(u32);
	gatt = (void *)__get_free_pages(GFP_KERNEL, get_order(gatt_size));
	if (!gatt)
		panic("Cannot allocate GATT table");
	memset(gatt, 0, gatt_size);
	agp_gatt_table = gatt;

	for_all_nb(dev) {
		u32 ctl;
		u32 gatt_reg;

		gatt_reg = __pa(gatt) >> 12;
		gatt_reg <<= 4;
		pci_write_config_dword(dev, 0x98, gatt_reg);
		pci_read_config_dword(dev, 0x90, &ctl);

		ctl |= 1;
		ctl &= ~((1<<4) | (1<<5));

		pci_write_config_dword(dev, 0x90, ctl);
	}
	flush_gart(NULL);

	printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size >> 10);
	return 0;

 nommu:
	/* Should not happen anymore */
	printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n"
	       KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n");
	return -1;
}
extern int agp_amd64_init(void);

static struct dma_mapping_ops gart_dma_ops = {
	.mapping_error = NULL,
	.map_single = gart_map_single,
	.map_simple = gart_map_simple,
	.unmap_single = gart_unmap_single,
	.sync_single_for_cpu = NULL,
	.sync_single_for_device = NULL,
	.sync_single_range_for_cpu = NULL,
	.sync_single_range_for_device = NULL,
	.sync_sg_for_cpu = NULL,
	.sync_sg_for_device = NULL,
	.map_sg = gart_map_sg,
	.unmap_sg = gart_unmap_sg,
};
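/* The sync_* hooks are left NULL: GART translations alias ordinary
   cache-coherent RAM (GPTE_COHERENT), so the generic dma_sync_* wrappers
   presumably have nothing extra to do for this backend. */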
static int __init pci_iommu_init(void)
{
	struct agp_kern_info info;
	unsigned long aper_size;
	unsigned long iommu_start;
	struct pci_dev *dev;
	unsigned long scratch;
	long i;

#ifndef CONFIG_AGP_AMD64
	no_agp = 1;
#else
	/* Makefile puts PCI initialization via subsys_initcall first. */
	/* Add other K8 AGP bridge drivers here */
	no_agp = no_agp ||
		(agp_amd64_init() < 0) ||
		(agp_copy_info(agp_bridge, &info) < 0);
#endif

	if (no_iommu ||
	    (!force_iommu && end_pfn <= MAX_DMA32_PFN) ||
	    !iommu_aperture ||
	    (no_agp && init_k8_gatt(&info) < 0)) {
		printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n");
		if (end_pfn > MAX_DMA32_PFN) {
			printk(KERN_ERR "WARNING more than 4GB of memory "
					"but IOMMU not compiled in.\n"
			       KERN_ERR "WARNING 32bit PCI may malfunction.\n"
			       KERN_ERR "You might want to enable "
					"CONFIG_GART_IOMMU\n");
		}
		return -1;
	}
	printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n");
	aper_size = info.aper_size * 1024 * 1024;
	iommu_size = check_iommu_size(info.aper_base, aper_size);
	iommu_pages = iommu_size >> PAGE_SHIFT;

	iommu_gart_bitmap = (void *)__get_free_pages(GFP_KERNEL,
						     get_order(iommu_pages/8));
	if (!iommu_gart_bitmap)
		panic("Cannot allocate iommu bitmap\n");
	memset(iommu_gart_bitmap, 0, iommu_pages/8);

#ifdef CONFIG_IOMMU_LEAK
	if (leak_trace) {
		iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL,
				  get_order(iommu_pages*sizeof(void *)));
		if (iommu_leak_tab)
			memset(iommu_leak_tab, 0, iommu_pages * 8);
		else
			printk("PCI-DMA: Cannot allocate leak trace area\n");
	}
#endif

	/*
	 * Out of IOMMU space handling.
	 * Reserve some invalid pages at the beginning of the GART.
	 */
	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);

	agp_memory_reserved = iommu_size;
	printk(KERN_INFO
	       "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n",
	       iommu_size >> 20);

	iommu_start = aper_size - iommu_size;
	iommu_bus_base = info.aper_base + iommu_start;
	bad_dma_address = iommu_bus_base;
	iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT);
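	/* The IOMMU occupies the top iommu_size bytes of the aperture;
	   bad_dma_address points at the reserved EMERGENCY_PAGES region at
	   its start, which is never handed out by alloc_iommu(). */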
	/*
	 * Unmap the IOMMU part of the GART. The alias of the page is
	 * always mapped with cache enabled and there is no full cache
	 * coherency across the GART remapping. The unmapping avoids
	 * automatic prefetches from the CPU allocating cache lines in
	 * there. All CPU accesses are done via the direct mapping to
	 * the backing memory. The GART address is only used by PCI
	 * devices.
	 */
	clear_kernel_mapping((unsigned long)__va(iommu_bus_base), iommu_size);

	/*
	 * Try to workaround a bug (thanks to BenH):
	 * Set unmapped entries to a scratch page instead of 0.
	 * Any prefetches that hit unmapped entries won't get a bus abort
	 * then.
	 */
	scratch = get_zeroed_page(GFP_KERNEL);
	if (!scratch)
		panic("Cannot allocate iommu scratch page");
	gart_unmapped_entry = GPTE_ENCODE(__pa(scratch));
	for (i = EMERGENCY_PAGES; i < iommu_pages; i++)
		iommu_gatt_base[i] = gart_unmapped_entry;
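	/* Pointing every unused GATT entry at one zeroed scratch page means
	   stray prefetches through the aperture hit harmless memory instead
	   of faulting on an invalid entry. */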
	for_all_nb(dev) {
		u32 flag;
		int cpu = PCI_SLOT(dev->devfn) - 24;
		if (cpu >= MAX_NB)
			continue;
		northbridges[cpu] = dev;
		pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */
		northbridge_flush_word[cpu] = flag;
	}

	flush_gart(NULL);

	dma_ops = &gart_dma_ops;

	return 0;
}

/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
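/*
 * gart_parse_options() handles the iommu= command line suboptions used
 * below: leak (optionally followed by a page count), a bare number for
 * the IOMMU size, fullflush, nofullflush, noagp, noaperture,
 * memaper[=order], plus force/allowed (duplicated from pci-dma.c).
 */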
void gart_parse_options(char *p)
{
	int arg;

#ifdef CONFIG_IOMMU_LEAK
	if (!strncmp(p, "leak", 4)) {
		leak_trace = 1;
		p += 4;
		if (isdigit(*p) && get_option(&p, &arg))
			iommu_leak_pages = arg;
	}
#endif
	if (isdigit(*p) && get_option(&p, &arg))
		iommu_size = arg;
	if (!strncmp(p, "fullflush", 8))
		iommu_fullflush = 1;
	if (!strncmp(p, "nofullflush", 11))
		iommu_fullflush = 0;
	if (!strncmp(p, "noagp", 5))
		no_agp = 1;
	if (!strncmp(p, "noaperture", 10))
		fix_aperture = 0;
	/* duplicated from pci-dma.c */
	if (!strncmp(p, "force", 5))
		iommu_aperture_allowed = 1;
	if (!strncmp(p, "allowed", 7))
		iommu_aperture_allowed = 1;
	if (!strncmp(p, "memaper", 7)) {
		fallback_aper_force = 1;
		p += 7;
		if (*p == '=') {
			++p;
			if (get_option(&p, &arg))
				fallback_aper_order = arg;
		}
	}
}