2 * Copyright (c) 2006, Intel Corporation.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
42 #define ROOT_SIZE VTD_PAGE_SIZE
43 #define CONTEXT_SIZE VTD_PAGE_SIZE
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
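/*
 * Illustrative note on the class tests above (constant values are from the
 * PCI spec, not defined in this file): pdev->class packs base class,
 * sub-class and prog-if as 0xBBSSPP.  A VGA controller has class 0x030000,
 * so (class >> 16) == PCI_BASE_CLASS_DISPLAY (0x03); an ISA bridge has
 * class 0x060100, so (class >> 8) == PCI_CLASS_BRIDGE_ISA (0x0601).
 */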
48 #define IOAPIC_RANGE_START (0xfee00000)
49 #define IOAPIC_RANGE_END (0xfeefffff)
50 #define IOVA_START_ADDR (0x1000)
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
60 /* global iommu list, set NULL for ignored DMAR units */
61 static struct intel_iommu **g_iommus;
66 * 12-63: Context Ptr (12 - (haw-1))
73 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
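/*
 * Worked example of the root entry layout (made-up address): a root entry
 * whose context table sits at physical address 0x12345000 and is marked
 * present holds val == 0x12345001 -- the page-aligned pointer in bits 12-63
 * with the present bit (bit 0) set, exactly as the helpers below encode it.
 */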
74 static inline bool root_present(struct root_entry *root)
76 return (root->val & 1);
78 static inline void set_root_present(struct root_entry *root)
82 static inline void set_root_value(struct root_entry *root, unsigned long value)
84 root->val |= value & VTD_PAGE_MASK;
87 static inline struct context_entry *
88 get_context_addr_from_root(struct root_entry *root)
90 return (struct context_entry *)
91 (root_present(root)?phys_to_virt(
92 root->val & VTD_PAGE_MASK) :
99 * 1: fault processing disable
100 * 2-3: translation type
101 * 12-63: address space root
107 struct context_entry {
112 static inline bool context_present(struct context_entry *context)
114 return (context->lo & 1);
116 static inline void context_set_present(struct context_entry *context)
121 static inline void context_set_fault_enable(struct context_entry *context)
123 context->lo &= (((u64)-1) << 2) | 1;
126 #define CONTEXT_TT_MULTI_LEVEL 0
128 static inline void context_set_translation_type(struct context_entry *context,
131 context->lo &= (((u64)-1) << 4) | 3;
132 context->lo |= (value & 3) << 2;
135 static inline void context_set_address_root(struct context_entry *context,
138 context->lo |= value & VTD_PAGE_MASK;
141 static inline void context_set_address_width(struct context_entry *context,
144 context->hi |= value & 7;
147 static inline void context_set_domain_id(struct context_entry *context,
150 context->hi |= (value & ((1 << 16) - 1)) << 8;
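/*
 * Worked example of the hi-word packing (example values only): a domain
 * with id 5 using a 4-level table (agaw 2) ends up with
 * hi == (5 << 8) | 2 == 0x502 -- address width in bits 0-2, domain id in
 * bits 8-23, as set by context_set_address_width()/context_set_domain_id().
 */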
153 static inline void context_clear_entry(struct context_entry *context)
165 * 12-63: Host physical address
171 static inline void dma_clear_pte(struct dma_pte *pte)
176 static inline void dma_set_pte_readable(struct dma_pte *pte)
178 pte->val |= DMA_PTE_READ;
181 static inline void dma_set_pte_writable(struct dma_pte *pte)
183 pte->val |= DMA_PTE_WRITE;
186 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
188 pte->val = (pte->val & ~3) | (prot & 3);
191 static inline u64 dma_pte_addr(struct dma_pte *pte)
193 return (pte->val & VTD_PAGE_MASK);
196 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
198 pte->val |= (addr & VTD_PAGE_MASK);
201 static inline bool dma_pte_present(struct dma_pte *pte)
203 return (pte->val & 3) != 0;
206 /* devices under the same p2p bridge are owned in one domain */
207 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
209 /* domain represents a virtual machine; more than one device
210 * across iommus may be owned by one domain, e.g. a kvm guest.
212 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
215 int id; /* domain id */
216 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
218 struct list_head devices; /* all devices' list */
219 struct iova_domain iovad; /* iova's that belong to this domain */
221 struct dma_pte *pgd; /* virtual address */
222 spinlock_t mapping_lock; /* page table lock */
223 int gaw; /* max guest address width */
225 /* adjusted guest address width; 0 means 2-level, 30-bit */
228 int flags; /* flags to find out type of domain */
230 int iommu_coherency;/* indicate coherency of iommu access */
231 int iommu_count; /* reference count of iommu */
232 spinlock_t iommu_lock; /* protect iommu set in domain */
235 /* PCI domain-device relationship */
236 struct device_domain_info {
237 struct list_head link; /* link to domain siblings */
238 struct list_head global; /* link to global list */
239 u8 bus; /* PCI bus number */
240 u8 devfn; /* PCI devfn number */
241 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
242 struct dmar_domain *domain; /* pointer to domain */
245 static void flush_unmaps_timeout(unsigned long data);
247 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
249 #define HIGH_WATER_MARK 250
250 struct deferred_flush_tables {
252 struct iova *iova[HIGH_WATER_MARK];
253 struct dmar_domain *domain[HIGH_WATER_MARK];
256 static struct deferred_flush_tables *deferred_flush;
258 /* number of IOMMUs; used to size per-iommu arrays and bitmaps */
259 static int g_num_of_iommus;
261 static DEFINE_SPINLOCK(async_umap_flush_lock);
262 static LIST_HEAD(unmaps_to_do);
265 static long list_size;
267 static void domain_remove_dev_info(struct dmar_domain *domain);
270 static int __initdata dmar_map_gfx = 1;
271 static int dmar_forcedac;
272 static int intel_iommu_strict;
274 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
275 static DEFINE_SPINLOCK(device_domain_lock);
276 static LIST_HEAD(device_domain_list);
278 static int __init intel_iommu_setup(char *str)
283 if (!strncmp(str, "off", 3)) {
285 printk(KERN_INFO"Intel-IOMMU: disabled\n");
286 } else if (!strncmp(str, "igfx_off", 8)) {
289 "Intel-IOMMU: disable GFX device mapping\n");
290 } else if (!strncmp(str, "forcedac", 8)) {
292 "Intel-IOMMU: Forcing DAC for PCI devices\n");
294 } else if (!strncmp(str, "strict", 6)) {
296 "Intel-IOMMU: disable batched IOTLB flush\n");
297 intel_iommu_strict = 1;
300 str += strcspn(str, ",");
306 __setup("intel_iommu=", intel_iommu_setup);
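/*
 * Example command-line usage (options can be comma-separated, as the
 * strcspn() walk above implies):
 *
 *	intel_iommu=off			disable DMA remapping entirely
 *	intel_iommu=igfx_off,strict	skip gfx device mapping and flush
 *					the IOTLB synchronously on unmap
 *	intel_iommu=forcedac		don't prefer the 32-bit IOVA range
 */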
308 static struct kmem_cache *iommu_domain_cache;
309 static struct kmem_cache *iommu_devinfo_cache;
310 static struct kmem_cache *iommu_iova_cache;
312 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
317 /* trying to avoid low memory issues */
318 flags = current->flags & PF_MEMALLOC;
319 current->flags |= PF_MEMALLOC;
320 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
321 current->flags &= (~PF_MEMALLOC | flags);
326 static inline void *alloc_pgtable_page(void)
331 /* trying to avoid low memory issues */
332 flags = current->flags & PF_MEMALLOC;
333 current->flags |= PF_MEMALLOC;
334 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
335 current->flags &= (~PF_MEMALLOC | flags);
339 static inline void free_pgtable_page(void *vaddr)
341 free_page((unsigned long)vaddr);
344 static inline void *alloc_domain_mem(void)
346 return iommu_kmem_cache_alloc(iommu_domain_cache);
349 static void free_domain_mem(void *vaddr)
351 kmem_cache_free(iommu_domain_cache, vaddr);
354 static inline void * alloc_devinfo_mem(void)
356 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
359 static inline void free_devinfo_mem(void *vaddr)
361 kmem_cache_free(iommu_devinfo_cache, vaddr);
364 struct iova *alloc_iova_mem(void)
366 return iommu_kmem_cache_alloc(iommu_iova_cache);
369 void free_iova_mem(struct iova *iova)
371 kmem_cache_free(iommu_iova_cache, iova);
375 static inline int width_to_agaw(int width);
377 /* calculate agaw for each iommu.
378 * "SAGAW" may be different across iommus, use a default agaw, and
379 * get a supported smaller agaw for iommus that don't support the default agaw.
381 int iommu_calculate_agaw(struct intel_iommu *iommu)
386 sagaw = cap_sagaw(iommu->cap);
387 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
389 if (test_bit(agaw, &sagaw))
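/*
 * Worked example of the search above: DEFAULT_DOMAIN_ADDRESS_WIDTH (48)
 * corresponds to agaw 2, i.e. a 4-level page table (see width_to_agaw()
 * and agaw_to_level() below).  If bit 2 of SAGAW is clear, the loop falls
 * back to a smaller supported agaw, e.g. agaw 1 (39-bit, 3-level).
 */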
396 /* in native case, each domain is related to only one iommu */
397 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
401 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
403 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
404 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
407 return g_iommus[iommu_id];
410 /* "Coherency" capability may be different across iommus */
411 static void domain_update_iommu_coherency(struct dmar_domain *domain)
415 domain->iommu_coherency = 1;
417 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
418 for (; i < g_num_of_iommus; ) {
419 if (!ecap_coherent(g_iommus[i]->ecap)) {
420 domain->iommu_coherency = 0;
423 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
427 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
429 struct dmar_drhd_unit *drhd = NULL;
432 for_each_drhd_unit(drhd) {
436 for (i = 0; i < drhd->devices_cnt; i++)
437 if (drhd->devices[i]->bus->number == bus &&
438 drhd->devices[i]->devfn == devfn)
441 if (drhd->include_all)
448 static void domain_flush_cache(struct dmar_domain *domain,
449 void *addr, int size)
451 if (!domain->iommu_coherency)
452 clflush_cache_range(addr, size);
455 /* Gets context entry for a given bus and devfn */
456 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
459 struct root_entry *root;
460 struct context_entry *context;
461 unsigned long phy_addr;
464 spin_lock_irqsave(&iommu->lock, flags);
465 root = &iommu->root_entry[bus];
466 context = get_context_addr_from_root(root);
468 context = (struct context_entry *)alloc_pgtable_page();
470 spin_unlock_irqrestore(&iommu->lock, flags);
473 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
474 phy_addr = virt_to_phys((void *)context);
475 set_root_value(root, phy_addr);
476 set_root_present(root);
477 __iommu_flush_cache(iommu, root, sizeof(*root));
479 spin_unlock_irqrestore(&iommu->lock, flags);
480 return &context[devfn];
483 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
485 struct root_entry *root;
486 struct context_entry *context;
490 spin_lock_irqsave(&iommu->lock, flags);
491 root = &iommu->root_entry[bus];
492 context = get_context_addr_from_root(root);
497 ret = context_present(&context[devfn]);
499 spin_unlock_irqrestore(&iommu->lock, flags);
503 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
505 struct root_entry *root;
506 struct context_entry *context;
509 spin_lock_irqsave(&iommu->lock, flags);
510 root = &iommu->root_entry[bus];
511 context = get_context_addr_from_root(root);
513 context_clear_entry(&context[devfn]);
514 __iommu_flush_cache(iommu, &context[devfn], \
517 spin_unlock_irqrestore(&iommu->lock, flags);
520 static void free_context_table(struct intel_iommu *iommu)
522 struct root_entry *root;
525 struct context_entry *context;
527 spin_lock_irqsave(&iommu->lock, flags);
528 if (!iommu->root_entry) {
531 for (i = 0; i < ROOT_ENTRY_NR; i++) {
532 root = &iommu->root_entry[i];
533 context = get_context_addr_from_root(root);
535 free_pgtable_page(context);
537 free_pgtable_page(iommu->root_entry);
538 iommu->root_entry = NULL;
540 spin_unlock_irqrestore(&iommu->lock, flags);
543 /* page table handling */
544 #define LEVEL_STRIDE (9)
545 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
547 static inline int agaw_to_level(int agaw)
552 static inline int agaw_to_width(int agaw)
554 return 30 + agaw * LEVEL_STRIDE;
558 static inline int width_to_agaw(int width)
560 return (width - 30) / LEVEL_STRIDE;
563 static inline unsigned int level_to_offset_bits(int level)
565 return (12 + (level - 1) * LEVEL_STRIDE);
568 static inline int address_level_offset(u64 addr, int level)
570 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
573 static inline u64 level_mask(int level)
575 return ((u64)-1 << level_to_offset_bits(level));
578 static inline u64 level_size(int level)
580 return ((u64)1 << level_to_offset_bits(level));
583 static inline u64 align_to_level(u64 addr, int level)
585 return ((addr + level_size(level) - 1) & level_mask(level));
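/*
 * Worked example of the level arithmetic above: each level covers
 * LEVEL_STRIDE == 9 address bits on top of the 12-bit page offset, so
 * level 1 indexes bits 12-20 and level 2 bits 21-29
 * (level_to_offset_bits(2) == 21).  For addr == 0x40201000,
 * address_level_offset(addr, 1) == 1, level_size(2) == 2MB, and
 * align_to_level(addr, 2) rounds up to 0x40400000.
 */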
588 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
590 int addr_width = agaw_to_width(domain->agaw);
591 struct dma_pte *parent, *pte = NULL;
592 int level = agaw_to_level(domain->agaw);
596 BUG_ON(!domain->pgd);
598 addr &= (((u64)1) << addr_width) - 1;
599 parent = domain->pgd;
601 spin_lock_irqsave(&domain->mapping_lock, flags);
605 offset = address_level_offset(addr, level);
606 pte = &parent[offset];
610 if (!dma_pte_present(pte)) {
611 tmp_page = alloc_pgtable_page();
614 spin_unlock_irqrestore(&domain->mapping_lock,
618 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
619 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
621 * higher-level tables always set r/w; the last-level page
622 * table controls read/write
624 dma_set_pte_readable(pte);
625 dma_set_pte_writable(pte);
626 domain_flush_cache(domain, pte, sizeof(*pte));
628 parent = phys_to_virt(dma_pte_addr(pte));
632 spin_unlock_irqrestore(&domain->mapping_lock, flags);
636 /* return address's pte at specific level */
637 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
640 struct dma_pte *parent, *pte = NULL;
641 int total = agaw_to_level(domain->agaw);
644 parent = domain->pgd;
645 while (level <= total) {
646 offset = address_level_offset(addr, total);
647 pte = &parent[offset];
651 if (!dma_pte_present(pte))
653 parent = phys_to_virt(dma_pte_addr(pte));
659 /* clear the last-level pte for one page */
660 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
662 struct dma_pte *pte = NULL;
664 /* get last level pte */
665 pte = dma_addr_level_pte(domain, addr, 1);
669 domain_flush_cache(domain, pte, sizeof(*pte));
673 /* clear last level pte; a tlb flush should follow */
674 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
676 int addr_width = agaw_to_width(domain->agaw);
678 start &= (((u64)1) << addr_width) - 1;
679 end &= (((u64)1) << addr_width) - 1;
680 /* in case it's a partial page */
681 start = PAGE_ALIGN(start);
684 /* we don't need lock here, nobody else touches the iova range */
685 while (start < end) {
686 dma_pte_clear_one(domain, start);
687 start += VTD_PAGE_SIZE;
691 /* free page table pages. last level pte should already be cleared */
692 static void dma_pte_free_pagetable(struct dmar_domain *domain,
695 int addr_width = agaw_to_width(domain->agaw);
697 int total = agaw_to_level(domain->agaw);
701 start &= (((u64)1) << addr_width) - 1;
702 end &= (((u64)1) << addr_width) - 1;
704 /* we don't need lock here, nobody else touches the iova range */
706 while (level <= total) {
707 tmp = align_to_level(start, level);
708 if (tmp >= end || (tmp + level_size(level) > end))
712 pte = dma_addr_level_pte(domain, tmp, level);
715 phys_to_virt(dma_pte_addr(pte)));
717 domain_flush_cache(domain, pte, sizeof(*pte));
719 tmp += level_size(level);
724 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
725 free_pgtable_page(domain->pgd);
731 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
733 struct root_entry *root;
736 root = (struct root_entry *)alloc_pgtable_page();
740 __iommu_flush_cache(iommu, root, ROOT_SIZE);
742 spin_lock_irqsave(&iommu->lock, flags);
743 iommu->root_entry = root;
744 spin_unlock_irqrestore(&iommu->lock, flags);
749 static void iommu_set_root_entry(struct intel_iommu *iommu)
755 addr = iommu->root_entry;
757 spin_lock_irqsave(&iommu->register_lock, flag);
758 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
760 cmd = iommu->gcmd | DMA_GCMD_SRTP;
761 writel(cmd, iommu->reg + DMAR_GCMD_REG);
763 /* Make sure hardware complete it */
764 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
765 readl, (sts & DMA_GSTS_RTPS), sts);
767 spin_unlock_irqrestore(&iommu->register_lock, flag);
770 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
775 if (!cap_rwbf(iommu->cap))
777 val = iommu->gcmd | DMA_GCMD_WBF;
779 spin_lock_irqsave(&iommu->register_lock, flag);
780 writel(val, iommu->reg + DMAR_GCMD_REG);
782 /* Make sure hardware complete it */
783 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
784 readl, (!(val & DMA_GSTS_WBFS)), val);
786 spin_unlock_irqrestore(&iommu->register_lock, flag);
789 /* return value determines whether we need a write buffer flush */
790 static int __iommu_flush_context(struct intel_iommu *iommu,
791 u16 did, u16 source_id, u8 function_mask, u64 type,
792 int non_present_entry_flush)
798 * In the non-present entry flush case, if hardware doesn't cache
799 * non-present entries we do nothing; if hardware does cache non-present
800 * entries, we flush entries of domain 0 (that domain id is used to tag
801 * any cached non-present entries)
803 if (non_present_entry_flush) {
804 if (!cap_caching_mode(iommu->cap))
811 case DMA_CCMD_GLOBAL_INVL:
812 val = DMA_CCMD_GLOBAL_INVL;
814 case DMA_CCMD_DOMAIN_INVL:
815 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
817 case DMA_CCMD_DEVICE_INVL:
818 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
819 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
826 spin_lock_irqsave(&iommu->register_lock, flag);
827 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
829 /* Make sure hardware complete it */
830 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
831 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
833 spin_unlock_irqrestore(&iommu->register_lock, flag);
835 /* flush context entry will implicitly flush write buffer */
839 /* return value determines whether we need a write buffer flush */
840 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
841 u64 addr, unsigned int size_order, u64 type,
842 int non_present_entry_flush)
844 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
845 u64 val = 0, val_iva = 0;
849 * In the non-present entry flush case, if hardware doesn't cache
850 * non-present entries we do nothing; if hardware does cache non-present
851 * entries, we flush entries of domain 0 (that domain id is used to tag
852 * any cached non-present entries)
854 if (non_present_entry_flush) {
855 if (!cap_caching_mode(iommu->cap))
862 case DMA_TLB_GLOBAL_FLUSH:
863 /* global flush doesn't need to set IVA_REG */
864 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
866 case DMA_TLB_DSI_FLUSH:
867 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
869 case DMA_TLB_PSI_FLUSH:
870 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
871 /* Note: always flush non-leaf currently */
872 val_iva = size_order | addr;
877 /* Note: set drain read/write */
880 * This is probably just to be extra safe; it looks like we could
881 * omit it without any impact.
883 if (cap_read_drain(iommu->cap))
884 val |= DMA_TLB_READ_DRAIN;
886 if (cap_write_drain(iommu->cap))
887 val |= DMA_TLB_WRITE_DRAIN;
889 spin_lock_irqsave(&iommu->register_lock, flag);
890 /* Note: Only uses first TLB reg currently */
892 dmar_writeq(iommu->reg + tlb_offset, val_iva);
893 dmar_writeq(iommu->reg + tlb_offset + 8, val);
895 /* Make sure hardware complete it */
896 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
897 dmar_readq, (!(val & DMA_TLB_IVT)), val);
899 spin_unlock_irqrestore(&iommu->register_lock, flag);
901 /* check IOTLB invalidation granularity */
902 if (DMA_TLB_IAIG(val) == 0)
903 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
904 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
905 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
906 (unsigned long long)DMA_TLB_IIRG(type),
907 (unsigned long long)DMA_TLB_IAIG(val));
908 /* flush iotlb entry will implicitly flush write buffer */
912 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
913 u64 addr, unsigned int pages, int non_present_entry_flush)
917 BUG_ON(addr & (~VTD_PAGE_MASK));
920 /* Fallback to domain selective flush if no PSI support */
921 if (!cap_pgsel_inv(iommu->cap))
922 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
924 non_present_entry_flush);
927 * PSI requires the region size to be 2^x pages, with the base address
928 * naturally aligned to that size
930 mask = ilog2(__roundup_pow_of_two(pages));
931 /* Fallback to domain selective flush if size is too big */
932 if (mask > cap_max_amask_val(iommu->cap))
933 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
934 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
936 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
938 non_present_entry_flush);
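/*
 * Worked example of the mask computation above: flushing 5 pages gives
 * mask = ilog2(__roundup_pow_of_two(5)) == 3, so the hardware invalidates
 * an 8-page (32KB) naturally aligned region; if the mask exceeds
 * cap_max_amask_val(), the code falls back to a domain-selective flush.
 */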
941 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
946 spin_lock_irqsave(&iommu->register_lock, flags);
947 pmen = readl(iommu->reg + DMAR_PMEN_REG);
948 pmen &= ~DMA_PMEN_EPM;
949 writel(pmen, iommu->reg + DMAR_PMEN_REG);
951 /* wait for the protected region status bit to clear */
952 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
953 readl, !(pmen & DMA_PMEN_PRS), pmen);
955 spin_unlock_irqrestore(&iommu->register_lock, flags);
958 static int iommu_enable_translation(struct intel_iommu *iommu)
963 spin_lock_irqsave(&iommu->register_lock, flags);
964 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
966 /* Make sure hardware complete it */
967 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
968 readl, (sts & DMA_GSTS_TES), sts);
970 iommu->gcmd |= DMA_GCMD_TE;
971 spin_unlock_irqrestore(&iommu->register_lock, flags);
975 static int iommu_disable_translation(struct intel_iommu *iommu)
980 spin_lock_irqsave(&iommu->register_lock, flag);
981 iommu->gcmd &= ~DMA_GCMD_TE;
982 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
984 /* Make sure hardware complete it */
985 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
986 readl, (!(sts & DMA_GSTS_TES)), sts);
988 spin_unlock_irqrestore(&iommu->register_lock, flag);
992 /* iommu interrupt handling. Most of it is MSI-like. */
994 static const char *fault_reason_strings[] =
997 "Present bit in root entry is clear",
998 "Present bit in context entry is clear",
999 "Invalid context entry",
1000 "Access beyond MGAW",
1001 "PTE Write access is not set",
1002 "PTE Read access is not set",
1003 "Next page table ptr is invalid",
1004 "Root table address invalid",
1005 "Context table ptr is invalid",
1006 "non-zero reserved fields in RTP",
1007 "non-zero reserved fields in CTP",
1008 "non-zero reserved fields in PTE",
1010 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1012 const char *dmar_get_fault_reason(u8 fault_reason)
1014 if (fault_reason > MAX_FAULT_REASON_IDX)
1017 return fault_reason_strings[fault_reason];
1020 void dmar_msi_unmask(unsigned int irq)
1022 struct intel_iommu *iommu = get_irq_data(irq);
1026 spin_lock_irqsave(&iommu->register_lock, flag);
1027 writel(0, iommu->reg + DMAR_FECTL_REG);
1028 /* Read a reg back to flush the posted write */
1029 readl(iommu->reg + DMAR_FECTL_REG);
1030 spin_unlock_irqrestore(&iommu->register_lock, flag);
1033 void dmar_msi_mask(unsigned int irq)
1036 struct intel_iommu *iommu = get_irq_data(irq);
1039 spin_lock_irqsave(&iommu->register_lock, flag);
1040 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1041 /* Read a reg back to flush the posted write */
1042 readl(iommu->reg + DMAR_FECTL_REG);
1043 spin_unlock_irqrestore(&iommu->register_lock, flag);
1046 void dmar_msi_write(int irq, struct msi_msg *msg)
1048 struct intel_iommu *iommu = get_irq_data(irq);
1051 spin_lock_irqsave(&iommu->register_lock, flag);
1052 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1053 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1054 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1055 spin_unlock_irqrestore(&iommu->register_lock, flag);
1058 void dmar_msi_read(int irq, struct msi_msg *msg)
1060 struct intel_iommu *iommu = get_irq_data(irq);
1063 spin_lock_irqsave(&iommu->register_lock, flag);
1064 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1065 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1066 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1067 spin_unlock_irqrestore(&iommu->register_lock, flag);
1070 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1071 u8 fault_reason, u16 source_id, unsigned long long addr)
1075 reason = dmar_get_fault_reason(fault_reason);
1078 "DMAR:[%s] Request device [%02x:%02x.%d] "
1079 "fault addr %llx \n"
1080 "DMAR:[fault reason %02d] %s\n",
1081 (type ? "DMA Read" : "DMA Write"),
1082 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1083 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1087 #define PRIMARY_FAULT_REG_LEN (16)
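/*
 * Each primary fault record is 16 bytes (hence PRIMARY_FAULT_REG_LEN).  As
 * the reads below assume, the low quadword holds the faulting page address,
 * bytes 8-11 the source id, and bytes 12-15 the fault reason, request type
 * and the F (fault-valid) bit.
 */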
1088 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1090 struct intel_iommu *iommu = dev_id;
1091 int reg, fault_index;
1095 spin_lock_irqsave(&iommu->register_lock, flag);
1096 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1098 /* TBD: ignore advanced fault log currently */
1099 if (!(fault_status & DMA_FSTS_PPF))
1100 goto clear_overflow;
1102 fault_index = dma_fsts_fault_record_index(fault_status);
1103 reg = cap_fault_reg_offset(iommu->cap);
1111 /* highest 32 bits */
1112 data = readl(iommu->reg + reg +
1113 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1114 if (!(data & DMA_FRCD_F))
1117 fault_reason = dma_frcd_fault_reason(data);
1118 type = dma_frcd_type(data);
1120 data = readl(iommu->reg + reg +
1121 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1122 source_id = dma_frcd_source_id(data);
1124 guest_addr = dmar_readq(iommu->reg + reg +
1125 fault_index * PRIMARY_FAULT_REG_LEN);
1126 guest_addr = dma_frcd_page_addr(guest_addr);
1127 /* clear the fault */
1128 writel(DMA_FRCD_F, iommu->reg + reg +
1129 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1131 spin_unlock_irqrestore(&iommu->register_lock, flag);
1133 iommu_page_fault_do_one(iommu, type, fault_reason,
1134 source_id, guest_addr);
1137 if (fault_index > cap_num_fault_regs(iommu->cap))
1139 spin_lock_irqsave(&iommu->register_lock, flag);
1142 /* clear primary fault overflow */
1143 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1144 if (fault_status & DMA_FSTS_PFO)
1145 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1147 spin_unlock_irqrestore(&iommu->register_lock, flag);
1151 int dmar_set_interrupt(struct intel_iommu *iommu)
1157 printk(KERN_ERR "IOMMU: no free vectors\n");
1161 set_irq_data(irq, iommu);
1164 ret = arch_setup_dmar_msi(irq);
1166 set_irq_data(irq, NULL);
1172 /* Force any pending fault records to be cleared */
1173 iommu_page_fault(irq, iommu);
1175 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1177 printk(KERN_ERR "IOMMU: can't request irq\n");
1181 static int iommu_init_domains(struct intel_iommu *iommu)
1183 unsigned long ndomains;
1184 unsigned long nlongs;
1186 ndomains = cap_ndoms(iommu->cap);
1187 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1188 nlongs = BITS_TO_LONGS(ndomains);
1190 /* TBD: there might be 64K domains,
1191 * consider a different allocation scheme for future chips
1193 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1194 if (!iommu->domain_ids) {
1195 printk(KERN_ERR "Allocating domain id array failed\n");
1198 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1200 if (!iommu->domains) {
1201 printk(KERN_ERR "Allocating domain array failed\n");
1202 kfree(iommu->domain_ids);
1206 spin_lock_init(&iommu->lock);
1209 * if Caching mode is set, then invalid translations are tagged
1210 * with domain id 0. Hence we need to pre-allocate it.
1212 if (cap_caching_mode(iommu->cap))
1213 set_bit(0, iommu->domain_ids);
1218 static void domain_exit(struct dmar_domain *domain);
1220 void free_dmar_iommu(struct intel_iommu *iommu)
1222 struct dmar_domain *domain;
1224 unsigned long flags;
1226 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1227 for (; i < cap_ndoms(iommu->cap); ) {
1228 domain = iommu->domains[i];
1229 clear_bit(i, iommu->domain_ids);
1231 spin_lock_irqsave(&domain->iommu_lock, flags);
1232 if (--domain->iommu_count == 0)
1233 domain_exit(domain);
1234 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1236 i = find_next_bit(iommu->domain_ids,
1237 cap_ndoms(iommu->cap), i+1);
1240 if (iommu->gcmd & DMA_GCMD_TE)
1241 iommu_disable_translation(iommu);
1244 set_irq_data(iommu->irq, NULL);
1245 /* This will mask the irq */
1246 free_irq(iommu->irq, iommu);
1247 destroy_irq(iommu->irq);
1250 kfree(iommu->domains);
1251 kfree(iommu->domain_ids);
1253 g_iommus[iommu->seq_id] = NULL;
1255 /* if all iommus are freed, free g_iommus */
1256 for (i = 0; i < g_num_of_iommus; i++) {
1261 if (i == g_num_of_iommus)
1264 /* free context mapping */
1265 free_context_table(iommu);
1268 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1271 unsigned long ndomains;
1272 struct dmar_domain *domain;
1273 unsigned long flags;
1275 domain = alloc_domain_mem();
1279 ndomains = cap_ndoms(iommu->cap);
1281 spin_lock_irqsave(&iommu->lock, flags);
1282 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1283 if (num >= ndomains) {
1284 spin_unlock_irqrestore(&iommu->lock, flags);
1285 free_domain_mem(domain);
1286 printk(KERN_ERR "IOMMU: no free domain ids\n");
1290 set_bit(num, iommu->domain_ids);
1292 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1293 set_bit(iommu->seq_id, &domain->iommu_bmp);
1295 iommu->domains[num] = domain;
1296 spin_unlock_irqrestore(&iommu->lock, flags);
1301 static void iommu_free_domain(struct dmar_domain *domain)
1303 unsigned long flags;
1304 struct intel_iommu *iommu;
1306 iommu = domain_get_iommu(domain);
1308 spin_lock_irqsave(&iommu->lock, flags);
1309 clear_bit(domain->id, iommu->domain_ids);
1310 spin_unlock_irqrestore(&iommu->lock, flags);
1313 static struct iova_domain reserved_iova_list;
1314 static struct lock_class_key reserved_alloc_key;
1315 static struct lock_class_key reserved_rbtree_key;
1317 static void dmar_init_reserved_ranges(void)
1319 struct pci_dev *pdev = NULL;
1324 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1326 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1327 &reserved_alloc_key);
1328 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1329 &reserved_rbtree_key);
1331 /* IOAPIC ranges shouldn't be accessed by DMA */
1332 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1333 IOVA_PFN(IOAPIC_RANGE_END));
1335 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1337 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1338 for_each_pci_dev(pdev) {
1341 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1342 r = &pdev->resource[i];
1343 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1347 size = r->end - addr;
1348 size = PAGE_ALIGN(size);
1349 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1350 IOVA_PFN(size + addr) - 1);
1352 printk(KERN_ERR "Reserve iova failed\n");
1358 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1360 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1363 static inline int guestwidth_to_adjustwidth(int gaw)
1366 int r = (gaw - 12) % 9;
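/*
 * Example of the adjustment above (illustrative values): each page-table
 * level adds 9 bits on top of the 12-bit page offset, so the supported
 * widths are 30, 39, 48, 57.  A guest width of 36 gives
 * r == (36 - 12) % 9 == 6 and is rounded up to the next boundary, 39 bits.
 */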
1377 static int domain_init(struct dmar_domain *domain, int guest_width)
1379 struct intel_iommu *iommu;
1380 int adjust_width, agaw;
1381 unsigned long sagaw;
1383 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1384 spin_lock_init(&domain->mapping_lock);
1385 spin_lock_init(&domain->iommu_lock);
1387 domain_reserve_special_ranges(domain);
1389 /* calculate AGAW */
1390 iommu = domain_get_iommu(domain);
1391 if (guest_width > cap_mgaw(iommu->cap))
1392 guest_width = cap_mgaw(iommu->cap);
1393 domain->gaw = guest_width;
1394 adjust_width = guestwidth_to_adjustwidth(guest_width);
1395 agaw = width_to_agaw(adjust_width);
1396 sagaw = cap_sagaw(iommu->cap);
1397 if (!test_bit(agaw, &sagaw)) {
1398 /* hardware doesn't support it, choose a bigger one */
1399 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1400 agaw = find_next_bit(&sagaw, 5, agaw);
1404 domain->agaw = agaw;
1405 INIT_LIST_HEAD(&domain->devices);
1407 if (ecap_coherent(iommu->ecap))
1408 domain->iommu_coherency = 1;
1410 domain->iommu_coherency = 0;
1412 domain->iommu_count = 1;
1414 /* always allocate the top pgd */
1415 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1418 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1422 static void domain_exit(struct dmar_domain *domain)
1426 /* Domain 0 is reserved, so don't process it */
1430 domain_remove_dev_info(domain);
1432 put_iova_domain(&domain->iovad);
1433 end = DOMAIN_MAX_ADDR(domain->gaw);
1434 end = end & (~PAGE_MASK);
1437 dma_pte_clear_range(domain, 0, end);
1439 /* free page tables */
1440 dma_pte_free_pagetable(domain, 0, end);
1442 iommu_free_domain(domain);
1443 free_domain_mem(domain);
1446 static int domain_context_mapping_one(struct dmar_domain *domain,
1449 struct context_entry *context;
1450 unsigned long flags;
1451 struct intel_iommu *iommu;
1453 pr_debug("Set context mapping for %02x:%02x.%d\n",
1454 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1455 BUG_ON(!domain->pgd);
1457 iommu = device_to_iommu(bus, devfn);
1461 context = device_to_context_entry(iommu, bus, devfn);
1464 spin_lock_irqsave(&iommu->lock, flags);
1465 if (context_present(context)) {
1466 spin_unlock_irqrestore(&iommu->lock, flags);
1470 context_set_domain_id(context, domain->id);
1471 context_set_address_width(context, domain->agaw);
1472 context_set_address_root(context, virt_to_phys(domain->pgd));
1473 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1474 context_set_fault_enable(context);
1475 context_set_present(context);
1476 domain_flush_cache(domain, context, sizeof(*context));
1478 /* it's a non-present to present mapping */
1479 if (iommu->flush.flush_context(iommu, domain->id,
1480 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1481 DMA_CCMD_DEVICE_INVL, 1))
1482 iommu_flush_write_buffer(iommu);
1484 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1486 spin_unlock_irqrestore(&iommu->lock, flags);
1488 spin_lock_irqsave(&domain->iommu_lock, flags);
1489 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1490 domain->iommu_count++;
1491 domain_update_iommu_coherency(domain);
1493 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1498 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1501 struct pci_dev *tmp, *parent;
1503 ret = domain_context_mapping_one(domain, pdev->bus->number,
1508 /* dependent device mapping */
1509 tmp = pci_find_upstream_pcie_bridge(pdev);
1512 /* Secondary interface's bus number and devfn 0 */
1513 parent = pdev->bus->self;
1514 while (parent != tmp) {
1515 ret = domain_context_mapping_one(domain, parent->bus->number,
1519 parent = parent->bus->self;
1521 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1522 return domain_context_mapping_one(domain,
1523 tmp->subordinate->number, 0);
1524 else /* this is a legacy PCI bridge */
1525 return domain_context_mapping_one(domain,
1526 tmp->bus->number, tmp->devfn);
1529 static int domain_context_mapped(struct pci_dev *pdev)
1532 struct pci_dev *tmp, *parent;
1533 struct intel_iommu *iommu;
1535 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1539 ret = device_context_mapped(iommu,
1540 pdev->bus->number, pdev->devfn);
1543 /* dependent device mapping */
1544 tmp = pci_find_upstream_pcie_bridge(pdev);
1547 /* Secondary interface's bus number and devfn 0 */
1548 parent = pdev->bus->self;
1549 while (parent != tmp) {
1550 ret = device_context_mapped(iommu, parent->bus->number,
1554 parent = parent->bus->self;
1557 return device_context_mapped(iommu,
1558 tmp->subordinate->number, 0);
1560 return device_context_mapped(iommu,
1561 tmp->bus->number, tmp->devfn);
1565 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1566 u64 hpa, size_t size, int prot)
1568 u64 start_pfn, end_pfn;
1569 struct dma_pte *pte;
1571 int addr_width = agaw_to_width(domain->agaw);
1573 hpa &= (((u64)1) << addr_width) - 1;
1575 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1578 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1579 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1581 while (start_pfn < end_pfn) {
1582 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1585 /* We don't need lock here, nobody else
1586 * touches the iova range
1588 BUG_ON(dma_pte_addr(pte));
1589 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1590 dma_set_pte_prot(pte, prot);
1591 domain_flush_cache(domain, pte, sizeof(*pte));
1598 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1603 clear_context_table(iommu, bus, devfn);
1604 iommu->flush.flush_context(iommu, 0, 0, 0,
1605 DMA_CCMD_GLOBAL_INVL, 0);
1606 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1607 DMA_TLB_GLOBAL_FLUSH, 0);
1610 static void domain_remove_dev_info(struct dmar_domain *domain)
1612 struct device_domain_info *info;
1613 unsigned long flags;
1614 struct intel_iommu *iommu;
1616 spin_lock_irqsave(&device_domain_lock, flags);
1617 while (!list_empty(&domain->devices)) {
1618 info = list_entry(domain->devices.next,
1619 struct device_domain_info, link);
1620 list_del(&info->link);
1621 list_del(&info->global);
1623 info->dev->dev.archdata.iommu = NULL;
1624 spin_unlock_irqrestore(&device_domain_lock, flags);
1626 iommu = device_to_iommu(info->bus, info->devfn);
1627 iommu_detach_dev(iommu, info->bus, info->devfn);
1628 free_devinfo_mem(info);
1630 spin_lock_irqsave(&device_domain_lock, flags);
1632 spin_unlock_irqrestore(&device_domain_lock, flags);
1637 * Note: struct pci_dev->dev.archdata.iommu stores the domain info
1639 static struct dmar_domain *
1640 find_domain(struct pci_dev *pdev)
1642 struct device_domain_info *info;
1644 /* No lock here, assumes no domain exit in normal case */
1645 info = pdev->dev.archdata.iommu;
1647 return info->domain;
1651 /* domain is initialized */
1652 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1654 struct dmar_domain *domain, *found = NULL;
1655 struct intel_iommu *iommu;
1656 struct dmar_drhd_unit *drhd;
1657 struct device_domain_info *info, *tmp;
1658 struct pci_dev *dev_tmp;
1659 unsigned long flags;
1660 int bus = 0, devfn = 0;
1662 domain = find_domain(pdev);
1666 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1668 if (dev_tmp->is_pcie) {
1669 bus = dev_tmp->subordinate->number;
1672 bus = dev_tmp->bus->number;
1673 devfn = dev_tmp->devfn;
1675 spin_lock_irqsave(&device_domain_lock, flags);
1676 list_for_each_entry(info, &device_domain_list, global) {
1677 if (info->bus == bus && info->devfn == devfn) {
1678 found = info->domain;
1682 spin_unlock_irqrestore(&device_domain_lock, flags);
1683 /* the pcie-to-pci bridge already has a domain, use it */
1690 /* Allocate new domain for the device */
1691 drhd = dmar_find_matched_drhd_unit(pdev);
1693 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1697 iommu = drhd->iommu;
1699 domain = iommu_alloc_domain(iommu);
1703 if (domain_init(domain, gaw)) {
1704 domain_exit(domain);
1708 /* register pcie-to-pci device */
1710 info = alloc_devinfo_mem();
1712 domain_exit(domain);
1716 info->devfn = devfn;
1718 info->domain = domain;
1719 /* This domain is shared by devices under p2p bridge */
1720 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1722 /* the pcie-to-pci bridge already has a domain, use it */
1724 spin_lock_irqsave(&device_domain_lock, flags);
1725 list_for_each_entry(tmp, &device_domain_list, global) {
1726 if (tmp->bus == bus && tmp->devfn == devfn) {
1727 found = tmp->domain;
1732 free_devinfo_mem(info);
1733 domain_exit(domain);
1736 list_add(&info->link, &domain->devices);
1737 list_add(&info->global, &device_domain_list);
1739 spin_unlock_irqrestore(&device_domain_lock, flags);
1743 info = alloc_devinfo_mem();
1746 info->bus = pdev->bus->number;
1747 info->devfn = pdev->devfn;
1749 info->domain = domain;
1750 spin_lock_irqsave(&device_domain_lock, flags);
1751 /* somebody else was faster and set it up already */
1752 found = find_domain(pdev);
1753 if (found != NULL) {
1754 spin_unlock_irqrestore(&device_domain_lock, flags);
1755 if (found != domain) {
1756 domain_exit(domain);
1759 free_devinfo_mem(info);
1762 list_add(&info->link, &domain->devices);
1763 list_add(&info->global, &device_domain_list);
1764 pdev->dev.archdata.iommu = info;
1765 spin_unlock_irqrestore(&device_domain_lock, flags);
1768 /* recheck it here, maybe others set it */
1769 return find_domain(pdev);
1772 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1773 unsigned long long start,
1774 unsigned long long end)
1776 struct dmar_domain *domain;
1778 unsigned long long base;
1782 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1783 pci_name(pdev), start, end);
1784 /* page table init */
1785 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1789 /* The address might not be aligned */
1790 base = start & PAGE_MASK;
1792 size = PAGE_ALIGN(size);
1793 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1794 IOVA_PFN(base + size) - 1)) {
1795 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1800 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1801 size, base, pci_name(pdev));
1803 * The RMRR range might overlap with physical memory, so clear it first
1806 dma_pte_clear_range(domain, base, base + size);
1808 ret = domain_page_mapping(domain, base, base, size,
1809 DMA_PTE_READ|DMA_PTE_WRITE);
1813 /* context entry init */
1814 ret = domain_context_mapping(domain, pdev);
1818 domain_exit(domain);
1823 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1824 struct pci_dev *pdev)
1826 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1828 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1829 rmrr->end_address + 1);
1832 #ifdef CONFIG_DMAR_GFX_WA
1833 struct iommu_prepare_data {
1834 struct pci_dev *pdev;
1838 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1839 unsigned long end_pfn, void *datax)
1841 struct iommu_prepare_data *data;
1843 data = (struct iommu_prepare_data *)datax;
1845 data->ret = iommu_prepare_identity_map(data->pdev,
1846 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1851 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1854 struct iommu_prepare_data data;
1859 for_each_online_node(nid) {
1860 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1867 static void __init iommu_prepare_gfx_mapping(void)
1869 struct pci_dev *pdev = NULL;
1872 for_each_pci_dev(pdev) {
1873 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1874 !IS_GFX_DEVICE(pdev))
1876 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1878 ret = iommu_prepare_with_active_regions(pdev);
1880 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1883 #else /* !CONFIG_DMAR_GFX_WA */
1884 static inline void iommu_prepare_gfx_mapping(void)
1890 #ifdef CONFIG_DMAR_FLOPPY_WA
1891 static inline void iommu_prepare_isa(void)
1893 struct pci_dev *pdev;
1896 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1900 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1901 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1904 printk("IOMMU: Failed to create 0-64M identity map, "
1905 "floppy might not work\n");
1909 static inline void iommu_prepare_isa(void)
1913 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1915 static int __init init_dmars(void)
1917 struct dmar_drhd_unit *drhd;
1918 struct dmar_rmrr_unit *rmrr;
1919 struct pci_dev *pdev;
1920 struct intel_iommu *iommu;
1921 int i, ret, unit = 0;
1926 * initialize and program root entry to not present
1929 for_each_drhd_unit(drhd) {
1932 * lock not needed, as this is only incremented in the single-
1933 * threaded kernel __init code path; all other accesses are reads
1938 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1941 printk(KERN_ERR "Allocating global iommu array failed\n");
1946 deferred_flush = kzalloc(g_num_of_iommus *
1947 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1948 if (!deferred_flush) {
1954 for_each_drhd_unit(drhd) {
1958 iommu = drhd->iommu;
1959 g_iommus[iommu->seq_id] = iommu;
1961 ret = iommu_init_domains(iommu);
1967 * we could share the same root & context tables
1968 * among all IOMMUs; need to split it later.
1970 ret = iommu_alloc_root_entry(iommu);
1972 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1977 for_each_drhd_unit(drhd) {
1981 iommu = drhd->iommu;
1982 if (dmar_enable_qi(iommu)) {
1984 * Queued Invalidation not enabled, use Register Based
1987 iommu->flush.flush_context = __iommu_flush_context;
1988 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1989 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1991 (unsigned long long)drhd->reg_base_addr);
1993 iommu->flush.flush_context = qi_flush_context;
1994 iommu->flush.flush_iotlb = qi_flush_iotlb;
1995 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1997 (unsigned long long)drhd->reg_base_addr);
2003 * for each dev attached to rmrr
2005 * locate drhd for dev, alloc domain for dev
2006 * allocate free domain
2007 * allocate page table entries for rmrr
2008 * if context not allocated for bus
2009 * allocate and init context
2010 * set present in root table for this bus
2011 * init context with domain, translation etc
2015 for_each_rmrr_units(rmrr) {
2016 for (i = 0; i < rmrr->devices_cnt; i++) {
2017 pdev = rmrr->devices[i];
2018 /* some BIOSes list non-existent devices in the DMAR table */
2021 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2024 "IOMMU: mapping reserved region failed\n");
2028 iommu_prepare_gfx_mapping();
2030 iommu_prepare_isa();
2035 * global invalidate context cache
2036 * global invalidate iotlb
2037 * enable translation
2039 for_each_drhd_unit(drhd) {
2042 iommu = drhd->iommu;
2043 sprintf (iommu->name, "dmar%d", unit++);
2045 iommu_flush_write_buffer(iommu);
2047 ret = dmar_set_interrupt(iommu);
2051 iommu_set_root_entry(iommu);
2053 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2055 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2057 iommu_disable_protect_mem_regions(iommu);
2059 ret = iommu_enable_translation(iommu);
2066 for_each_drhd_unit(drhd) {
2069 iommu = drhd->iommu;
2076 static inline u64 aligned_size(u64 host_addr, size_t size)
2079 addr = (host_addr & (~PAGE_MASK)) + size;
2080 return PAGE_ALIGN(addr);
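/*
 * Worked example (arbitrary values): aligned_size(0x1234, 0x100) computes
 * PAGE_ALIGN(0x234 + 0x100) == 0x1000, i.e. the mapping must cover one
 * whole 4KB page even though the buffer itself is only 0x100 bytes long.
 */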
2084 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2088 /* Make sure it's in range */
2089 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2090 if (!size || (IOVA_START_ADDR + size > end))
2093 piova = alloc_iova(&domain->iovad,
2094 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2098 static struct iova *
2099 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2100 size_t size, u64 dma_mask)
2102 struct pci_dev *pdev = to_pci_dev(dev);
2103 struct iova *iova = NULL;
2105 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2106 iova = iommu_alloc_iova(domain, size, dma_mask);
2109 * First try to allocate an io virtual address in
2110 * DMA_32BIT_MASK; if that fails, try allocating from the device's full dma_mask
2113 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2115 iova = iommu_alloc_iova(domain, size, dma_mask);
2119 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
2126 static struct dmar_domain *
2127 get_valid_domain_for_dev(struct pci_dev *pdev)
2129 struct dmar_domain *domain;
2132 domain = get_domain_for_dev(pdev,
2133 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2136 "Allocating domain for %s failed", pci_name(pdev));
2140 /* make sure context mapping is ok */
2141 if (unlikely(!domain_context_mapped(pdev))) {
2142 ret = domain_context_mapping(domain, pdev);
2145 "Domain context map for %s failed",
2154 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2155 size_t size, int dir, u64 dma_mask)
2157 struct pci_dev *pdev = to_pci_dev(hwdev);
2158 struct dmar_domain *domain;
2159 phys_addr_t start_paddr;
2163 struct intel_iommu *iommu;
2165 BUG_ON(dir == DMA_NONE);
2166 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2169 domain = get_valid_domain_for_dev(pdev);
2173 iommu = domain_get_iommu(domain);
2174 size = aligned_size((u64)paddr, size);
2176 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2180 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2183 * Check if DMAR supports zero-length reads on write-only mappings
2186 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2187 !cap_zlr(iommu->cap))
2188 prot |= DMA_PTE_READ;
2189 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2190 prot |= DMA_PTE_WRITE;
2192 * paddr .. (paddr + size) might cover a partial page, so map the whole
2193 * page. Note: if two parts of one page are mapped separately, we
2194 * might have two guest addresses mapping to the same host paddr, but this
2195 * is not a big problem
2197 ret = domain_page_mapping(domain, start_paddr,
2198 ((u64)paddr) & PAGE_MASK, size, prot);
2202 /* it's a non-present to present mapping */
2203 ret = iommu_flush_iotlb_psi(iommu, domain->id,
2204 start_paddr, size >> VTD_PAGE_SHIFT, 1);
2206 iommu_flush_write_buffer(iommu);
2208 return start_paddr + ((u64)paddr & (~PAGE_MASK));
2212 __free_iova(&domain->iovad, iova);
2213 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
2214 pci_name(pdev), size, (unsigned long long)paddr, dir);
2218 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2219 size_t size, int dir)
2221 return __intel_map_single(hwdev, paddr, size, dir,
2222 to_pci_dev(hwdev)->dma_mask);
2225 static void flush_unmaps(void)
2231 /* just flush them all */
2232 for (i = 0; i < g_num_of_iommus; i++) {
2233 struct intel_iommu *iommu = g_iommus[i];
2237 if (deferred_flush[i].next) {
2238 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2239 DMA_TLB_GLOBAL_FLUSH, 0);
2240 for (j = 0; j < deferred_flush[i].next; j++) {
2241 __free_iova(&deferred_flush[i].domain[j]->iovad,
2242 deferred_flush[i].iova[j]);
2244 deferred_flush[i].next = 0;
2251 static void flush_unmaps_timeout(unsigned long data)
2253 unsigned long flags;
2255 spin_lock_irqsave(&async_umap_flush_lock, flags);
2257 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2260 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2262 unsigned long flags;
2264 struct intel_iommu *iommu;
2266 spin_lock_irqsave(&async_umap_flush_lock, flags);
2267 if (list_size == HIGH_WATER_MARK)
2270 iommu = domain_get_iommu(dom);
2271 iommu_id = iommu->seq_id;
2273 next = deferred_flush[iommu_id].next;
2274 deferred_flush[iommu_id].domain[next] = dom;
2275 deferred_flush[iommu_id].iova[next] = iova;
2276 deferred_flush[iommu_id].next++;
2279 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2283 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2286 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2289 struct pci_dev *pdev = to_pci_dev(dev);
2290 struct dmar_domain *domain;
2291 unsigned long start_addr;
2293 struct intel_iommu *iommu;
2295 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2297 domain = find_domain(pdev);
2300 iommu = domain_get_iommu(domain);
2302 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2306 start_addr = iova->pfn_lo << PAGE_SHIFT;
2307 size = aligned_size((u64)dev_addr, size);
2309 pr_debug("Device %s unmapping: %lx@%llx\n",
2310 pci_name(pdev), size, (unsigned long long)start_addr);
2312 /* clear the whole page */
2313 dma_pte_clear_range(domain, start_addr, start_addr + size);
2314 /* free page tables */
2315 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2316 if (intel_iommu_strict) {
2317 if (iommu_flush_iotlb_psi(iommu,
2318 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2319 iommu_flush_write_buffer(iommu);
2321 __free_iova(&domain->iovad, iova);
2323 add_unmap(domain, iova);
2325 * queue up the release of the unmap to save roughly 1/6th of the
2326 * cpu time used up by the iotlb flush operation...
2331 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2332 dma_addr_t *dma_handle, gfp_t flags)
2337 size = PAGE_ALIGN(size);
2338 order = get_order(size);
2339 flags &= ~(GFP_DMA | GFP_DMA32);
2341 vaddr = (void *)__get_free_pages(flags, order);
2344 memset(vaddr, 0, size);
2346 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2348 hwdev->coherent_dma_mask);
2351 free_pages((unsigned long)vaddr, order);
2355 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2356 dma_addr_t dma_handle)
2360 size = PAGE_ALIGN(size);
2361 order = get_order(size);
2363 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2364 free_pages((unsigned long)vaddr, order);
2367 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2369 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2370 int nelems, int dir)
2373 struct pci_dev *pdev = to_pci_dev(hwdev);
2374 struct dmar_domain *domain;
2375 unsigned long start_addr;
2379 struct scatterlist *sg;
2380 struct intel_iommu *iommu;
2382 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2385 domain = find_domain(pdev);
2388 iommu = domain_get_iommu(domain);
2390 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2393 for_each_sg(sglist, sg, nelems, i) {
2394 addr = SG_ENT_VIRT_ADDRESS(sg);
2395 size += aligned_size((u64)addr, sg->length);
2398 start_addr = iova->pfn_lo << PAGE_SHIFT;
2400 /* clear the whole page */
2401 dma_pte_clear_range(domain, start_addr, start_addr + size);
2402 /* free page tables */
2403 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2405 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2406 size >> VTD_PAGE_SHIFT, 0))
2407 iommu_flush_write_buffer(iommu);
2410 __free_iova(&domain->iovad, iova);
2413 static int intel_nontranslate_map_sg(struct device *hddev,
2414 struct scatterlist *sglist, int nelems, int dir)
2417 struct scatterlist *sg;
2419 for_each_sg(sglist, sg, nelems, i) {
2420 BUG_ON(!sg_page(sg));
2421 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2422 sg->dma_length = sg->length;
2427 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2432 struct pci_dev *pdev = to_pci_dev(hwdev);
2433 struct dmar_domain *domain;
2437 struct iova *iova = NULL;
2439 struct scatterlist *sg;
2440 unsigned long start_addr;
2441 struct intel_iommu *iommu;
2443 BUG_ON(dir == DMA_NONE);
2444 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2445 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2447 domain = get_valid_domain_for_dev(pdev);
2451 iommu = domain_get_iommu(domain);
2453 for_each_sg(sglist, sg, nelems, i) {
2454 addr = SG_ENT_VIRT_ADDRESS(sg);
2455 addr = (void *)virt_to_phys(addr);
2456 size += aligned_size((u64)addr, sg->length);
2459 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2461 sglist->dma_length = 0;
2466 * Check if DMAR supports zero-length reads on write-only mappings
2469 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2470 !cap_zlr(iommu->cap))
2471 prot |= DMA_PTE_READ;
2472 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2473 prot |= DMA_PTE_WRITE;
2475 start_addr = iova->pfn_lo << PAGE_SHIFT;
2477 for_each_sg(sglist, sg, nelems, i) {
2478 addr = SG_ENT_VIRT_ADDRESS(sg);
2479 addr = (void *)virt_to_phys(addr);
2480 size = aligned_size((u64)addr, sg->length);
2481 ret = domain_page_mapping(domain, start_addr + offset,
2482 ((u64)addr) & PAGE_MASK,
2485 /* clear the page */
2486 dma_pte_clear_range(domain, start_addr,
2487 start_addr + offset);
2488 /* free page tables */
2489 dma_pte_free_pagetable(domain, start_addr,
2490 start_addr + offset);
2492 __free_iova(&domain->iovad, iova);
2495 sg->dma_address = start_addr + offset +
2496 ((u64)addr & (~PAGE_MASK));
2497 sg->dma_length = sg->length;
2501 /* it's a non-present to present mapping */
2502 if (iommu_flush_iotlb_psi(iommu, domain->id,
2503 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2504 iommu_flush_write_buffer(iommu);
2508 static struct dma_mapping_ops intel_dma_ops = {
2509 .alloc_coherent = intel_alloc_coherent,
2510 .free_coherent = intel_free_coherent,
2511 .map_single = intel_map_single,
2512 .unmap_single = intel_unmap_single,
2513 .map_sg = intel_map_sg,
2514 .unmap_sg = intel_unmap_sg,
2517 static inline int iommu_domain_cache_init(void)
2521 iommu_domain_cache = kmem_cache_create("iommu_domain",
2522 sizeof(struct dmar_domain),
2527 if (!iommu_domain_cache) {
2528 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2535 static inline int iommu_devinfo_cache_init(void)
2539 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2540 sizeof(struct device_domain_info),
2544 if (!iommu_devinfo_cache) {
2545 printk(KERN_ERR "Couldn't create devinfo cache\n");
2552 static inline int iommu_iova_cache_init(void)
2556 iommu_iova_cache = kmem_cache_create("iommu_iova",
2557 sizeof(struct iova),
2561 if (!iommu_iova_cache) {
2562 printk(KERN_ERR "Couldn't create iova cache\n");
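/*
 * iommu_init_mempool() creates the iova, domain and devinfo caches in
 * order and unwinds whatever was already created if a later step fails;
 * iommu_exit_mempool() destroys all three.
 */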
2569 static int __init iommu_init_mempool(void)
2572 ret = iommu_iova_cache_init();
2576 ret = iommu_domain_cache_init();
2580 ret = iommu_devinfo_cache_init();
2584 kmem_cache_destroy(iommu_domain_cache);
2586 kmem_cache_destroy(iommu_iova_cache);
2591 static void __init iommu_exit_mempool(void)
2593 kmem_cache_destroy(iommu_devinfo_cache);
2594 kmem_cache_destroy(iommu_domain_cache);
2595 kmem_cache_destroy(iommu_iova_cache);
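/*
 * init_no_remapping_devices() makes two passes over the DRHD units:
 * units that claim specific devices but currently have none present are
 * marked as ignored, and units serving nothing but graphics devices get
 * those devices tagged with DUMMY_DEVICE_DOMAIN_INFO so their DMA
 * bypasses translation entirely.
 */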
2599 static void __init init_no_remapping_devices(void)
2601 struct dmar_drhd_unit *drhd;
2603 for_each_drhd_unit(drhd) {
2604 if (!drhd->include_all) {
2606 for (i = 0; i < drhd->devices_cnt; i++)
2607 if (drhd->devices[i] != NULL)
2609 /* ignore DMAR unit if no pci devices exist */
2610 if (i == drhd->devices_cnt)
2618 for_each_drhd_unit(drhd) {
2620 if (drhd->ignored || drhd->include_all)
2623 for (i = 0; i < drhd->devices_cnt; i++)
2624 if (drhd->devices[i] &&
2625 !IS_GFX_DEVICE(drhd->devices[i]))
2628 if (i < drhd->devices_cnt)
2631 /* bypass IOMMU if it is just for gfx devices */
2633 for (i = 0; i < drhd->devices_cnt; i++) {
2634 if (!drhd->devices[i])
2636 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
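/*
 * intel_iommu_init() - entry point for DMA remapping: parse the DMAR
 * table and device scopes, bail out when the IOMMU is disabled or
 * swiotlb is in use, set up the mempools and reserved IOVA ranges, and
 * finally install intel_dma_ops as the system dma_ops.
 */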
2641 int __init intel_iommu_init(void)
2645 if (dmar_table_init())
2648 if (dmar_dev_scope_init())
2652 * Check the need for DMA-remapping initialization now.
2653 * The initialization above is also used by interrupt remapping.
2655 if (no_iommu || swiotlb || dmar_disabled)
2658 iommu_init_mempool();
2659 dmar_init_reserved_ranges();
2661 init_no_remapping_devices();
2665 printk(KERN_ERR "IOMMU: dmar init failed\n");
2666 put_iova_domain(&reserved_iova_list);
2667 iommu_exit_mempool();
2671 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2673 init_timer(&unmap_timer);
2675 dma_ops = &intel_dma_ops;
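/*
 * The vm_domain_* helpers below manage device membership for domains
 * handed out through the exported intel_iommu_* API (apparently in
 * support of assigning devices to virtual machines).
 * vm_domain_add_dev_info() links a new device_domain_info onto both the
 * domain's device list and the global device_domain_list.
 */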
2679 static int vm_domain_add_dev_info(struct dmar_domain *domain,
2680 struct pci_dev *pdev)
2682 struct device_domain_info *info;
2683 unsigned long flags;
2685 info = alloc_devinfo_mem();
2689 info->bus = pdev->bus->number;
2690 info->devfn = pdev->devfn;
2692 info->domain = domain;
2694 spin_lock_irqsave(&device_domain_lock, flags);
2695 list_add(&info->link, &domain->devices);
2696 list_add(&info->global, &device_domain_list);
2697 pdev->dev.archdata.iommu = info;
2698 spin_unlock_irqrestore(&device_domain_lock, flags);
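/*
 * vm_domain_remove_one_dev_info() - detach a single device: unlink its
 * device_domain_info, clear its context entry via iommu_detach_dev(),
 * and, once no other device behind the same IOMMU remains in the
 * domain, drop that IOMMU from iommu_bmp and refresh the domain's
 * coherency setting.
 */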
2703 static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2704 struct pci_dev *pdev)
2706 struct device_domain_info *info;
2707 struct intel_iommu *iommu;
2708 unsigned long flags;
2710 struct list_head *entry, *tmp;
2712 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2716 spin_lock_irqsave(&device_domain_lock, flags);
2717 list_for_each_safe(entry, tmp, &domain->devices) {
2718 info = list_entry(entry, struct device_domain_info, link);
2719 if (info->bus == pdev->bus->number &&
2720 info->devfn == pdev->devfn) {
2721 list_del(&info->link);
2722 list_del(&info->global);
2724 info->dev->dev.archdata.iommu = NULL;
2725 spin_unlock_irqrestore(&device_domain_lock, flags);
2727 iommu_detach_dev(iommu, info->bus, info->devfn);
2728 free_devinfo_mem(info);
2730 spin_lock_irqsave(&device_domain_lock, flags);
2738 /* if there are no other devices under the same iommu
2739 * owned by this domain, clear this iommu in iommu_bmp,
2740 * and update the iommu count and coherency
2742 if (device_to_iommu(info->bus, info->devfn) == iommu)
2747 unsigned long tmp_flags;
2748 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2749 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2750 domain->iommu_count--;
2751 domain_update_iommu_coherency(domain);
2752 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2755 spin_unlock_irqrestore(&device_domain_lock, flags);
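/*
 * vm_domain_remove_all_dev_info() - detach every device from the
 * domain, freeing each device_domain_info and clearing the per-IOMMU
 * bookkeeping as the last device behind each IOMMU goes away.
 */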
2758 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2760 struct device_domain_info *info;
2761 struct intel_iommu *iommu;
2762 unsigned long flags1, flags2;
2764 spin_lock_irqsave(&device_domain_lock, flags1);
2765 while (!list_empty(&domain->devices)) {
2766 info = list_entry(domain->devices.next,
2767 struct device_domain_info, link);
2768 list_del(&info->link);
2769 list_del(&info->global);
2771 info->dev->dev.archdata.iommu = NULL;
2773 spin_unlock_irqrestore(&device_domain_lock, flags1);
2775 iommu = device_to_iommu(info->bus, info->devfn);
2776 iommu_detach_dev(iommu, info->bus, info->devfn);
2778 /* clear this iommu in iommu_bmp, update iommu count
2781 spin_lock_irqsave(&domain->iommu_lock, flags2);
2782 if (test_and_clear_bit(iommu->seq_id,
2783 &domain->iommu_bmp)) {
2784 domain->iommu_count--;
2785 domain_update_iommu_coherency(domain);
2787 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2789 free_devinfo_mem(info);
2790 spin_lock_irqsave(&device_domain_lock, flags1);
2792 spin_unlock_irqrestore(&device_domain_lock, flags1);
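/*
 * intel_iommu_domain_exit() - tear down a domain obtained from
 * intel_iommu_domain_alloc(): clear its mappings, free its page tables
 * and release the domain itself.
 */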
2795 void intel_iommu_domain_exit(struct dmar_domain *domain)
2799 /* Domain 0 is reserved, so don't process it */
2803 end = DOMAIN_MAX_ADDR(domain->gaw);
2804 end = end & (~VTD_PAGE_MASK);
2807 dma_pte_clear_range(domain, 0, end);
2809 /* free page tables */
2810 dma_pte_free_pagetable(domain, 0, end);
2812 iommu_free_domain(domain);
2813 free_domain_mem(domain);
2815 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
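/*
 * intel_iommu_domain_alloc() - allocate a domain on the IOMMU unit that
 * serves the given PCI device and initialize it with the default
 * address width.
 */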
2817 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2819 struct dmar_drhd_unit *drhd;
2820 struct dmar_domain *domain;
2821 struct intel_iommu *iommu;
2823 drhd = dmar_find_matched_drhd_unit(pdev);
2825 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2829 iommu = drhd->iommu;
2832 "intel_iommu_domain_alloc: iommu == NULL\n");
2835 domain = iommu_alloc_domain(iommu);
2838 "intel_iommu_domain_alloc: domain == NULL\n");
2841 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2843 "intel_iommu_domain_alloc: domain_init() failed\n");
2844 intel_iommu_domain_exit(domain);
2849 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2851 int intel_iommu_context_mapping(
2852 struct dmar_domain *domain, struct pci_dev *pdev)
2855 rc = domain_context_mapping(domain, pdev);
2858 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2860 int intel_iommu_page_mapping(
2861 struct dmar_domain *domain, dma_addr_t iova,
2862 u64 hpa, size_t size, int prot)
2865 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2868 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
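/*
 * Hedged sketch of how an external caller might drive the exported API
 * above together with intel_iommu_detach_dev() below; error handling is
 * omitted and iova/hpa/size are placeholder values:
 *
 *	struct dmar_domain *domain = intel_iommu_domain_alloc(pdev);
 *	intel_iommu_context_mapping(domain, pdev);
 *	intel_iommu_page_mapping(domain, iova, hpa, size,
 *				 DMA_PTE_READ | DMA_PTE_WRITE);
 *	... DMA is performed through the domain's mappings ...
 *	intel_iommu_detach_dev(domain, pdev->bus->number, pdev->devfn);
 *	intel_iommu_domain_exit(domain);
 */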
2870 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2872 struct intel_iommu *iommu;
2874 iommu = device_to_iommu(bus, devfn);
2875 iommu_detach_dev(iommu, bus, devfn);
2877 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2879 struct dmar_domain *
2880 intel_iommu_find_domain(struct pci_dev *pdev)
2882 return find_domain(pdev);
2884 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2886 int intel_iommu_found(void)
2888 return g_num_of_iommus;
2890 EXPORT_SYMBOL_GPL(intel_iommu_found);
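/*
 * intel_iommu_iova_to_pfn() - walk the domain's page tables for the
 * given IOVA and return the host page frame number from the leaf PTE
 * (0 when no mapping exists).
 */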
2892 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2894 struct dma_pte *pte;
2898 pte = addr_to_dma_pte(domain, iova);
2901 pfn = dma_pte_addr(pte);
2903 return pfn >> VTD_PAGE_SHIFT;
2905 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);