Add domain_flush_cache
[linux-2.6] drivers/pci/intel-iommu.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE               VTD_PAGE_SIZE
43 #define CONTEXT_SIZE            VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START      (0xfee00000)
49 #define IOAPIC_RANGE_END        (0xfeefffff)
50 #define IOVA_START_ADDR         (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN           IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN           IOVA_PFN(DMA_64BIT_MASK)
59
60 /* global iommu list, set NULL for ignored DMAR units */
61 static struct intel_iommu **g_iommus;
62
63 /*
64  * 0: Present
65  * 1-11: Reserved
66  * 12-63: Context Ptr (12 - (haw-1))
67  * 64-127: Reserved
68  */
69 struct root_entry {
70         u64     val;
71         u64     rsvd1;
72 };
73 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
74 static inline bool root_present(struct root_entry *root)
75 {
76         return (root->val & 1);
77 }
78 static inline void set_root_present(struct root_entry *root)
79 {
80         root->val |= 1;
81 }
82 static inline void set_root_value(struct root_entry *root, unsigned long value)
83 {
84         root->val |= value & VTD_PAGE_MASK;
85 }
86
87 static inline struct context_entry *
88 get_context_addr_from_root(struct root_entry *root)
89 {
90         return (struct context_entry *)
91                 (root_present(root)?phys_to_virt(
92                 root->val & VTD_PAGE_MASK) :
93                 NULL);
94 }
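/*
 * Editorial note: the root table is indexed by PCI bus number and each
 * per-bus context table by devfn, so device_to_context_entry() below is
 * essentially &iommu->root_entry[bus] followed by &context[devfn], with the
 * context table allocated lazily the first time a bus is seen.
 */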
95
96 /*
97  * low 64 bits:
98  * 0: present
99  * 1: fault processing disable
100  * 2-3: translation type
101  * 12-63: address space root
102  * high 64 bits:
103  * 0-2: address width
104  * 3-6: aval
105  * 8-23: domain id
106  */
107 struct context_entry {
108         u64 lo;
109         u64 hi;
110 };
111
112 static inline bool context_present(struct context_entry *context)
113 {
114         return (context->lo & 1);
115 }
116 static inline void context_set_present(struct context_entry *context)
117 {
118         context->lo |= 1;
119 }
120
121 static inline void context_set_fault_enable(struct context_entry *context)
122 {
123         context->lo &= (((u64)-1) << 2) | 1;
124 }
125
126 #define CONTEXT_TT_MULTI_LEVEL 0
127
128 static inline void context_set_translation_type(struct context_entry *context,
129                                                 unsigned long value)
130 {
131         context->lo &= (((u64)-1) << 4) | 3;
132         context->lo |= (value & 3) << 2;
133 }
134
135 static inline void context_set_address_root(struct context_entry *context,
136                                             unsigned long value)
137 {
138         context->lo |= value & VTD_PAGE_MASK;
139 }
140
141 static inline void context_set_address_width(struct context_entry *context,
142                                              unsigned long value)
143 {
144         context->hi |= value & 7;
145 }
146
147 static inline void context_set_domain_id(struct context_entry *context,
148                                          unsigned long value)
149 {
150         context->hi |= (value & ((1 << 16) - 1)) << 8;
151 }
152
153 static inline void context_clear_entry(struct context_entry *context)
154 {
155         context->lo = 0;
156         context->hi = 0;
157 }
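/*
 * Editorial note: domain_context_mapping_one() below programs one of these
 * entries with the sequence
 *
 *	context_set_domain_id(context, domain->id);
 *	context_set_address_width(context, domain->agaw);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 *
 * i.e. the present bit is set only after the rest of the entry is valid,
 * and the cache line is then flushed via domain_flush_cache().
 */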
158
159 /*
160  * 0: readable
161  * 1: writable
162  * 2-6: reserved
163  * 7: super page
164  * 8-11: available
165  * 12-63: Host physical address
166  */
167 struct dma_pte {
168         u64 val;
169 };
170
171 static inline void dma_clear_pte(struct dma_pte *pte)
172 {
173         pte->val = 0;
174 }
175
176 static inline void dma_set_pte_readable(struct dma_pte *pte)
177 {
178         pte->val |= DMA_PTE_READ;
179 }
180
181 static inline void dma_set_pte_writable(struct dma_pte *pte)
182 {
183         pte->val |= DMA_PTE_WRITE;
184 }
185
186 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187 {
188         pte->val = (pte->val & ~3) | (prot & 3);
189 }
190
191 static inline u64 dma_pte_addr(struct dma_pte *pte)
192 {
193         return (pte->val & VTD_PAGE_MASK);
194 }
195
196 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197 {
198         pte->val |= (addr & VTD_PAGE_MASK);
199 }
200
201 static inline bool dma_pte_present(struct dma_pte *pte)
202 {
203         return (pte->val & 3) != 0;
204 }
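/*
 * Editorial note: a leaf PTE is built by pairing these helpers, as in
 * domain_page_mapping() further down:
 *
 *	dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
 *	dma_set_pte_prot(pte, prot);
 *	domain_flush_cache(domain, pte, sizeof(*pte));
 *
 * which leaves bits 0-1 holding the read/write permissions and bits 12-63
 * the host-physical page frame, matching the layout comment above.
 */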
205
206 /* devices under the same p2p bridge are owned in one domain */
207 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
208
209 /* domain represents a virtual machine; more than one device
210  * across iommus may be owned by one domain, e.g. a kvm guest.
211  */
212 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
213
214 struct dmar_domain {
215         int     id;                     /* domain id */
216         unsigned long iommu_bmp;        /* bitmap of iommus this domain uses*/
217
218         struct list_head devices;       /* all devices' list */
219         struct iova_domain iovad;       /* iova's that belong to this domain */
220
221         struct dma_pte  *pgd;           /* virtual address */
222         spinlock_t      mapping_lock;   /* page table lock */
223         int             gaw;            /* max guest address width */
224
225         /* adjusted guest address width, 0 is level 2 30-bit */
226         int             agaw;
227
228         int             flags;          /* flags to find out type of domain */
229
230         int             iommu_coherency;/* indicate coherency of iommu access */
231         int             iommu_count;    /* reference count of iommu */
232         spinlock_t      iommu_lock;     /* protect iommu set in domain */
233 };
234
235 /* PCI domain-device relationship */
236 struct device_domain_info {
237         struct list_head link;  /* link to domain siblings */
238         struct list_head global; /* link to global list */
239         u8 bus;                 /* PCI bus number */
240         u8 devfn;               /* PCI devfn number */
241         struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
242         struct dmar_domain *domain; /* pointer to domain */
243 };
244
245 static void flush_unmaps_timeout(unsigned long data);
246
247 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
248
249 #define HIGH_WATER_MARK 250
250 struct deferred_flush_tables {
251         int next;
252         struct iova *iova[HIGH_WATER_MARK];
253         struct dmar_domain *domain[HIGH_WATER_MARK];
254 };
255
256 static struct deferred_flush_tables *deferred_flush;
257
258 /* number of iommus; bounds per-domain iommu bitmaps and sizes g_iommus */
259 static int g_num_of_iommus;
260
261 static DEFINE_SPINLOCK(async_umap_flush_lock);
262 static LIST_HEAD(unmaps_to_do);
263
264 static int timer_on;
265 static long list_size;
266
267 static void domain_remove_dev_info(struct dmar_domain *domain);
268
269 int dmar_disabled;
270 static int __initdata dmar_map_gfx = 1;
271 static int dmar_forcedac;
272 static int intel_iommu_strict;
273
274 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
275 static DEFINE_SPINLOCK(device_domain_lock);
276 static LIST_HEAD(device_domain_list);
277
278 static int __init intel_iommu_setup(char *str)
279 {
280         if (!str)
281                 return -EINVAL;
282         while (*str) {
283                 if (!strncmp(str, "off", 3)) {
284                         dmar_disabled = 1;
285                         printk(KERN_INFO"Intel-IOMMU: disabled\n");
286                 } else if (!strncmp(str, "igfx_off", 8)) {
287                         dmar_map_gfx = 0;
288                         printk(KERN_INFO
289                                 "Intel-IOMMU: disable GFX device mapping\n");
290                 } else if (!strncmp(str, "forcedac", 8)) {
291                         printk(KERN_INFO
292                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
293                         dmar_forcedac = 1;
294                 } else if (!strncmp(str, "strict", 6)) {
295                         printk(KERN_INFO
296                                 "Intel-IOMMU: disable batched IOTLB flush\n");
297                         intel_iommu_strict = 1;
298                 }
299
300                 str += strcspn(str, ",");
301                 while (*str == ',')
302                         str++;
303         }
304         return 0;
305 }
306 __setup("intel_iommu=", intel_iommu_setup);
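/*
 * Usage note (editorial): the parser above walks a comma-separated option
 * string from the kernel command line, so for example "intel_iommu=off"
 * disables the driver entirely, while "intel_iommu=igfx_off,strict" keeps
 * it enabled but skips graphics-device mapping and turns off the batched
 * (deferred) IOTLB flush.
 */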
307
308 static struct kmem_cache *iommu_domain_cache;
309 static struct kmem_cache *iommu_devinfo_cache;
310 static struct kmem_cache *iommu_iova_cache;
311
312 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
313 {
314         unsigned int flags;
315         void *vaddr;
316
317         /* trying to avoid low memory issues */
318         flags = current->flags & PF_MEMALLOC;
319         current->flags |= PF_MEMALLOC;
320         vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
321         current->flags &= (~PF_MEMALLOC | flags);
322         return vaddr;
323 }
324
325
326 static inline void *alloc_pgtable_page(void)
327 {
328         unsigned int flags;
329         void *vaddr;
330
331         /* trying to avoid low memory issues */
332         flags = current->flags & PF_MEMALLOC;
333         current->flags |= PF_MEMALLOC;
334         vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
335         current->flags &= (~PF_MEMALLOC | flags);
336         return vaddr;
337 }
338
339 static inline void free_pgtable_page(void *vaddr)
340 {
341         free_page((unsigned long)vaddr);
342 }
343
344 static inline void *alloc_domain_mem(void)
345 {
346         return iommu_kmem_cache_alloc(iommu_domain_cache);
347 }
348
349 static void free_domain_mem(void *vaddr)
350 {
351         kmem_cache_free(iommu_domain_cache, vaddr);
352 }
353
354 static inline void * alloc_devinfo_mem(void)
355 {
356         return iommu_kmem_cache_alloc(iommu_devinfo_cache);
357 }
358
359 static inline void free_devinfo_mem(void *vaddr)
360 {
361         kmem_cache_free(iommu_devinfo_cache, vaddr);
362 }
363
364 struct iova *alloc_iova_mem(void)
365 {
366         return iommu_kmem_cache_alloc(iommu_iova_cache);
367 }
368
369 void free_iova_mem(struct iova *iova)
370 {
371         kmem_cache_free(iommu_iova_cache, iova);
372 }
373
374
375 static inline int width_to_agaw(int width);
376
377 /* calculate agaw for each iommu.
378  * "SAGAW" may be different across iommus, so use a default agaw and fall
379  * back to a smaller supported agaw for iommus that don't support the default.
380  */
381 int iommu_calculate_agaw(struct intel_iommu *iommu)
382 {
383         unsigned long sagaw;
384         int agaw = -1;
385
386         sagaw = cap_sagaw(iommu->cap);
387         for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
388              agaw >= 0; agaw--) {
389                 if (test_bit(agaw, &sagaw))
390                         break;
391         }
392
393         return agaw;
394 }
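/*
 * Worked example (editorial): with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48,
 * width_to_agaw(48) = (48 - 30) / 9 = 2, i.e. a 4-level page table
 * (agaw_to_level(2) == 4).  If bit 2 of SAGAW is clear, the loop above
 * falls back to agaw 1 (39-bit, 3 levels) or agaw 0 (30-bit, 2 levels),
 * and returns -1 if the hardware supports none of them.
 */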
395
396 /* in native case, each domain is related to only one iommu */
397 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
398 {
399         int iommu_id;
400
401         BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
402
403         iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
404         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
405                 return NULL;
406
407         return g_iommus[iommu_id];
408 }
409
410 /* "Coherency" capability may be different across iommus */
411 static void domain_update_iommu_coherency(struct dmar_domain *domain)
412 {
413         int i;
414
415         domain->iommu_coherency = 1;
416
417         i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
418         for (; i < g_num_of_iommus; ) {
419                 if (!ecap_coherent(g_iommus[i]->ecap)) {
420                         domain->iommu_coherency = 0;
421                         break;
422                 }
423                 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
424         }
425 }
426
427 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
428 {
429         struct dmar_drhd_unit *drhd = NULL;
430         int i;
431
432         for_each_drhd_unit(drhd) {
433                 if (drhd->ignored)
434                         continue;
435
436                 for (i = 0; i < drhd->devices_cnt; i++)
437                         if (drhd->devices[i]->bus->number == bus &&
438                             drhd->devices[i]->devfn == devfn)
439                                 return drhd->iommu;
440
441                 if (drhd->include_all)
442                         return drhd->iommu;
443         }
444
445         return NULL;
446 }
447
448 static void domain_flush_cache(struct dmar_domain *domain,
449                                void *addr, int size)
450 {
451         if (!domain->iommu_coherency)
452                 clflush_cache_range(addr, size);
453 }
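/*
 * Editorial note on domain_flush_cache() (the helper added by this commit):
 * callers use it after every CPU write to a context entry or DMA PTE,
 * typically as
 *
 *	dma_set_pte_prot(pte, prot);
 *	domain_flush_cache(domain, pte, sizeof(*pte));
 *
 * so that IOMMUs whose page walk is not snoop-coherent (iommu_coherency == 0,
 * derived from ecap_coherent()) see the update; on coherent hardware it is
 * a no-op.
 */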
454
455 /* Gets context entry for a given bus and devfn */
456 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
457                 u8 bus, u8 devfn)
458 {
459         struct root_entry *root;
460         struct context_entry *context;
461         unsigned long phy_addr;
462         unsigned long flags;
463
464         spin_lock_irqsave(&iommu->lock, flags);
465         root = &iommu->root_entry[bus];
466         context = get_context_addr_from_root(root);
467         if (!context) {
468                 context = (struct context_entry *)alloc_pgtable_page();
469                 if (!context) {
470                         spin_unlock_irqrestore(&iommu->lock, flags);
471                         return NULL;
472                 }
473                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
474                 phy_addr = virt_to_phys((void *)context);
475                 set_root_value(root, phy_addr);
476                 set_root_present(root);
477                 __iommu_flush_cache(iommu, root, sizeof(*root));
478         }
479         spin_unlock_irqrestore(&iommu->lock, flags);
480         return &context[devfn];
481 }
482
483 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
484 {
485         struct root_entry *root;
486         struct context_entry *context;
487         int ret;
488         unsigned long flags;
489
490         spin_lock_irqsave(&iommu->lock, flags);
491         root = &iommu->root_entry[bus];
492         context = get_context_addr_from_root(root);
493         if (!context) {
494                 ret = 0;
495                 goto out;
496         }
497         ret = context_present(&context[devfn]);
498 out:
499         spin_unlock_irqrestore(&iommu->lock, flags);
500         return ret;
501 }
502
503 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
504 {
505         struct root_entry *root;
506         struct context_entry *context;
507         unsigned long flags;
508
509         spin_lock_irqsave(&iommu->lock, flags);
510         root = &iommu->root_entry[bus];
511         context = get_context_addr_from_root(root);
512         if (context) {
513                 context_clear_entry(&context[devfn]);
514                 __iommu_flush_cache(iommu, &context[devfn], \
515                         sizeof(*context));
516         }
517         spin_unlock_irqrestore(&iommu->lock, flags);
518 }
519
520 static void free_context_table(struct intel_iommu *iommu)
521 {
522         struct root_entry *root;
523         int i;
524         unsigned long flags;
525         struct context_entry *context;
526
527         spin_lock_irqsave(&iommu->lock, flags);
528         if (!iommu->root_entry) {
529                 goto out;
530         }
531         for (i = 0; i < ROOT_ENTRY_NR; i++) {
532                 root = &iommu->root_entry[i];
533                 context = get_context_addr_from_root(root);
534                 if (context)
535                         free_pgtable_page(context);
536         }
537         free_pgtable_page(iommu->root_entry);
538         iommu->root_entry = NULL;
539 out:
540         spin_unlock_irqrestore(&iommu->lock, flags);
541 }
542
543 /* page table handling */
544 #define LEVEL_STRIDE            (9)
545 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
546
547 static inline int agaw_to_level(int agaw)
548 {
549         return agaw + 2;
550 }
551
552 static inline int agaw_to_width(int agaw)
553 {
554         return 30 + agaw * LEVEL_STRIDE;
555
556 }
557
558 static inline int width_to_agaw(int width)
559 {
560         return (width - 30) / LEVEL_STRIDE;
561 }
562
563 static inline unsigned int level_to_offset_bits(int level)
564 {
565         return (12 + (level - 1) * LEVEL_STRIDE);
566 }
567
568 static inline int address_level_offset(u64 addr, int level)
569 {
570         return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
571 }
572
573 static inline u64 level_mask(int level)
574 {
575         return ((u64)-1 << level_to_offset_bits(level));
576 }
577
578 static inline u64 level_size(int level)
579 {
580         return ((u64)1 << level_to_offset_bits(level));
581 }
582
583 static inline u64 align_to_level(u64 addr, int level)
584 {
585         return ((addr + level_size(level) - 1) & level_mask(level));
586 }
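/*
 * Worked example (editorial): for a 4-level table (agaw 2) the address is
 * consumed 9 bits per level above the 4KB page offset, since
 * level_to_offset_bits(level) = 12 + (level - 1) * 9 and
 * address_level_offset() masks with LEVEL_MASK (0x1ff) at that shift:
 * level 4 indexes on bits 47:39, level 3 on 38:30, level 2 on 29:21 and
 * level 1 on 20:12.
 */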
587
588 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
589 {
590         int addr_width = agaw_to_width(domain->agaw);
591         struct dma_pte *parent, *pte = NULL;
592         int level = agaw_to_level(domain->agaw);
593         int offset;
594         unsigned long flags;
595
596         BUG_ON(!domain->pgd);
597
598         addr &= (((u64)1) << addr_width) - 1;
599         parent = domain->pgd;
600
601         spin_lock_irqsave(&domain->mapping_lock, flags);
602         while (level > 0) {
603                 void *tmp_page;
604
605                 offset = address_level_offset(addr, level);
606                 pte = &parent[offset];
607                 if (level == 1)
608                         break;
609
610                 if (!dma_pte_present(pte)) {
611                         tmp_page = alloc_pgtable_page();
612
613                         if (!tmp_page) {
614                                 spin_unlock_irqrestore(&domain->mapping_lock,
615                                         flags);
616                                 return NULL;
617                         }
618                         domain_flush_cache(domain, tmp_page, PAGE_SIZE);
619                         dma_set_pte_addr(pte, virt_to_phys(tmp_page));
620                         /*
621                          * high level table always sets r/w, last level page
622                          * table control read/write
623                          */
624                         dma_set_pte_readable(pte);
625                         dma_set_pte_writable(pte);
626                         domain_flush_cache(domain, pte, sizeof(*pte));
627                 }
628                 parent = phys_to_virt(dma_pte_addr(pte));
629                 level--;
630         }
631
632         spin_unlock_irqrestore(&domain->mapping_lock, flags);
633         return pte;
634 }
635
636 /* return address's pte at specific level */
637 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
638                 int level)
639 {
640         struct dma_pte *parent, *pte = NULL;
641         int total = agaw_to_level(domain->agaw);
642         int offset;
643
644         parent = domain->pgd;
645         while (level <= total) {
646                 offset = address_level_offset(addr, total);
647                 pte = &parent[offset];
648                 if (level == total)
649                         return pte;
650
651                 if (!dma_pte_present(pte))
652                         break;
653                 parent = phys_to_virt(dma_pte_addr(pte));
654                 total--;
655         }
656         return NULL;
657 }
658
659 /* clear one page's page table */
660 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
661 {
662         struct dma_pte *pte = NULL;
663
664         /* get last level pte */
665         pte = dma_addr_level_pte(domain, addr, 1);
666
667         if (pte) {
668                 dma_clear_pte(pte);
669                 domain_flush_cache(domain, pte, sizeof(*pte));
670         }
671 }
672
673 /* clear last level pte, a tlb flush should be followed */
674 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
675 {
676         int addr_width = agaw_to_width(domain->agaw);
677
678         start &= (((u64)1) << addr_width) - 1;
679         end &= (((u64)1) << addr_width) - 1;
680         /* in case it's partial page */
681         start = PAGE_ALIGN(start);
682         end &= PAGE_MASK;
683
684         /* we don't need lock here, nobody else touches the iova range */
685         while (start < end) {
686                 dma_pte_clear_one(domain, start);
687                 start += VTD_PAGE_SIZE;
688         }
689 }
690
691 /* free page table pages. last level pte should already be cleared */
692 static void dma_pte_free_pagetable(struct dmar_domain *domain,
693         u64 start, u64 end)
694 {
695         int addr_width = agaw_to_width(domain->agaw);
696         struct dma_pte *pte;
697         int total = agaw_to_level(domain->agaw);
698         int level;
699         u64 tmp;
700
701         start &= (((u64)1) << addr_width) - 1;
702         end &= (((u64)1) << addr_width) - 1;
703
704         /* we don't need lock here, nobody else touches the iova range */
705         level = 2;
706         while (level <= total) {
707                 tmp = align_to_level(start, level);
708                 if (tmp >= end || (tmp + level_size(level) > end))
709                         return;
710
711                 while (tmp < end) {
712                         pte = dma_addr_level_pte(domain, tmp, level);
713                         if (pte) {
714                                 free_pgtable_page(
715                                         phys_to_virt(dma_pte_addr(pte)));
716                                 dma_clear_pte(pte);
717                                 domain_flush_cache(domain, pte, sizeof(*pte));
718                         }
719                         tmp += level_size(level);
720                 }
721                 level++;
722         }
723         /* free pgd */
724         if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
725                 free_pgtable_page(domain->pgd);
726                 domain->pgd = NULL;
727         }
728 }
729
730 /* iommu handling */
731 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
732 {
733         struct root_entry *root;
734         unsigned long flags;
735
736         root = (struct root_entry *)alloc_pgtable_page();
737         if (!root)
738                 return -ENOMEM;
739
740         __iommu_flush_cache(iommu, root, ROOT_SIZE);
741
742         spin_lock_irqsave(&iommu->lock, flags);
743         iommu->root_entry = root;
744         spin_unlock_irqrestore(&iommu->lock, flags);
745
746         return 0;
747 }
748
749 static void iommu_set_root_entry(struct intel_iommu *iommu)
750 {
751         void *addr;
752         u32 cmd, sts;
753         unsigned long flag;
754
755         addr = iommu->root_entry;
756
757         spin_lock_irqsave(&iommu->register_lock, flag);
758         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
759
760         cmd = iommu->gcmd | DMA_GCMD_SRTP;
761         writel(cmd, iommu->reg + DMAR_GCMD_REG);
762
763         /* Make sure hardware completes it */
764         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
765                 readl, (sts & DMA_GSTS_RTPS), sts);
766
767         spin_unlock_irqrestore(&iommu->register_lock, flag);
768 }
769
770 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
771 {
772         u32 val;
773         unsigned long flag;
774
775         if (!cap_rwbf(iommu->cap))
776                 return;
777         val = iommu->gcmd | DMA_GCMD_WBF;
778
779         spin_lock_irqsave(&iommu->register_lock, flag);
780         writel(val, iommu->reg + DMAR_GCMD_REG);
781
782         /* Make sure hardware completes it */
783         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
784                         readl, (!(val & DMA_GSTS_WBFS)), val);
785
786         spin_unlock_irqrestore(&iommu->register_lock, flag);
787 }
788
789 /* return value determines whether we need a write buffer flush */
790 static int __iommu_flush_context(struct intel_iommu *iommu,
791         u16 did, u16 source_id, u8 function_mask, u64 type,
792         int non_present_entry_flush)
793 {
794         u64 val = 0;
795         unsigned long flag;
796
797         /*
798          * In the non-present entry flush case: if the hardware doesn't
799          * cache non-present entries we do nothing; if it does, we flush
800          * the entries of domain 0 (domain id 0 is used to tag any cached
801          * non-present entries).
802          */
803         if (non_present_entry_flush) {
804                 if (!cap_caching_mode(iommu->cap))
805                         return 1;
806                 else
807                         did = 0;
808         }
809
810         switch (type) {
811         case DMA_CCMD_GLOBAL_INVL:
812                 val = DMA_CCMD_GLOBAL_INVL;
813                 break;
814         case DMA_CCMD_DOMAIN_INVL:
815                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
816                 break;
817         case DMA_CCMD_DEVICE_INVL:
818                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
819                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
820                 break;
821         default:
822                 BUG();
823         }
824         val |= DMA_CCMD_ICC;
825
826         spin_lock_irqsave(&iommu->register_lock, flag);
827         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
828
829         /* Make sure hardware completes it */
830         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
831                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
832
833         spin_unlock_irqrestore(&iommu->register_lock, flag);
834
835         /* flush context entry will implicitly flush write buffer */
836         return 0;
837 }
838
839 /* return value determines whether we need a write buffer flush */
840 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
841         u64 addr, unsigned int size_order, u64 type,
842         int non_present_entry_flush)
843 {
844         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
845         u64 val = 0, val_iva = 0;
846         unsigned long flag;
847
848         /*
849          * In the non-present entry flush case: if the hardware doesn't
850          * cache non-present entries we do nothing; if it does, we flush
851          * the entries of domain 0 (domain id 0 is used to tag any cached
852          * non-present entries).
853          */
854         if (non_present_entry_flush) {
855                 if (!cap_caching_mode(iommu->cap))
856                         return 1;
857                 else
858                         did = 0;
859         }
860
861         switch (type) {
862         case DMA_TLB_GLOBAL_FLUSH:
863                 /* global flush doesn't need set IVA_REG */
864                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
865                 break;
866         case DMA_TLB_DSI_FLUSH:
867                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
868                 break;
869         case DMA_TLB_PSI_FLUSH:
870                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
871                 /* Note: always flush non-leaf currently */
872                 val_iva = size_order | addr;
873                 break;
874         default:
875                 BUG();
876         }
877         /* Note: set drain read/write */
878 #if 0
879         /*
880          * This is probably to be super secure.. Looks like we can
881          * ignore it without any impact.
882          */
883         if (cap_read_drain(iommu->cap))
884                 val |= DMA_TLB_READ_DRAIN;
885 #endif
886         if (cap_write_drain(iommu->cap))
887                 val |= DMA_TLB_WRITE_DRAIN;
888
889         spin_lock_irqsave(&iommu->register_lock, flag);
890         /* Note: Only uses first TLB reg currently */
891         if (val_iva)
892                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
893         dmar_writeq(iommu->reg + tlb_offset + 8, val);
894
895         /* Make sure hardware completes it */
896         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
897                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
898
899         spin_unlock_irqrestore(&iommu->register_lock, flag);
900
901         /* check IOTLB invalidation granularity */
902         if (DMA_TLB_IAIG(val) == 0)
903                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
904         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
905                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
906                         (unsigned long long)DMA_TLB_IIRG(type),
907                         (unsigned long long)DMA_TLB_IAIG(val));
908         /* flush iotlb entry will implicitly flush write buffer */
909         return 0;
910 }
911
912 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
913         u64 addr, unsigned int pages, int non_present_entry_flush)
914 {
915         unsigned int mask;
916
917         BUG_ON(addr & (~VTD_PAGE_MASK));
918         BUG_ON(pages == 0);
919
920         /* Fallback to domain selective flush if no PSI support */
921         if (!cap_pgsel_inv(iommu->cap))
922                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
923                                                 DMA_TLB_DSI_FLUSH,
924                                                 non_present_entry_flush);
925
926         /*
927          * PSI requires page size to be 2 ^ x, and the base address is naturally
928          * aligned to the size
929          */
930         mask = ilog2(__roundup_pow_of_two(pages));
931         /* Fallback to domain selective flush if size is too big */
932         if (mask > cap_max_amask_val(iommu->cap))
933                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
934                         DMA_TLB_DSI_FLUSH, non_present_entry_flush);
935
936         return iommu->flush.flush_iotlb(iommu, did, addr, mask,
937                                         DMA_TLB_PSI_FLUSH,
938                                         non_present_entry_flush);
939 }
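/*
 * Worked example (editorial): a 5-page request rounds up to 8 pages, so
 * mask = ilog2(8) = 3 and the hardware invalidates a naturally aligned
 * 2^3 * 4KB = 32KB region; if that mask exceeded cap_max_amask_val(), the
 * code above would instead fall back to a domain-selective flush.
 */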
940
941 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
942 {
943         u32 pmen;
944         unsigned long flags;
945
946         spin_lock_irqsave(&iommu->register_lock, flags);
947         pmen = readl(iommu->reg + DMAR_PMEN_REG);
948         pmen &= ~DMA_PMEN_EPM;
949         writel(pmen, iommu->reg + DMAR_PMEN_REG);
950
951         /* wait for the protected region status bit to clear */
952         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
953                 readl, !(pmen & DMA_PMEN_PRS), pmen);
954
955         spin_unlock_irqrestore(&iommu->register_lock, flags);
956 }
957
958 static int iommu_enable_translation(struct intel_iommu *iommu)
959 {
960         u32 sts;
961         unsigned long flags;
962
963         spin_lock_irqsave(&iommu->register_lock, flags);
964         writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
965
966         /* Make sure hardware completes it */
967         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
968                 readl, (sts & DMA_GSTS_TES), sts);
969
970         iommu->gcmd |= DMA_GCMD_TE;
971         spin_unlock_irqrestore(&iommu->register_lock, flags);
972         return 0;
973 }
974
975 static int iommu_disable_translation(struct intel_iommu *iommu)
976 {
977         u32 sts;
978         unsigned long flag;
979
980         spin_lock_irqsave(&iommu->register_lock, flag);
981         iommu->gcmd &= ~DMA_GCMD_TE;
982         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
983
984         /* Make sure hardware completes it */
985         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
986                 readl, (!(sts & DMA_GSTS_TES)), sts);
987
988         spin_unlock_irqrestore(&iommu->register_lock, flag);
989         return 0;
990 }
991
992 /* iommu interrupt handling. Most of it is MSI-like. */
993
994 static const char *fault_reason_strings[] =
995 {
996         "Software",
997         "Present bit in root entry is clear",
998         "Present bit in context entry is clear",
999         "Invalid context entry",
1000         "Access beyond MGAW",
1001         "PTE Write access is not set",
1002         "PTE Read access is not set",
1003         "Next page table ptr is invalid",
1004         "Root table address invalid",
1005         "Context table ptr is invalid",
1006         "non-zero reserved fields in RTP",
1007         "non-zero reserved fields in CTP",
1008         "non-zero reserved fields in PTE",
1009 };
1010 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
1011
1012 const char *dmar_get_fault_reason(u8 fault_reason)
1013 {
1014         if (fault_reason > MAX_FAULT_REASON_IDX)
1015                 return "Unknown";
1016         else
1017                 return fault_reason_strings[fault_reason];
1018 }
1019
1020 void dmar_msi_unmask(unsigned int irq)
1021 {
1022         struct intel_iommu *iommu = get_irq_data(irq);
1023         unsigned long flag;
1024
1025         /* unmask it */
1026         spin_lock_irqsave(&iommu->register_lock, flag);
1027         writel(0, iommu->reg + DMAR_FECTL_REG);
1028         /* Read a reg to force flush the post write */
1029         readl(iommu->reg + DMAR_FECTL_REG);
1030         spin_unlock_irqrestore(&iommu->register_lock, flag);
1031 }
1032
1033 void dmar_msi_mask(unsigned int irq)
1034 {
1035         unsigned long flag;
1036         struct intel_iommu *iommu = get_irq_data(irq);
1037
1038         /* mask it */
1039         spin_lock_irqsave(&iommu->register_lock, flag);
1040         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1041         /* Read a reg to force flush the post write */
1042         readl(iommu->reg + DMAR_FECTL_REG);
1043         spin_unlock_irqrestore(&iommu->register_lock, flag);
1044 }
1045
1046 void dmar_msi_write(int irq, struct msi_msg *msg)
1047 {
1048         struct intel_iommu *iommu = get_irq_data(irq);
1049         unsigned long flag;
1050
1051         spin_lock_irqsave(&iommu->register_lock, flag);
1052         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1053         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1054         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1055         spin_unlock_irqrestore(&iommu->register_lock, flag);
1056 }
1057
1058 void dmar_msi_read(int irq, struct msi_msg *msg)
1059 {
1060         struct intel_iommu *iommu = get_irq_data(irq);
1061         unsigned long flag;
1062
1063         spin_lock_irqsave(&iommu->register_lock, flag);
1064         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1065         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1066         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1067         spin_unlock_irqrestore(&iommu->register_lock, flag);
1068 }
1069
1070 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1071                 u8 fault_reason, u16 source_id, unsigned long long addr)
1072 {
1073         const char *reason;
1074
1075         reason = dmar_get_fault_reason(fault_reason);
1076
1077         printk(KERN_ERR
1078                 "DMAR:[%s] Request device [%02x:%02x.%d] "
1079                 "fault addr %llx \n"
1080                 "DMAR:[fault reason %02d] %s\n",
1081                 (type ? "DMA Read" : "DMA Write"),
1082                 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1083                 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1084         return 0;
1085 }
1086
1087 #define PRIMARY_FAULT_REG_LEN (16)
1088 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1089 {
1090         struct intel_iommu *iommu = dev_id;
1091         int reg, fault_index;
1092         u32 fault_status;
1093         unsigned long flag;
1094
1095         spin_lock_irqsave(&iommu->register_lock, flag);
1096         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1097
1098         /* TBD: ignore advanced fault log currently */
1099         if (!(fault_status & DMA_FSTS_PPF))
1100                 goto clear_overflow;
1101
1102         fault_index = dma_fsts_fault_record_index(fault_status);
1103         reg = cap_fault_reg_offset(iommu->cap);
1104         while (1) {
1105                 u8 fault_reason;
1106                 u16 source_id;
1107                 u64 guest_addr;
1108                 int type;
1109                 u32 data;
1110
1111                 /* highest 32 bits */
1112                 data = readl(iommu->reg + reg +
1113                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1114                 if (!(data & DMA_FRCD_F))
1115                         break;
1116
1117                 fault_reason = dma_frcd_fault_reason(data);
1118                 type = dma_frcd_type(data);
1119
1120                 data = readl(iommu->reg + reg +
1121                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1122                 source_id = dma_frcd_source_id(data);
1123
1124                 guest_addr = dmar_readq(iommu->reg + reg +
1125                                 fault_index * PRIMARY_FAULT_REG_LEN);
1126                 guest_addr = dma_frcd_page_addr(guest_addr);
1127                 /* clear the fault */
1128                 writel(DMA_FRCD_F, iommu->reg + reg +
1129                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1130
1131                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1132
1133                 iommu_page_fault_do_one(iommu, type, fault_reason,
1134                                 source_id, guest_addr);
1135
1136                 fault_index++;
1137                 if (fault_index > cap_num_fault_regs(iommu->cap))
1138                         fault_index = 0;
1139                 spin_lock_irqsave(&iommu->register_lock, flag);
1140         }
1141 clear_overflow:
1142         /* clear primary fault overflow */
1143         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1144         if (fault_status & DMA_FSTS_PFO)
1145                 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1146
1147         spin_unlock_irqrestore(&iommu->register_lock, flag);
1148         return IRQ_HANDLED;
1149 }
1150
1151 int dmar_set_interrupt(struct intel_iommu *iommu)
1152 {
1153         int irq, ret;
1154
1155         irq = create_irq();
1156         if (!irq) {
1157                 printk(KERN_ERR "IOMMU: no free vectors\n");
1158                 return -EINVAL;
1159         }
1160
1161         set_irq_data(irq, iommu);
1162         iommu->irq = irq;
1163
1164         ret = arch_setup_dmar_msi(irq);
1165         if (ret) {
1166                 set_irq_data(irq, NULL);
1167                 iommu->irq = 0;
1168                 destroy_irq(irq);
1169                 return 0;
1170         }
1171
1172         /* Force fault register is cleared */
1173         iommu_page_fault(irq, iommu);
1174
1175         ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1176         if (ret)
1177                 printk(KERN_ERR "IOMMU: can't request irq\n");
1178         return ret;
1179 }
1180
1181 static int iommu_init_domains(struct intel_iommu *iommu)
1182 {
1183         unsigned long ndomains;
1184         unsigned long nlongs;
1185
1186         ndomains = cap_ndoms(iommu->cap);
1187         pr_debug("Number of Domains supported <%ld>\n", ndomains);
1188         nlongs = BITS_TO_LONGS(ndomains);
1189
1190         /* TBD: there might be 64K domains,
1191          * consider other allocation for future chip
1192          */
1193         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1194         if (!iommu->domain_ids) {
1195                 printk(KERN_ERR "Allocating domain id array failed\n");
1196                 return -ENOMEM;
1197         }
1198         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1199                         GFP_KERNEL);
1200         if (!iommu->domains) {
1201                 printk(KERN_ERR "Allocating domain array failed\n");
1202                 kfree(iommu->domain_ids);
1203                 return -ENOMEM;
1204         }
1205
1206         spin_lock_init(&iommu->lock);
1207
1208         /*
1209          * if Caching mode is set, then invalid translations are tagged
1210          * with domainid 0. Hence we need to pre-allocate it.
1211          */
1212         if (cap_caching_mode(iommu->cap))
1213                 set_bit(0, iommu->domain_ids);
1214         return 0;
1215 }
1216
1217
1218 static void domain_exit(struct dmar_domain *domain);
1219
1220 void free_dmar_iommu(struct intel_iommu *iommu)
1221 {
1222         struct dmar_domain *domain;
1223         int i;
1224         unsigned long flags;
1225
1226         i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1227         for (; i < cap_ndoms(iommu->cap); ) {
1228                 domain = iommu->domains[i];
1229                 clear_bit(i, iommu->domain_ids);
1230
1231                 spin_lock_irqsave(&domain->iommu_lock, flags);
1232                 if (--domain->iommu_count == 0)
1233                         domain_exit(domain);
1234                 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1235
1236                 i = find_next_bit(iommu->domain_ids,
1237                         cap_ndoms(iommu->cap), i+1);
1238         }
1239
1240         if (iommu->gcmd & DMA_GCMD_TE)
1241                 iommu_disable_translation(iommu);
1242
1243         if (iommu->irq) {
1244                 set_irq_data(iommu->irq, NULL);
1245                 /* This will mask the irq */
1246                 free_irq(iommu->irq, iommu);
1247                 destroy_irq(iommu->irq);
1248         }
1249
1250         kfree(iommu->domains);
1251         kfree(iommu->domain_ids);
1252
1253         g_iommus[iommu->seq_id] = NULL;
1254
1255         /* if all iommus are freed, free g_iommus */
1256         for (i = 0; i < g_num_of_iommus; i++) {
1257                 if (g_iommus[i])
1258                         break;
1259         }
1260
1261         if (i == g_num_of_iommus)
1262                 kfree(g_iommus);
1263
1264         /* free context mapping */
1265         free_context_table(iommu);
1266 }
1267
1268 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1269 {
1270         unsigned long num;
1271         unsigned long ndomains;
1272         struct dmar_domain *domain;
1273         unsigned long flags;
1274
1275         domain = alloc_domain_mem();
1276         if (!domain)
1277                 return NULL;
1278
1279         ndomains = cap_ndoms(iommu->cap);
1280
1281         spin_lock_irqsave(&iommu->lock, flags);
1282         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1283         if (num >= ndomains) {
1284                 spin_unlock_irqrestore(&iommu->lock, flags);
1285                 free_domain_mem(domain);
1286                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1287                 return NULL;
1288         }
1289
1290         set_bit(num, iommu->domain_ids);
1291         domain->id = num;
1292         memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1293         set_bit(iommu->seq_id, &domain->iommu_bmp);
1294         domain->flags = 0;
1295         iommu->domains[num] = domain;
1296         spin_unlock_irqrestore(&iommu->lock, flags);
1297
1298         return domain;
1299 }
1300
1301 static void iommu_free_domain(struct dmar_domain *domain)
1302 {
1303         unsigned long flags;
1304         struct intel_iommu *iommu;
1305
1306         iommu = domain_get_iommu(domain);
1307
1308         spin_lock_irqsave(&iommu->lock, flags);
1309         clear_bit(domain->id, iommu->domain_ids);
1310         spin_unlock_irqrestore(&iommu->lock, flags);
1311 }
1312
1313 static struct iova_domain reserved_iova_list;
1314 static struct lock_class_key reserved_alloc_key;
1315 static struct lock_class_key reserved_rbtree_key;
1316
1317 static void dmar_init_reserved_ranges(void)
1318 {
1319         struct pci_dev *pdev = NULL;
1320         struct iova *iova;
1321         int i;
1322         u64 addr, size;
1323
1324         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1325
1326         lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1327                 &reserved_alloc_key);
1328         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1329                 &reserved_rbtree_key);
1330
1331         /* IOAPIC ranges shouldn't be accessed by DMA */
1332         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1333                 IOVA_PFN(IOAPIC_RANGE_END));
1334         if (!iova)
1335                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1336
1337         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1338         for_each_pci_dev(pdev) {
1339                 struct resource *r;
1340
1341                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1342                         r = &pdev->resource[i];
1343                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1344                                 continue;
1345                         addr = r->start;
1346                         addr &= PAGE_MASK;
1347                         size = r->end - addr;
1348                         size = PAGE_ALIGN(size);
1349                         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1350                                 IOVA_PFN(size + addr) - 1);
1351                         if (!iova)
1352                                 printk(KERN_ERR "Reserve iova failed\n");
1353                 }
1354         }
1355
1356 }
1357
1358 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1359 {
1360         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1361 }
1362
1363 static inline int guestwidth_to_adjustwidth(int gaw)
1364 {
1365         int agaw;
1366         int r = (gaw - 12) % 9;
1367
1368         if (r == 0)
1369                 agaw = gaw;
1370         else
1371                 agaw = gaw + 9 - r;
1372         if (agaw > 64)
1373                 agaw = 64;
1374         return agaw;
1375 }
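/*
 * Worked example (editorial): a 48-bit guest width gives
 * r = (48 - 12) % 9 = 0, so the width is kept as-is, while a 36-bit width
 * gives r = 6 and is rounded up to 36 + 9 - 6 = 39 bits, the next width
 * a 9-bit-per-level page table can actually cover; the result is capped
 * at 64.
 */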
1376
1377 static int domain_init(struct dmar_domain *domain, int guest_width)
1378 {
1379         struct intel_iommu *iommu;
1380         int adjust_width, agaw;
1381         unsigned long sagaw;
1382
1383         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1384         spin_lock_init(&domain->mapping_lock);
1385         spin_lock_init(&domain->iommu_lock);
1386
1387         domain_reserve_special_ranges(domain);
1388
1389         /* calculate AGAW */
1390         iommu = domain_get_iommu(domain);
1391         if (guest_width > cap_mgaw(iommu->cap))
1392                 guest_width = cap_mgaw(iommu->cap);
1393         domain->gaw = guest_width;
1394         adjust_width = guestwidth_to_adjustwidth(guest_width);
1395         agaw = width_to_agaw(adjust_width);
1396         sagaw = cap_sagaw(iommu->cap);
1397         if (!test_bit(agaw, &sagaw)) {
1398                 /* hardware doesn't support it, choose a bigger one */
1399                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1400                 agaw = find_next_bit(&sagaw, 5, agaw);
1401                 if (agaw >= 5)
1402                         return -ENODEV;
1403         }
1404         domain->agaw = agaw;
1405         INIT_LIST_HEAD(&domain->devices);
1406
1407         if (ecap_coherent(iommu->ecap))
1408                 domain->iommu_coherency = 1;
1409         else
1410                 domain->iommu_coherency = 0;
1411
1412         domain->iommu_count = 1;
1413
1414         /* always allocate the top pgd */
1415         domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1416         if (!domain->pgd)
1417                 return -ENOMEM;
1418         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1419         return 0;
1420 }
1421
1422 static void domain_exit(struct dmar_domain *domain)
1423 {
1424         u64 end;
1425
1426         /* Domain 0 is reserved, so don't process it */
1427         if (!domain)
1428                 return;
1429
1430         domain_remove_dev_info(domain);
1431         /* destroy iovas */
1432         put_iova_domain(&domain->iovad);
1433         end = DOMAIN_MAX_ADDR(domain->gaw);
1434         end = end & (~PAGE_MASK);
1435
1436         /* clear ptes */
1437         dma_pte_clear_range(domain, 0, end);
1438
1439         /* free page tables */
1440         dma_pte_free_pagetable(domain, 0, end);
1441
1442         iommu_free_domain(domain);
1443         free_domain_mem(domain);
1444 }
1445
1446 static int domain_context_mapping_one(struct dmar_domain *domain,
1447                 u8 bus, u8 devfn)
1448 {
1449         struct context_entry *context;
1450         unsigned long flags;
1451         struct intel_iommu *iommu;
1452
1453         pr_debug("Set context mapping for %02x:%02x.%d\n",
1454                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1455         BUG_ON(!domain->pgd);
1456
1457         iommu = device_to_iommu(bus, devfn);
1458         if (!iommu)
1459                 return -ENODEV;
1460
1461         context = device_to_context_entry(iommu, bus, devfn);
1462         if (!context)
1463                 return -ENOMEM;
1464         spin_lock_irqsave(&iommu->lock, flags);
1465         if (context_present(context)) {
1466                 spin_unlock_irqrestore(&iommu->lock, flags);
1467                 return 0;
1468         }
1469
1470         context_set_domain_id(context, domain->id);
1471         context_set_address_width(context, domain->agaw);
1472         context_set_address_root(context, virt_to_phys(domain->pgd));
1473         context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1474         context_set_fault_enable(context);
1475         context_set_present(context);
1476         domain_flush_cache(domain, context, sizeof(*context));
1477
1478         /* it's a non-present to present mapping */
1479         if (iommu->flush.flush_context(iommu, domain->id,
1480                 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1481                 DMA_CCMD_DEVICE_INVL, 1))
1482                 iommu_flush_write_buffer(iommu);
1483         else
1484                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1485
1486         spin_unlock_irqrestore(&iommu->lock, flags);
1487
1488         spin_lock_irqsave(&domain->iommu_lock, flags);
1489         if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1490                 domain->iommu_count++;
1491                 domain_update_iommu_coherency(domain);
1492         }
1493         spin_unlock_irqrestore(&domain->iommu_lock, flags);
1494         return 0;
1495 }
1496
1497 static int
1498 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1499 {
1500         int ret;
1501         struct pci_dev *tmp, *parent;
1502
1503         ret = domain_context_mapping_one(domain, pdev->bus->number,
1504                 pdev->devfn);
1505         if (ret)
1506                 return ret;
1507
1508         /* dependent device mapping */
1509         tmp = pci_find_upstream_pcie_bridge(pdev);
1510         if (!tmp)
1511                 return 0;
1512         /* Secondary interface's bus number and devfn 0 */
1513         parent = pdev->bus->self;
1514         while (parent != tmp) {
1515                 ret = domain_context_mapping_one(domain, parent->bus->number,
1516                         parent->devfn);
1517                 if (ret)
1518                         return ret;
1519                 parent = parent->bus->self;
1520         }
1521         if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1522                 return domain_context_mapping_one(domain,
1523                         tmp->subordinate->number, 0);
1524         else /* this is a legacy PCI bridge */
1525                 return domain_context_mapping_one(domain,
1526                         tmp->bus->number, tmp->devfn);
1527 }
1528
1529 static int domain_context_mapped(struct pci_dev *pdev)
1530 {
1531         int ret;
1532         struct pci_dev *tmp, *parent;
1533         struct intel_iommu *iommu;
1534
1535         iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1536         if (!iommu)
1537                 return -ENODEV;
1538
1539         ret = device_context_mapped(iommu,
1540                 pdev->bus->number, pdev->devfn);
1541         if (!ret)
1542                 return ret;
1543         /* dependent device mapping */
1544         tmp = pci_find_upstream_pcie_bridge(pdev);
1545         if (!tmp)
1546                 return ret;
1547         /* Secondary interface's bus number and devfn 0 */
1548         parent = pdev->bus->self;
1549         while (parent != tmp) {
1550                 ret = device_context_mapped(iommu, parent->bus->number,
1551                         parent->devfn);
1552                 if (!ret)
1553                         return ret;
1554                 parent = parent->bus->self;
1555         }
1556         if (tmp->is_pcie)
1557                 return device_context_mapped(iommu,
1558                         tmp->subordinate->number, 0);
1559         else
1560                 return device_context_mapped(iommu,
1561                         tmp->bus->number, tmp->devfn);
1562 }
1563
1564 static int
1565 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1566                         u64 hpa, size_t size, int prot)
1567 {
1568         u64 start_pfn, end_pfn;
1569         struct dma_pte *pte;
1570         int index;
1571         int addr_width = agaw_to_width(domain->agaw);
1572
1573         hpa &= (((u64)1) << addr_width) - 1;
1574
1575         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1576                 return -EINVAL;
1577         iova &= PAGE_MASK;
1578         start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1579         end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1580         index = 0;
1581         while (start_pfn < end_pfn) {
1582                 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1583                 if (!pte)
1584                         return -ENOMEM;
1585                 /* We don't need lock here, nobody else
1586                  * touches the iova range
1587                  */
1588                 BUG_ON(dma_pte_addr(pte));
1589                 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1590                 dma_set_pte_prot(pte, prot);
1591                 domain_flush_cache(domain, pte, sizeof(*pte));
1592                 start_pfn++;
1593                 index++;
1594         }
1595         return 0;
1596 }
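/*
 * Editorial sketch (hypothetical caller, not from this file): a user of
 * domain_page_mapping() that wants to identity-map a physical range would
 * do something like
 *
 *	ret = domain_page_mapping(domain, base, base, size,
 *				  DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * The routine fills in one leaf PTE per 4KB page via addr_to_dma_pte();
 * note that it does not flush the IOTLB, so the caller must do that
 * separately once the mapping is in place.
 */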
1597
1598 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1599 {
1600         if (!iommu)
1601                 return;
1602
1603         clear_context_table(iommu, bus, devfn);
1604         iommu->flush.flush_context(iommu, 0, 0, 0,
1605                                            DMA_CCMD_GLOBAL_INVL, 0);
1606         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1607                                          DMA_TLB_GLOBAL_FLUSH, 0);
1608 }
1609
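/*
 * Detach every device attached to the domain and free its
 * device_domain_info.  The lock is dropped around the per-device hardware
 * teardown and re-taken afterwards.
 */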
1610 static void domain_remove_dev_info(struct dmar_domain *domain)
1611 {
1612         struct device_domain_info *info;
1613         unsigned long flags;
1614         struct intel_iommu *iommu;
1615
1616         spin_lock_irqsave(&device_domain_lock, flags);
1617         while (!list_empty(&domain->devices)) {
1618                 info = list_entry(domain->devices.next,
1619                         struct device_domain_info, link);
1620                 list_del(&info->link);
1621                 list_del(&info->global);
1622                 if (info->dev)
1623                         info->dev->dev.archdata.iommu = NULL;
1624                 spin_unlock_irqrestore(&device_domain_lock, flags);
1625
1626                 iommu = device_to_iommu(info->bus, info->devfn);
1627                 iommu_detach_dev(iommu, info->bus, info->devfn);
1628                 free_devinfo_mem(info);
1629
1630                 spin_lock_irqsave(&device_domain_lock, flags);
1631         }
1632         spin_unlock_irqrestore(&device_domain_lock, flags);
1633 }
1634
1635 /*
1636  * find_domain
1637  * Note: struct pci_dev->dev.archdata.iommu stores the device_domain_info
1638  */
1639 static struct dmar_domain *
1640 find_domain(struct pci_dev *pdev)
1641 {
1642         struct device_domain_info *info;
1643
1644         /* No lock here; we assume the domain does not go away in the normal case */
1645         info = pdev->dev.archdata.iommu;
1646         if (info)
1647                 return info->domain;
1648         return NULL;
1649 }
1650
1651 /* domain is initialized */
1652 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1653 {
1654         struct dmar_domain *domain, *found = NULL;
1655         struct intel_iommu *iommu;
1656         struct dmar_drhd_unit *drhd;
1657         struct device_domain_info *info, *tmp;
1658         struct pci_dev *dev_tmp;
1659         unsigned long flags;
1660         int bus = 0, devfn = 0;
1661
1662         domain = find_domain(pdev);
1663         if (domain)
1664                 return domain;
1665
1666         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1667         if (dev_tmp) {
1668                 if (dev_tmp->is_pcie) {
1669                         bus = dev_tmp->subordinate->number;
1670                         devfn = 0;
1671                 } else {
1672                         bus = dev_tmp->bus->number;
1673                         devfn = dev_tmp->devfn;
1674                 }
1675                 spin_lock_irqsave(&device_domain_lock, flags);
1676                 list_for_each_entry(info, &device_domain_list, global) {
1677                         if (info->bus == bus && info->devfn == devfn) {
1678                                 found = info->domain;
1679                                 break;
1680                         }
1681                 }
1682                 spin_unlock_irqrestore(&device_domain_lock, flags);
1683                 /* pcie-to-pci bridge already has a domain; use it */
1684                 if (found) {
1685                         domain = found;
1686                         goto found_domain;
1687                 }
1688         }
1689
1690         /* Allocate new domain for the device */
1691         drhd = dmar_find_matched_drhd_unit(pdev);
1692         if (!drhd) {
1693                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1694                         pci_name(pdev));
1695                 return NULL;
1696         }
1697         iommu = drhd->iommu;
1698
1699         domain = iommu_alloc_domain(iommu);
1700         if (!domain)
1701                 goto error;
1702
1703         if (domain_init(domain, gaw)) {
1704                 domain_exit(domain);
1705                 goto error;
1706         }
1707
1708         /* register pcie-to-pci device */
1709         if (dev_tmp) {
1710                 info = alloc_devinfo_mem();
1711                 if (!info) {
1712                         domain_exit(domain);
1713                         goto error;
1714                 }
1715                 info->bus = bus;
1716                 info->devfn = devfn;
1717                 info->dev = NULL;
1718                 info->domain = domain;
1719                 /* This domain is shared by devices under p2p bridge */
1720                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1721
1722                 /* recheck under the lock: the pcie-to-pci bridge may already have a domain; if so, use it */
1723                 found = NULL;
1724                 spin_lock_irqsave(&device_domain_lock, flags);
1725                 list_for_each_entry(tmp, &device_domain_list, global) {
1726                         if (tmp->bus == bus && tmp->devfn == devfn) {
1727                                 found = tmp->domain;
1728                                 break;
1729                         }
1730                 }
1731                 if (found) {
1732                         free_devinfo_mem(info);
1733                         domain_exit(domain);
1734                         domain = found;
1735                 } else {
1736                         list_add(&info->link, &domain->devices);
1737                         list_add(&info->global, &device_domain_list);
1738                 }
1739                 spin_unlock_irqrestore(&device_domain_lock, flags);
1740         }
1741
1742 found_domain:
1743         info = alloc_devinfo_mem();
1744         if (!info)
1745                 goto error;
1746         info->bus = pdev->bus->number;
1747         info->devfn = pdev->devfn;
1748         info->dev = pdev;
1749         info->domain = domain;
1750         spin_lock_irqsave(&device_domain_lock, flags);
1751         /* somebody else may have set it up already */
1752         found = find_domain(pdev);
1753         if (found != NULL) {
1754                 spin_unlock_irqrestore(&device_domain_lock, flags);
1755                 if (found != domain) {
1756                         domain_exit(domain);
1757                         domain = found;
1758                 }
1759                 free_devinfo_mem(info);
1760                 return domain;
1761         }
1762         list_add(&info->link, &domain->devices);
1763         list_add(&info->global, &device_domain_list);
1764         pdev->dev.archdata.iommu = info;
1765         spin_unlock_irqrestore(&device_domain_lock, flags);
1766         return domain;
1767 error:
1768         /* recheck here; another thread may have set it up meanwhile */
1769         return find_domain(pdev);
1770 }
1771
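/*
 * Set up a 1:1 (identity) mapping of [start, end) for pdev: reserve the
 * IOVA range, clear any stale PTEs, install the page mappings and program
 * the context entry.
 */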
1772 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1773                                       unsigned long long start,
1774                                       unsigned long long end)
1775 {
1776         struct dmar_domain *domain;
1777         unsigned long size;
1778         unsigned long long base;
1779         int ret;
1780
1781         printk(KERN_INFO
1782                 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1783                 pci_name(pdev), start, end);
1784         /* page table init */
1785         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1786         if (!domain)
1787                 return -ENOMEM;
1788
1789         /* The address might not be aligned */
1790         base = start & PAGE_MASK;
1791         size = end - base;
1792         size = PAGE_ALIGN(size);
1793         if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1794                         IOVA_PFN(base + size) - 1)) {
1795                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1796                 ret = -ENOMEM;
1797                 goto error;
1798         }
1799
1800         pr_debug("Mapping reserved region %lx@%llx for %s\n",
1801                 size, base, pci_name(pdev));
1802         /*
1803          * RMRR range might have overlap with physical memory range,
1804          * clear it first
1805          */
1806         dma_pte_clear_range(domain, base, base + size);
1807
1808         ret = domain_page_mapping(domain, base, base, size,
1809                 DMA_PTE_READ|DMA_PTE_WRITE);
1810         if (ret)
1811                 goto error;
1812
1813         /* context entry init */
1814         ret = domain_context_mapping(domain, pdev);
1815         if (!ret)
1816                 return 0;
1817 error:
1818         domain_exit(domain);
1819         return ret;
1820
1821 }
1822
1823 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1824         struct pci_dev *pdev)
1825 {
1826         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1827                 return 0;
1828         return iommu_prepare_identity_map(pdev, rmrr->base_address,
1829                 rmrr->end_address + 1);
1830 }
1831
1832 #ifdef CONFIG_DMAR_GFX_WA
1833 struct iommu_prepare_data {
1834         struct pci_dev *pdev;
1835         int ret;
1836 };
1837
1838 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1839                                          unsigned long end_pfn, void *datax)
1840 {
1841         struct iommu_prepare_data *data;
1842
1843         data = (struct iommu_prepare_data *)datax;
1844
1845         data->ret = iommu_prepare_identity_map(data->pdev,
1846                                 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1847         return data->ret;
1848
1849 }
1850
1851 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1852 {
1853         int nid;
1854         struct iommu_prepare_data data;
1855
1856         data.pdev = pdev;
1857         data.ret = 0;
1858
1859         for_each_online_node(nid) {
1860                 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1861                 if (data.ret)
1862                         return data.ret;
1863         }
1864         return data.ret;
1865 }
1866
1867 static void __init iommu_prepare_gfx_mapping(void)
1868 {
1869         struct pci_dev *pdev = NULL;
1870         int ret;
1871
1872         for_each_pci_dev(pdev) {
1873                 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1874                                 !IS_GFX_DEVICE(pdev))
1875                         continue;
1876                 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1877                         pci_name(pdev));
1878                 ret = iommu_prepare_with_active_regions(pdev);
1879                 if (ret)
1880                         printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1881         }
1882 }
1883 #else /* !CONFIG_DMAR_GFX_WA */
1884 static inline void iommu_prepare_gfx_mapping(void)
1885 {
1886         return;
1887 }
1888 #endif
1889
1890 #ifdef CONFIG_DMAR_FLOPPY_WA
1891 static inline void iommu_prepare_isa(void)
1892 {
1893         struct pci_dev *pdev;
1894         int ret;
1895
1896         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1897         if (!pdev)
1898                 return;
1899
1900         printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1901         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1902
1903         if (ret)
1904                 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1905                         "floppy might not work\n");
1906
1907 }
1908 #else
1909 static inline void iommu_prepare_isa(void)
1910 {
1911         return;
1912 }
1913 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1914
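/*
 * One-time initialization of all DMAR units: allocate the per-IOMMU
 * bookkeeping, domains and root/context tables, choose queued vs.
 * register-based invalidation, set up the RMRR/gfx/ISA identity maps and
 * finally enable fault reporting and translation.
 */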
1915 static int __init init_dmars(void)
1916 {
1917         struct dmar_drhd_unit *drhd;
1918         struct dmar_rmrr_unit *rmrr;
1919         struct pci_dev *pdev;
1920         struct intel_iommu *iommu;
1921         int i, ret, unit = 0;
1922
1923         /*
1924          * for each drhd
1925          *    allocate root
1926          *    initialize and program root entry to not present
1927          * endfor
1928          */
1929         for_each_drhd_unit(drhd) {
1930                 g_num_of_iommus++;
1931                 /*
1932                  * lock not needed as this is only incremented in the
1933                  * single-threaded kernel __init code path; all other
1934                  * accesses are read only
1935                  */
1936         }
1937
1938         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1939                         GFP_KERNEL);
1940         if (!g_iommus) {
1941                 printk(KERN_ERR "Allocating global iommu array failed\n");
1942                 ret = -ENOMEM;
1943                 goto error;
1944         }
1945
1946         deferred_flush = kzalloc(g_num_of_iommus *
1947                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1948         if (!deferred_flush) {
1949                 kfree(g_iommus);
1950                 ret = -ENOMEM;
1951                 goto error;
1952         }
1953
1954         for_each_drhd_unit(drhd) {
1955                 if (drhd->ignored)
1956                         continue;
1957
1958                 iommu = drhd->iommu;
1959                 g_iommus[iommu->seq_id] = iommu;
1960
1961                 ret = iommu_init_domains(iommu);
1962                 if (ret)
1963                         goto error;
1964
1965                 /*
1966                  * TBD:
1967                  * we could share the same root & context tables
1968                  * among all IOMMUs. Needs to be split out later.
1969                  */
1970                 ret = iommu_alloc_root_entry(iommu);
1971                 if (ret) {
1972                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1973                         goto error;
1974                 }
1975         }
1976
1977         for_each_drhd_unit(drhd) {
1978                 if (drhd->ignored)
1979                         continue;
1980
1981                 iommu = drhd->iommu;
1982                 if (dmar_enable_qi(iommu)) {
1983                         /*
1984                          * Queued Invalidate not enabled, use Register Based
1985                          * Invalidate
1986                          */
1987                         iommu->flush.flush_context = __iommu_flush_context;
1988                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1989                         printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1990                                "invalidation\n",
1991                                (unsigned long long)drhd->reg_base_addr);
1992                 } else {
1993                         iommu->flush.flush_context = qi_flush_context;
1994                         iommu->flush.flush_iotlb = qi_flush_iotlb;
1995                         printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1996                                "invalidation\n",
1997                                (unsigned long long)drhd->reg_base_addr);
1998                 }
1999         }
2000
2001         /*
2002          * For each rmrr
2003          *   for each dev attached to rmrr
2004          *   do
2005          *     locate drhd for dev, alloc domain for dev
2006          *     allocate free domain
2007          *     allocate page table entries for rmrr
2008          *     if context not allocated for bus
2009          *           allocate and init context
2010          *           set present in root table for this bus
2011          *     init context with domain, translation etc
2012          *    endfor
2013          * endfor
2014          */
2015         for_each_rmrr_units(rmrr) {
2016                 for (i = 0; i < rmrr->devices_cnt; i++) {
2017                         pdev = rmrr->devices[i];
2018                         /* some BIOSes list non-existent devices in the DMAR table */
2019                         if (!pdev)
2020                                 continue;
2021                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2022                         if (ret)
2023                                 printk(KERN_ERR
2024                                  "IOMMU: mapping reserved region failed\n");
2025                 }
2026         }
2027
2028         iommu_prepare_gfx_mapping();
2029
2030         iommu_prepare_isa();
2031
2032         /*
2033          * for each drhd
2034          *   enable fault log
2035          *   global invalidate context cache
2036          *   global invalidate iotlb
2037          *   enable translation
2038          */
2039         for_each_drhd_unit(drhd) {
2040                 if (drhd->ignored)
2041                         continue;
2042                 iommu = drhd->iommu;
2043                 sprintf(iommu->name, "dmar%d", unit++);
2044
2045                 iommu_flush_write_buffer(iommu);
2046
2047                 ret = dmar_set_interrupt(iommu);
2048                 if (ret)
2049                         goto error;
2050
2051                 iommu_set_root_entry(iommu);
2052
2053                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2054                                            0);
2055                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2056                                          0);
2057                 iommu_disable_protect_mem_regions(iommu);
2058
2059                 ret = iommu_enable_translation(iommu);
2060                 if (ret)
2061                         goto error;
2062         }
2063
2064         return 0;
2065 error:
2066         for_each_drhd_unit(drhd) {
2067                 if (drhd->ignored)
2068                         continue;
2069                 iommu = drhd->iommu;
2070                 free_iommu(iommu);
2071         }
2072         kfree(g_iommus);
2073         return ret;
2074 }
2075
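/*
 * Round up to a whole number of pages, taking the sub-page offset of
 * host_addr into account.  E.g. with 4KB pages,
 * aligned_size(0x1001, 0x2000) == 0x3000.
 */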
2076 static inline u64 aligned_size(u64 host_addr, size_t size)
2077 {
2078         u64 addr;
2079         addr = (host_addr & (~PAGE_MASK)) + size;
2080         return PAGE_ALIGN(addr);
2081 }
2082
2083 struct iova *
2084 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2085 {
2086         struct iova *piova;
2087
2088         /* Make sure it's in range */
2089         end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2090         if (!size || (IOVA_START_ADDR + size > end))
2091                 return NULL;
2092
2093         piova = alloc_iova(&domain->iovad,
2094                         size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2095         return piova;
2096 }
2097
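/*
 * Allocate an IOVA range for the device.  Devices limited to 32-bit DMA
 * (or when dmar_forcedac is set) allocate directly from dma_mask;
 * otherwise the 32-bit space is tried first, falling back to the full mask.
 */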
2098 static struct iova *
2099 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2100                    size_t size, u64 dma_mask)
2101 {
2102         struct pci_dev *pdev = to_pci_dev(dev);
2103         struct iova *iova = NULL;
2104
2105         if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2106                 iova = iommu_alloc_iova(domain, size, dma_mask);
2107         else {
2108                 /*
2109                  * First try to allocate an io virtual address in
2110                  * DMA_32BIT_MASK and if that fails then try allocating
2111                  * from higher range
2112                  */
2113                 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2114                 if (!iova)
2115                         iova = iommu_alloc_iova(domain, size, dma_mask);
2116         }
2117
2118         if (!iova) {
2119                 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
2120                 return NULL;
2121         }
2122
2123         return iova;
2124 }
2125
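/*
 * Look up (or create) the domain for pdev and make sure its context
 * entries are programmed; used on the DMA mapping fast paths.
 */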
2126 static struct dmar_domain *
2127 get_valid_domain_for_dev(struct pci_dev *pdev)
2128 {
2129         struct dmar_domain *domain;
2130         int ret;
2131
2132         domain = get_domain_for_dev(pdev,
2133                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2134         if (!domain) {
2135                 printk(KERN_ERR
2136                         "Allocating domain for %s failed\n", pci_name(pdev));
2137                 return NULL;
2138         }
2139
2140         /* make sure context mapping is ok */
2141         if (unlikely(!domain_context_mapped(pdev))) {
2142                 ret = domain_context_mapping(domain, pdev);
2143                 if (ret) {
2144                         printk(KERN_ERR
2145                                 "Domain context map for %s failed\n",
2146                                 pci_name(pdev));
2147                         return NULL;
2148                 }
2149         }
2150
2151         return domain;
2152 }
2153
2154 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2155                                      size_t size, int dir, u64 dma_mask)
2156 {
2157         struct pci_dev *pdev = to_pci_dev(hwdev);
2158         struct dmar_domain *domain;
2159         phys_addr_t start_paddr;
2160         struct iova *iova;
2161         int prot = 0;
2162         int ret;
2163         struct intel_iommu *iommu;
2164
2165         BUG_ON(dir == DMA_NONE);
2166         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2167                 return paddr;
2168
2169         domain = get_valid_domain_for_dev(pdev);
2170         if (!domain)
2171                 return 0;
2172
2173         iommu = domain_get_iommu(domain);
2174         size = aligned_size((u64)paddr, size);
2175
2176         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2177         if (!iova)
2178                 goto error;
2179
2180         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2181
2182         /*
2183          * Check if DMAR supports zero-length reads on write-only
2184          * mappings.
2185          */
2186         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2187                         !cap_zlr(iommu->cap))
2188                 prot |= DMA_PTE_READ;
2189         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2190                 prot |= DMA_PTE_WRITE;
2191         /*
2192          * paddr to (paddr + size) might span partial pages; we map whole
2193          * pages.  Note: if two parts of one page are mapped separately, we
2194          * might end up with two guest addresses mapping to the same host
2195          * paddr, but this is not a big problem
2196          */
2197         ret = domain_page_mapping(domain, start_paddr,
2198                 ((u64)paddr) & PAGE_MASK, size, prot);
2199         if (ret)
2200                 goto error;
2201
2202         /* it's a non-present to present mapping */
2203         ret = iommu_flush_iotlb_psi(iommu, domain->id,
2204                         start_paddr, size >> VTD_PAGE_SHIFT, 1);
2205         if (ret)
2206                 iommu_flush_write_buffer(iommu);
2207
2208         return start_paddr + ((u64)paddr & (~PAGE_MASK));
2209
2210 error:
2211         if (iova)
2212                 __free_iova(&domain->iovad, iova);
2213         printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
2214                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2215         return 0;
2216 }
2217
2218 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2219                             size_t size, int dir)
2220 {
2221         return __intel_map_single(hwdev, paddr, size, dir,
2222                                   to_pci_dev(hwdev)->dma_mask);
2223 }
2224
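/*
 * Flush all deferred unmaps: for every IOMMU with pending entries, do one
 * global IOTLB invalidation and free the queued IOVAs.  Called with
 * async_umap_flush_lock held.
 */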
2225 static void flush_unmaps(void)
2226 {
2227         int i, j;
2228
2229         timer_on = 0;
2230
2231         /* just flush them all */
2232         for (i = 0; i < g_num_of_iommus; i++) {
2233                 struct intel_iommu *iommu = g_iommus[i];
2234                 if (!iommu)
2235                         continue;
2236
2237                 if (deferred_flush[i].next) {
2238                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2239                                                  DMA_TLB_GLOBAL_FLUSH, 0);
2240                         for (j = 0; j < deferred_flush[i].next; j++) {
2241                                 __free_iova(&deferred_flush[i].domain[j]->iovad,
2242                                                 deferred_flush[i].iova[j]);
2243                         }
2244                         deferred_flush[i].next = 0;
2245                 }
2246         }
2247
2248         list_size = 0;
2249 }
2250
2251 static void flush_unmaps_timeout(unsigned long data)
2252 {
2253         unsigned long flags;
2254
2255         spin_lock_irqsave(&async_umap_flush_lock, flags);
2256         flush_unmaps();
2257         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2258 }
2259
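/*
 * Queue an IOVA for deferred freeing.  flush_unmaps() performs the actual
 * IOTLB flush and release, either from the 10ms unmap_timer or immediately
 * once list_size reaches HIGH_WATER_MARK.
 */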
2260 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2261 {
2262         unsigned long flags;
2263         int next, iommu_id;
2264         struct intel_iommu *iommu;
2265
2266         spin_lock_irqsave(&async_umap_flush_lock, flags);
2267         if (list_size == HIGH_WATER_MARK)
2268                 flush_unmaps();
2269
2270         iommu = domain_get_iommu(dom);
2271         iommu_id = iommu->seq_id;
2272
2273         next = deferred_flush[iommu_id].next;
2274         deferred_flush[iommu_id].domain[next] = dom;
2275         deferred_flush[iommu_id].iova[next] = iova;
2276         deferred_flush[iommu_id].next++;
2277
2278         if (!timer_on) {
2279                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2280                 timer_on = 1;
2281         }
2282         list_size++;
2283         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2284 }
2285
2286 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2287                         int dir)
2288 {
2289         struct pci_dev *pdev = to_pci_dev(dev);
2290         struct dmar_domain *domain;
2291         unsigned long start_addr;
2292         struct iova *iova;
2293         struct intel_iommu *iommu;
2294
2295         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2296                 return;
2297         domain = find_domain(pdev);
2298         BUG_ON(!domain);
2299
2300         iommu = domain_get_iommu(domain);
2301
2302         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2303         if (!iova)
2304                 return;
2305
2306         start_addr = iova->pfn_lo << PAGE_SHIFT;
2307         size = aligned_size((u64)dev_addr, size);
2308
2309         pr_debug("Device %s unmapping: %lx@%llx\n",
2310                 pci_name(pdev), size, (unsigned long long)start_addr);
2311
2312         /*  clear the whole page */
2313         dma_pte_clear_range(domain, start_addr, start_addr + size);
2314         /* free page tables */
2315         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2316         if (intel_iommu_strict) {
2317                 if (iommu_flush_iotlb_psi(iommu,
2318                         domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2319                         iommu_flush_write_buffer(iommu);
2320                 /* free iova */
2321                 __free_iova(&domain->iovad, iova);
2322         } else {
2323                 add_unmap(domain, iova);
2324                 /*
2325                  * queue up the release of the unmap to save roughly the 1/6th
2326                  * of the cpu time otherwise spent on the iotlb flush operation...
2327                  */
2328         }
2329 }
2330
2331 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2332                            dma_addr_t *dma_handle, gfp_t flags)
2333 {
2334         void *vaddr;
2335         int order;
2336
2337         size = PAGE_ALIGN(size);
2338         order = get_order(size);
2339         flags &= ~(GFP_DMA | GFP_DMA32);
2340
2341         vaddr = (void *)__get_free_pages(flags, order);
2342         if (!vaddr)
2343                 return NULL;
2344         memset(vaddr, 0, size);
2345
2346         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2347                                          DMA_BIDIRECTIONAL,
2348                                          hwdev->coherent_dma_mask);
2349         if (*dma_handle)
2350                 return vaddr;
2351         free_pages((unsigned long)vaddr, order);
2352         return NULL;
2353 }
2354
2355 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2356                          dma_addr_t dma_handle)
2357 {
2358         int order;
2359
2360         size = PAGE_ALIGN(size);
2361         order = get_order(size);
2362
2363         intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2364         free_pages((unsigned long)vaddr, order);
2365 }
2366
2367 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2368
2369 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2370                     int nelems, int dir)
2371 {
2372         int i;
2373         struct pci_dev *pdev = to_pci_dev(hwdev);
2374         struct dmar_domain *domain;
2375         unsigned long start_addr;
2376         struct iova *iova;
2377         size_t size = 0;
2378         void *addr;
2379         struct scatterlist *sg;
2380         struct intel_iommu *iommu;
2381
2382         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2383                 return;
2384
2385         domain = find_domain(pdev);
2386         BUG_ON(!domain);
2387
2388         iommu = domain_get_iommu(domain);
2389
2390         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2391         if (!iova)
2392                 return;
2393         for_each_sg(sglist, sg, nelems, i) {
2394                 addr = SG_ENT_VIRT_ADDRESS(sg);
2395                 size += aligned_size((u64)addr, sg->length);
2396         }
2397
2398         start_addr = iova->pfn_lo << PAGE_SHIFT;
2399
2400         /*  clear the whole page */
2401         dma_pte_clear_range(domain, start_addr, start_addr + size);
2402         /* free page tables */
2403         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2404
2405         if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2406                         size >> VTD_PAGE_SHIFT, 0))
2407                 iommu_flush_write_buffer(iommu);
2408
2409         /* free iova */
2410         __free_iova(&domain->iovad, iova);
2411 }
2412
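/*
 * Scatterlist mapping for devices that bypass the IOMMU
 * (DUMMY_DEVICE_DOMAIN_INFO): each entry's DMA address is simply its
 * bus/physical address.
 */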
2413 static int intel_nontranslate_map_sg(struct device *hddev,
2414         struct scatterlist *sglist, int nelems, int dir)
2415 {
2416         int i;
2417         struct scatterlist *sg;
2418
2419         for_each_sg(sglist, sg, nelems, i) {
2420                 BUG_ON(!sg_page(sg));
2421                 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2422                 sg->dma_length = sg->length;
2423         }
2424         return nelems;
2425 }
2426
2427 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2428                  int dir)
2429 {
2430         void *addr;
2431         int i;
2432         struct pci_dev *pdev = to_pci_dev(hwdev);
2433         struct dmar_domain *domain;
2434         size_t size = 0;
2435         int prot = 0;
2436         size_t offset = 0;
2437         struct iova *iova = NULL;
2438         int ret;
2439         struct scatterlist *sg;
2440         unsigned long start_addr;
2441         struct intel_iommu *iommu;
2442
2443         BUG_ON(dir == DMA_NONE);
2444         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2445                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2446
2447         domain = get_valid_domain_for_dev(pdev);
2448         if (!domain)
2449                 return 0;
2450
2451         iommu = domain_get_iommu(domain);
2452
2453         for_each_sg(sglist, sg, nelems, i) {
2454                 addr = SG_ENT_VIRT_ADDRESS(sg);
2455                 addr = (void *)virt_to_phys(addr);
2456                 size += aligned_size((u64)addr, sg->length);
2457         }
2458
2459         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2460         if (!iova) {
2461                 sglist->dma_length = 0;
2462                 return 0;
2463         }
2464
2465         /*
2466          * Check if DMAR supports zero-length reads on write-only
2467          * mappings.
2468          */
2469         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2470                         !cap_zlr(iommu->cap))
2471                 prot |= DMA_PTE_READ;
2472         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2473                 prot |= DMA_PTE_WRITE;
2474
2475         start_addr = iova->pfn_lo << PAGE_SHIFT;
2476         offset = 0;
2477         for_each_sg(sglist, sg, nelems, i) {
2478                 addr = SG_ENT_VIRT_ADDRESS(sg);
2479                 addr = (void *)virt_to_phys(addr);
2480                 size = aligned_size((u64)addr, sg->length);
2481                 ret = domain_page_mapping(domain, start_addr + offset,
2482                         ((u64)addr) & PAGE_MASK,
2483                         size, prot);
2484                 if (ret) {
2485                         /*  clear the page */
2486                         dma_pte_clear_range(domain, start_addr,
2487                                   start_addr + offset);
2488                         /* free page tables */
2489                         dma_pte_free_pagetable(domain, start_addr,
2490                                   start_addr + offset);
2491                         /* free iova */
2492                         __free_iova(&domain->iovad, iova);
2493                         return 0;
2494                 }
2495                 sg->dma_address = start_addr + offset +
2496                                 ((u64)addr & (~PAGE_MASK));
2497                 sg->dma_length = sg->length;
2498                 offset += size;
2499         }
2500
2501         /* it's a non-present to present mapping */
2502         if (iommu_flush_iotlb_psi(iommu, domain->id,
2503                         start_addr, offset >> VTD_PAGE_SHIFT, 1))
2504                 iommu_flush_write_buffer(iommu);
2505         return nelems;
2506 }
2507
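/*
 * Installed as the global dma_ops by intel_iommu_init(), so the arch DMA
 * API (dma_map_single(), dma_map_sg(), ...) dispatches through this table
 * when the IOMMU is in use.
 */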
2508 static struct dma_mapping_ops intel_dma_ops = {
2509         .alloc_coherent = intel_alloc_coherent,
2510         .free_coherent = intel_free_coherent,
2511         .map_single = intel_map_single,
2512         .unmap_single = intel_unmap_single,
2513         .map_sg = intel_map_sg,
2514         .unmap_sg = intel_unmap_sg,
2515 };
2516
2517 static inline int iommu_domain_cache_init(void)
2518 {
2519         int ret = 0;
2520
2521         iommu_domain_cache = kmem_cache_create("iommu_domain",
2522                                          sizeof(struct dmar_domain),
2523                                          0,
2524                                          SLAB_HWCACHE_ALIGN,
2526                                          NULL);
2527         if (!iommu_domain_cache) {
2528                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2529                 ret = -ENOMEM;
2530         }
2531
2532         return ret;
2533 }
2534
2535 static inline int iommu_devinfo_cache_init(void)
2536 {
2537         int ret = 0;
2538
2539         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2540                                          sizeof(struct device_domain_info),
2541                                          0,
2542                                          SLAB_HWCACHE_ALIGN,
2543                                          NULL);
2544         if (!iommu_devinfo_cache) {
2545                 printk(KERN_ERR "Couldn't create devinfo cache\n");
2546                 ret = -ENOMEM;
2547         }
2548
2549         return ret;
2550 }
2551
2552 static inline int iommu_iova_cache_init(void)
2553 {
2554         int ret = 0;
2555
2556         iommu_iova_cache = kmem_cache_create("iommu_iova",
2557                                          sizeof(struct iova),
2558                                          0,
2559                                          SLAB_HWCACHE_ALIGN,
2560                                          NULL);
2561         if (!iommu_iova_cache) {
2562                 printk(KERN_ERR "Couldn't create iova cache\n");
2563                 ret = -ENOMEM;
2564         }
2565
2566         return ret;
2567 }
2568
2569 static int __init iommu_init_mempool(void)
2570 {
2571         int ret;
2572         ret = iommu_iova_cache_init();
2573         if (ret)
2574                 return ret;
2575
2576         ret = iommu_domain_cache_init();
2577         if (ret)
2578                 goto domain_error;
2579
2580         ret = iommu_devinfo_cache_init();
2581         if (!ret)
2582                 return ret;
2583
2584         kmem_cache_destroy(iommu_domain_cache);
2585 domain_error:
2586         kmem_cache_destroy(iommu_iova_cache);
2587
2588         return -ENOMEM;
2589 }
2590
2591 static void __init iommu_exit_mempool(void)
2592 {
2593         kmem_cache_destroy(iommu_devinfo_cache);
2594         kmem_cache_destroy(iommu_domain_cache);
2595         kmem_cache_destroy(iommu_iova_cache);
2596
2597 }
2598
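/*
 * Mark DRHD units that cover no present PCI devices as ignored.  Unless
 * dmar_map_gfx is set, also bypass units that serve only graphics devices
 * and tag those devices with DUMMY_DEVICE_DOMAIN_INFO.
 */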
2599 static void __init init_no_remapping_devices(void)
2600 {
2601         struct dmar_drhd_unit *drhd;
2602
2603         for_each_drhd_unit(drhd) {
2604                 if (!drhd->include_all) {
2605                         int i;
2606                         for (i = 0; i < drhd->devices_cnt; i++)
2607                                 if (drhd->devices[i] != NULL)
2608                                         break;
2609                         /* ignore DMAR unit if no pci devices exist */
2610                         if (i == drhd->devices_cnt)
2611                                 drhd->ignored = 1;
2612                 }
2613         }
2614
2615         if (dmar_map_gfx)
2616                 return;
2617
2618         for_each_drhd_unit(drhd) {
2619                 int i;
2620                 if (drhd->ignored || drhd->include_all)
2621                         continue;
2622
2623                 for (i = 0; i < drhd->devices_cnt; i++)
2624                         if (drhd->devices[i] &&
2625                                 !IS_GFX_DEVICE(drhd->devices[i]))
2626                                 break;
2627
2628                 if (i < drhd->devices_cnt)
2629                         continue;
2630
2631                 /* bypass IOMMU if it is just for gfx devices */
2632                 drhd->ignored = 1;
2633                 for (i = 0; i < drhd->devices_cnt; i++) {
2634                         if (!drhd->devices[i])
2635                                 continue;
2636                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2637                 }
2638         }
2639 }
2640
2641 int __init intel_iommu_init(void)
2642 {
2643         int ret = 0;
2644
2645         if (dmar_table_init())
2646                 return  -ENODEV;
2647
2648         if (dmar_dev_scope_init())
2649                 return  -ENODEV;
2650
2651         /*
2652          * Check the need for DMA-remapping initialization now.
2653          * Above initialization will also be used by Interrupt-remapping.
2654          */
2655         if (no_iommu || swiotlb || dmar_disabled)
2656                 return -ENODEV;
2657
2658         iommu_init_mempool();
2659         dmar_init_reserved_ranges();
2660
2661         init_no_remapping_devices();
2662
2663         ret = init_dmars();
2664         if (ret) {
2665                 printk(KERN_ERR "IOMMU: dmar init failed\n");
2666                 put_iova_domain(&reserved_iova_list);
2667                 iommu_exit_mempool();
2668                 return ret;
2669         }
2670         printk(KERN_INFO
2671         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2672
2673         init_timer(&unmap_timer);
2674         force_iommu = 1;
2675         dma_ops = &intel_dma_ops;
2676         return 0;
2677 }
2678
2679 static int vm_domain_add_dev_info(struct dmar_domain *domain,
2680                                   struct pci_dev *pdev)
2681 {
2682         struct device_domain_info *info;
2683         unsigned long flags;
2684
2685         info = alloc_devinfo_mem();
2686         if (!info)
2687                 return -ENOMEM;
2688
2689         info->bus = pdev->bus->number;
2690         info->devfn = pdev->devfn;
2691         info->dev = pdev;
2692         info->domain = domain;
2693
2694         spin_lock_irqsave(&device_domain_lock, flags);
2695         list_add(&info->link, &domain->devices);
2696         list_add(&info->global, &device_domain_list);
2697         pdev->dev.archdata.iommu = info;
2698         spin_unlock_irqrestore(&device_domain_lock, flags);
2699
2700         return 0;
2701 }
2702
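/*
 * Detach pdev from a VM domain.  If no other device behind the same IOMMU
 * remains in the domain, clear that IOMMU from iommu_bmp and update the
 * domain's iommu count and coherency.
 */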
2703 static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2704                                           struct pci_dev *pdev)
2705 {
2706         struct device_domain_info *info;
2707         struct intel_iommu *iommu;
2708         unsigned long flags;
2709         int found = 0;
2710         struct list_head *entry, *tmp;
2711
2712         iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2713         if (!iommu)
2714                 return;
2715
2716         spin_lock_irqsave(&device_domain_lock, flags);
2717         list_for_each_safe(entry, tmp, &domain->devices) {
2718                 info = list_entry(entry, struct device_domain_info, link);
2719                 if (info->bus == pdev->bus->number &&
2720                     info->devfn == pdev->devfn) {
2721                         list_del(&info->link);
2722                         list_del(&info->global);
2723                         if (info->dev)
2724                                 info->dev->dev.archdata.iommu = NULL;
2725                         spin_unlock_irqrestore(&device_domain_lock, flags);
2726
2727                         iommu_detach_dev(iommu, info->bus, info->devfn);
2728                         free_devinfo_mem(info);
2729
2730                         spin_lock_irqsave(&device_domain_lock, flags);
2731
2732                         if (found)
2733                                 break;
2734                         else
2735                                 continue;
2736                 }
2737
2738                 /* if there are no other devices under the same iommu
2739                  * owned by this domain, clear this iommu in iommu_bmp,
2740                  * update the iommu count and coherency
2741                  */
2742                 if (device_to_iommu(info->bus, info->devfn) == iommu)
2743                         found = 1;
2744         }
2745
2746         if (found == 0) {
2747                 unsigned long tmp_flags;
2748                 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2749                 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2750                 domain->iommu_count--;
2751                 domain_update_iommu_coherency(domain);
2752                 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2753         }
2754
2755         spin_unlock_irqrestore(&device_domain_lock, flags);
2756 }
2757
2758 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2759 {
2760         struct device_domain_info *info;
2761         struct intel_iommu *iommu;
2762         unsigned long flags1, flags2;
2763
2764         spin_lock_irqsave(&device_domain_lock, flags1);
2765         while (!list_empty(&domain->devices)) {
2766                 info = list_entry(domain->devices.next,
2767                         struct device_domain_info, link);
2768                 list_del(&info->link);
2769                 list_del(&info->global);
2770                 if (info->dev)
2771                         info->dev->dev.archdata.iommu = NULL;
2772
2773                 spin_unlock_irqrestore(&device_domain_lock, flags1);
2774
2775                 iommu = device_to_iommu(info->bus, info->devfn);
2776                 iommu_detach_dev(iommu, info->bus, info->devfn);
2777
2778                 /* clear this iommu in iommu_bmp, update iommu count
2779                  * and coherency
2780                  */
2781                 spin_lock_irqsave(&domain->iommu_lock, flags2);
2782                 if (test_and_clear_bit(iommu->seq_id,
2783                                        &domain->iommu_bmp)) {
2784                         domain->iommu_count--;
2785                         domain_update_iommu_coherency(domain);
2786                 }
2787                 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2788
2789                 free_devinfo_mem(info);
2790                 spin_lock_irqsave(&device_domain_lock, flags1);
2791         }
2792         spin_unlock_irqrestore(&device_domain_lock, flags1);
2793 }
2794
2795 void intel_iommu_domain_exit(struct dmar_domain *domain)
2796 {
2797         u64 end;
2798
2799         /* Domain 0 is reserved, so don't process it */
2800         if (!domain)
2801                 return;
2802
2803         end = DOMAIN_MAX_ADDR(domain->gaw);
2804         end = end & (~VTD_PAGE_MASK);
2805
2806         /* clear ptes */
2807         dma_pte_clear_range(domain, 0, end);
2808
2809         /* free page tables */
2810         dma_pte_free_pagetable(domain, 0, end);
2811
2812         iommu_free_domain(domain);
2813         free_domain_mem(domain);
2814 }
2815 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2816
2817 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2818 {
2819         struct dmar_drhd_unit *drhd;
2820         struct dmar_domain *domain;
2821         struct intel_iommu *iommu;
2822
2823         drhd = dmar_find_matched_drhd_unit(pdev);
2824         if (!drhd) {
2825                 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2826                 return NULL;
2827         }
2828
2829         iommu = drhd->iommu;
2830         if (!iommu) {
2831                 printk(KERN_ERR
2832                         "intel_iommu_domain_alloc: iommu == NULL\n");
2833                 return NULL;
2834         }
2835         domain = iommu_alloc_domain(iommu);
2836         if (!domain) {
2837                 printk(KERN_ERR
2838                         "intel_iommu_domain_alloc: domain == NULL\n");
2839                 return NULL;
2840         }
2841         if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2842                 printk(KERN_ERR
2843                         "intel_iommu_domain_alloc: domain_init() failed\n");
2844                 intel_iommu_domain_exit(domain);
2845                 return NULL;
2846         }
2847         return domain;
2848 }
2849 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2850
2851 int intel_iommu_context_mapping(
2852         struct dmar_domain *domain, struct pci_dev *pdev)
2853 {
2854         int rc;
2855         rc = domain_context_mapping(domain, pdev);
2856         return rc;
2857 }
2858 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2859
2860 int intel_iommu_page_mapping(
2861         struct dmar_domain *domain, dma_addr_t iova,
2862         u64 hpa, size_t size, int prot)
2863 {
2864         int rc;
2865         rc = domain_page_mapping(domain, iova, hpa, size, prot);
2866         return rc;
2867 }
2868 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2869
2870 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2871 {
2872         struct intel_iommu *iommu;
2873
2874         iommu = device_to_iommu(bus, devfn);
2875         iommu_detach_dev(iommu, bus, devfn);
2876 }
2877 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2878
2879 struct dmar_domain *
2880 intel_iommu_find_domain(struct pci_dev *pdev)
2881 {
2882         return find_domain(pdev);
2883 }
2884 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2885
2886 int intel_iommu_found(void)
2887 {
2888         return g_num_of_iommus;
2889 }
2890 EXPORT_SYMBOL_GPL(intel_iommu_found);
2891
2892 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2893 {
2894         struct dma_pte *pte;
2895         u64 pfn;
2896
2897         pfn = 0;
2898         pte = addr_to_dma_pte(domain, iova);
2899
2900         if (pte)
2901                 pfn = dma_pte_addr(pte);
2902
2903         return pfn >> VTD_PAGE_SHIFT;
2904 }
2905 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);