[linux-2.6] / drivers / pci / intel-iommu.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/sysdev.h>
31 #include <linux/spinlock.h>
32 #include <linux/pci.h>
33 #include <linux/dmar.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/mempool.h>
36 #include <linux/timer.h>
37 #include <linux/iova.h>
38 #include <linux/intel-iommu.h>
39 #include <asm/cacheflush.h>
40 #include <asm/iommu.h>
41 #include "pci.h"
42
43 #define ROOT_SIZE               VTD_PAGE_SIZE
44 #define CONTEXT_SIZE            VTD_PAGE_SIZE
45
46 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
47 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
48
49 #define IOAPIC_RANGE_START      (0xfee00000)
50 #define IOAPIC_RANGE_END        (0xfeefffff)
51 #define IOVA_START_ADDR         (0x1000)
52
53 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54
55 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56
57
58 static void flush_unmaps_timeout(unsigned long data);
59
60 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
61
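/*
 * Deferred ("lazy") IOTLB flushing: instead of flushing the IOTLB on every
 * unmap, freed IOVAs and their domains are queued below, one table per
 * IOMMU, and flushed in batches from the unmap_timer/flush_unmaps_timeout()
 * path.  Each table holds at most HIGH_WATER_MARK pending entries.  Booting
 * with "intel_iommu=strict" disables the batching (see intel_iommu_setup()).
 */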
62 #define HIGH_WATER_MARK 250
63 struct deferred_flush_tables {
64         int next;
65         struct iova *iova[HIGH_WATER_MARK];
66         struct dmar_domain *domain[HIGH_WATER_MARK];
67 };
68
69 static struct deferred_flush_tables *deferred_flush;
70
71 /* number of IOMMUs found; used to size the per-IOMMU deferred_flush tables */
72 static int g_num_of_iommus;
73
74 static DEFINE_SPINLOCK(async_umap_flush_lock);
75 static LIST_HEAD(unmaps_to_do);
76
77 static int timer_on;
78 static long list_size;
79
80 static void domain_remove_dev_info(struct dmar_domain *domain);
81
82 int dmar_disabled;
83 static int __initdata dmar_map_gfx = 1;
84 static int dmar_forcedac;
85 static int intel_iommu_strict;
86
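/*
 * Sentinel stored in pdev->dev.archdata.iommu to mark devices that are
 * handled specially; devices carrying it are skipped when the RMRR and
 * graphics identity mappings are set up later in this file.
 */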
87 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
88 static DEFINE_SPINLOCK(device_domain_lock);
89 static LIST_HEAD(device_domain_list);
90
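/*
 * Options accepted on the "intel_iommu=" kernel command line (several may be
 * combined, separated by commas):
 *   off       - disable DMA remapping altogether
 *   igfx_off  - do not set up mappings for graphics devices
 *   forcedac  - force DAC (dual address cycle, i.e. 64-bit) addressing for
 *               PCI devices
 *   strict    - disable batched IOTLB flushing, flush on every unmap
 */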
91 static int __init intel_iommu_setup(char *str)
92 {
93         if (!str)
94                 return -EINVAL;
95         while (*str) {
96                 if (!strncmp(str, "off", 3)) {
97                         dmar_disabled = 1;
98                         printk(KERN_INFO"Intel-IOMMU: disabled\n");
99                 } else if (!strncmp(str, "igfx_off", 8)) {
100                         dmar_map_gfx = 0;
101                         printk(KERN_INFO
102                                 "Intel-IOMMU: disable GFX device mapping\n");
103                 } else if (!strncmp(str, "forcedac", 8)) {
104                         printk(KERN_INFO
105                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
106                         dmar_forcedac = 1;
107                 } else if (!strncmp(str, "strict", 6)) {
108                         printk(KERN_INFO
109                                 "Intel-IOMMU: disable batched IOTLB flush\n");
110                         intel_iommu_strict = 1;
111                 }
112
113                 str += strcspn(str, ",");
114                 while (*str == ',')
115                         str++;
116         }
117         return 0;
118 }
119 __setup("intel_iommu=", intel_iommu_setup);
120
121 static struct kmem_cache *iommu_domain_cache;
122 static struct kmem_cache *iommu_devinfo_cache;
123 static struct kmem_cache *iommu_iova_cache;
124
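/*
 * Allocation helpers.  These may be called from atomic context, so they use
 * GFP_ATOMIC and temporarily set PF_MEMALLOC to let the allocation dip into
 * the memory reserves.  The closing "current->flags &= (~PF_MEMALLOC | flags)"
 * clears PF_MEMALLOC again only if the task did not already have it set on
 * entry.
 */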
125 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
126 {
127         unsigned int flags;
128         void *vaddr;
129
130         /* trying to avoid low memory issues */
131         flags = current->flags & PF_MEMALLOC;
132         current->flags |= PF_MEMALLOC;
133         vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
134         current->flags &= (~PF_MEMALLOC | flags);
135         return vaddr;
136 }
137
138
139 static inline void *alloc_pgtable_page(void)
140 {
141         unsigned int flags;
142         void *vaddr;
143
144         /* trying to avoid low memory issues */
145         flags = current->flags & PF_MEMALLOC;
146         current->flags |= PF_MEMALLOC;
147         vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
148         current->flags &= (~PF_MEMALLOC | flags);
149         return vaddr;
150 }
151
152 static inline void free_pgtable_page(void *vaddr)
153 {
154         free_page((unsigned long)vaddr);
155 }
156
157 static inline void *alloc_domain_mem(void)
158 {
159         return iommu_kmem_cache_alloc(iommu_domain_cache);
160 }
161
162 static void free_domain_mem(void *vaddr)
163 {
164         kmem_cache_free(iommu_domain_cache, vaddr);
165 }
166
167 static inline void * alloc_devinfo_mem(void)
168 {
169         return iommu_kmem_cache_alloc(iommu_devinfo_cache);
170 }
171
172 static inline void free_devinfo_mem(void *vaddr)
173 {
174         kmem_cache_free(iommu_devinfo_cache, vaddr);
175 }
176
177 struct iova *alloc_iova_mem(void)
178 {
179         return iommu_kmem_cache_alloc(iommu_iova_cache);
180 }
181
182 void free_iova_mem(struct iova *iova)
183 {
184         kmem_cache_free(iommu_iova_cache, iova);
185 }
186
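/*
 * Root/context table handling: iommu->root_entry is a page-sized array
 * indexed by PCI bus number, and each present root entry points to a
 * page-sized context table indexed by devfn.  Context tables are allocated
 * lazily the first time a device on that bus is looked up, and updated
 * entries are written back with __iommu_flush_cache() for the benefit of
 * IOMMUs that do not snoop the CPU caches.
 */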
187 /* Gets context entry for a given bus and devfn */
188 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
189                 u8 bus, u8 devfn)
190 {
191         struct root_entry *root;
192         struct context_entry *context;
193         unsigned long phy_addr;
194         unsigned long flags;
195
196         spin_lock_irqsave(&iommu->lock, flags);
197         root = &iommu->root_entry[bus];
198         context = get_context_addr_from_root(root);
199         if (!context) {
200                 context = (struct context_entry *)alloc_pgtable_page();
201                 if (!context) {
202                         spin_unlock_irqrestore(&iommu->lock, flags);
203                         return NULL;
204                 }
205                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
206                 phy_addr = virt_to_phys((void *)context);
207                 set_root_value(root, phy_addr);
208                 set_root_present(root);
209                 __iommu_flush_cache(iommu, root, sizeof(*root));
210         }
211         spin_unlock_irqrestore(&iommu->lock, flags);
212         return &context[devfn];
213 }
214
215 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
216 {
217         struct root_entry *root;
218         struct context_entry *context;
219         int ret;
220         unsigned long flags;
221
222         spin_lock_irqsave(&iommu->lock, flags);
223         root = &iommu->root_entry[bus];
224         context = get_context_addr_from_root(root);
225         if (!context) {
226                 ret = 0;
227                 goto out;
228         }
229         ret = context_present(context[devfn]);
230 out:
231         spin_unlock_irqrestore(&iommu->lock, flags);
232         return ret;
233 }
234
235 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
236 {
237         struct root_entry *root;
238         struct context_entry *context;
239         unsigned long flags;
240
241         spin_lock_irqsave(&iommu->lock, flags);
242         root = &iommu->root_entry[bus];
243         context = get_context_addr_from_root(root);
244         if (context) {
245                 context_clear_entry(context[devfn]);
246                 __iommu_flush_cache(iommu, &context[devfn],
247                         sizeof(*context));
248         }
249         spin_unlock_irqrestore(&iommu->lock, flags);
250 }
251
252 static void free_context_table(struct intel_iommu *iommu)
253 {
254         struct root_entry *root;
255         int i;
256         unsigned long flags;
257         struct context_entry *context;
258
259         spin_lock_irqsave(&iommu->lock, flags);
260         if (!iommu->root_entry) {
261                 goto out;
262         }
263         for (i = 0; i < ROOT_ENTRY_NR; i++) {
264                 root = &iommu->root_entry[i];
265                 context = get_context_addr_from_root(root);
266                 if (context)
267                         free_pgtable_page(context);
268         }
269         free_pgtable_page(iommu->root_entry);
270         iommu->root_entry = NULL;
271 out:
272         spin_unlock_irqrestore(&iommu->lock, flags);
273 }
274
275 /* page table handling */
276 #define LEVEL_STRIDE            (9)
277 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
278
279 static inline int agaw_to_level(int agaw)
280 {
281         return agaw + 2;
282 }
283
284 static inline int agaw_to_width(int agaw)
285 {
286         return 30 + agaw * LEVEL_STRIDE;
287
288 }
289
290 static inline int width_to_agaw(int width)
291 {
292         return (width - 30) / LEVEL_STRIDE;
293 }
294
295 static inline unsigned int level_to_offset_bits(int level)
296 {
297         return (12 + (level - 1) * LEVEL_STRIDE);
298 }
299
300 static inline int address_level_offset(u64 addr, int level)
301 {
302         return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
303 }
304
305 static inline u64 level_mask(int level)
306 {
307         return ((u64)-1 << level_to_offset_bits(level));
308 }
309
310 static inline u64 level_size(int level)
311 {
312         return ((u64)1 << level_to_offset_bits(level));
313 }
314
315 static inline u64 align_to_level(u64 addr, int level)
316 {
317         return ((addr + level_size(level) - 1) & level_mask(level));
318 }
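/*
 * Example of the agaw/level arithmetic above (illustrative values): for a
 * 48-bit adjusted guest address width, width_to_agaw(48) = (48 - 30) / 9 = 2
 * and agaw_to_level(2) = 4, i.e. a 4-level page table.  Level 1 entries are
 * indexed by address bits 12-20 and map 4KB each, level 2 by bits 21-29
 * (2MB), level 3 by bits 30-38 (1GB) and level 4 by bits 39-47 (512GB).
 */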
319
320 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
321 {
322         int addr_width = agaw_to_width(domain->agaw);
323         struct dma_pte *parent, *pte = NULL;
324         int level = agaw_to_level(domain->agaw);
325         int offset;
326         unsigned long flags;
327
328         BUG_ON(!domain->pgd);
329
330         addr &= (((u64)1) << addr_width) - 1;
331         parent = domain->pgd;
332
333         spin_lock_irqsave(&domain->mapping_lock, flags);
334         while (level > 0) {
335                 void *tmp_page;
336
337                 offset = address_level_offset(addr, level);
338                 pte = &parent[offset];
339                 if (level == 1)
340                         break;
341
342                 if (!dma_pte_present(*pte)) {
343                         tmp_page = alloc_pgtable_page();
344
345                         if (!tmp_page) {
346                                 spin_unlock_irqrestore(&domain->mapping_lock,
347                                         flags);
348                                 return NULL;
349                         }
350                         __iommu_flush_cache(domain->iommu, tmp_page,
351                                         PAGE_SIZE);
352                         dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
353                         /*
354                          * higher-level tables always set read/write; the
355                          * last-level page table controls the actual access
356                          */
357                         dma_set_pte_readable(*pte);
358                         dma_set_pte_writable(*pte);
359                         __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
360                 }
361                 parent = phys_to_virt(dma_pte_addr(*pte));
362                 level--;
363         }
364
365         spin_unlock_irqrestore(&domain->mapping_lock, flags);
366         return pte;
367 }
368
369 /* return address's pte at specific level */
370 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
371                 int level)
372 {
373         struct dma_pte *parent, *pte = NULL;
374         int total = agaw_to_level(domain->agaw);
375         int offset;
376
377         parent = domain->pgd;
378         while (level <= total) {
379                 offset = address_level_offset(addr, total);
380                 pte = &parent[offset];
381                 if (level == total)
382                         return pte;
383
384                 if (!dma_pte_present(*pte))
385                         break;
386                 parent = phys_to_virt(dma_pte_addr(*pte));
387                 total--;
388         }
389         return NULL;
390 }
391
392 /* clear one page's page table */
393 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
394 {
395         struct dma_pte *pte = NULL;
396
397         /* get last level pte */
398         pte = dma_addr_level_pte(domain, addr, 1);
399
400         if (pte) {
401                 dma_clear_pte(*pte);
402                 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
403         }
404 }
405
406 /* clear the last-level PTEs; an IOTLB flush should follow */
407 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
408 {
409         int addr_width = agaw_to_width(domain->agaw);
410
411         start &= (((u64)1) << addr_width) - 1;
412         end &= (((u64)1) << addr_width) - 1;
413         /* in case it's partial page */
414         start = PAGE_ALIGN(start);
415         end &= PAGE_MASK;
416
417         /* we don't need lock here, nobody else touches the iova range */
418         while (start < end) {
419                 dma_pte_clear_one(domain, start);
420                 start += VTD_PAGE_SIZE;
421         }
422 }
423
424 /* free page table pages. last level pte should already be cleared */
425 static void dma_pte_free_pagetable(struct dmar_domain *domain,
426         u64 start, u64 end)
427 {
428         int addr_width = agaw_to_width(domain->agaw);
429         struct dma_pte *pte;
430         int total = agaw_to_level(domain->agaw);
431         int level;
432         u64 tmp;
433
434         start &= (((u64)1) << addr_width) - 1;
435         end &= (((u64)1) << addr_width) - 1;
436
437         /* we don't need lock here, nobody else touches the iova range */
438         level = 2;
439         while (level <= total) {
440                 tmp = align_to_level(start, level);
441                 if (tmp >= end || (tmp + level_size(level) > end))
442                         return;
443
444                 while (tmp < end) {
445                         pte = dma_addr_level_pte(domain, tmp, level);
446                         if (pte) {
447                                 free_pgtable_page(
448                                         phys_to_virt(dma_pte_addr(*pte)));
449                                 dma_clear_pte(*pte);
450                                 __iommu_flush_cache(domain->iommu,
451                                                 pte, sizeof(*pte));
452                         }
453                         tmp += level_size(level);
454                 }
455                 level++;
456         }
457         /* free pgd */
458         if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
459                 free_pgtable_page(domain->pgd);
460                 domain->pgd = NULL;
461         }
462 }
463
464 /* iommu handling */
465 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
466 {
467         struct root_entry *root;
468         unsigned long flags;
469
470         root = (struct root_entry *)alloc_pgtable_page();
471         if (!root)
472                 return -ENOMEM;
473
474         __iommu_flush_cache(iommu, root, ROOT_SIZE);
475
476         spin_lock_irqsave(&iommu->lock, flags);
477         iommu->root_entry = root;
478         spin_unlock_irqrestore(&iommu->lock, flags);
479
480         return 0;
481 }
482
483 static void iommu_set_root_entry(struct intel_iommu *iommu)
484 {
485         void *addr;
486         u32 cmd, sts;
487         unsigned long flag;
488
489         addr = iommu->root_entry;
490
491         spin_lock_irqsave(&iommu->register_lock, flag);
492         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
493
494         cmd = iommu->gcmd | DMA_GCMD_SRTP;
495         writel(cmd, iommu->reg + DMAR_GCMD_REG);
496
497         /* Make sure hardware completes it */
498         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
499                 readl, (sts & DMA_GSTS_RTPS), sts);
500
501         spin_unlock_irqrestore(&iommu->register_lock, flag);
502 }
503
504 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
505 {
506         u32 val;
507         unsigned long flag;
508
509         if (!cap_rwbf(iommu->cap))
510                 return;
511         val = iommu->gcmd | DMA_GCMD_WBF;
512
513         spin_lock_irqsave(&iommu->register_lock, flag);
514         writel(val, iommu->reg + DMAR_GCMD_REG);
515
516         /* Make sure hardware completes it */
517         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
518                         readl, (!(val & DMA_GSTS_WBFS)), val);
519
520         spin_unlock_irqrestore(&iommu->register_lock, flag);
521 }
522
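/*
 * Register-based context-cache invalidation: a command for the requested
 * granularity (global, domain- or device-selective) is written to the
 * context command register with the ICC bit set, and the code then polls
 * until hardware clears ICC to signal completion.
 */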
523 /* the return value determines whether we need a write buffer flush */
524 static int __iommu_flush_context(struct intel_iommu *iommu,
525         u16 did, u16 source_id, u8 function_mask, u64 type,
526         int non_present_entry_flush)
527 {
528         u64 val = 0;
529         unsigned long flag;
530
531         /*
532          * In the non-present entry flush case: if the hardware doesn't cache
533          * non-present entries we do nothing; if it does cache them, we flush
534          * the entries of domain 0 (the domain id used to tag any cached
535          * non-present entries)
536          */
537         if (non_present_entry_flush) {
538                 if (!cap_caching_mode(iommu->cap))
539                         return 1;
540                 else
541                         did = 0;
542         }
543
544         switch (type) {
545         case DMA_CCMD_GLOBAL_INVL:
546                 val = DMA_CCMD_GLOBAL_INVL;
547                 break;
548         case DMA_CCMD_DOMAIN_INVL:
549                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
550                 break;
551         case DMA_CCMD_DEVICE_INVL:
552                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
553                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
554                 break;
555         default:
556                 BUG();
557         }
558         val |= DMA_CCMD_ICC;
559
560         spin_lock_irqsave(&iommu->register_lock, flag);
561         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
562
563         /* Make sure hardware completes it */
564         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
565                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
566
567         spin_unlock_irqrestore(&iommu->register_lock, flag);
568
569         /* flushing the context entry implicitly flushes the write buffer */
570         return 0;
571 }
572
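/*
 * Register-based IOTLB invalidation, analogous to the context-cache flush
 * above: a global, domain-selective or page-selective (PSI) request is
 * written to the IOTLB registers and the IVT bit is polled until hardware
 * completes.  The granularity actually performed (IAIG) is then checked
 * against the granularity requested (IIRG).
 */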
573 /* the return value determines whether we need a write buffer flush */
574 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
575         u64 addr, unsigned int size_order, u64 type,
576         int non_present_entry_flush)
577 {
578         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
579         u64 val = 0, val_iva = 0;
580         unsigned long flag;
581
582         /*
583          * In the non-present entry flush case: if the hardware doesn't cache
584          * non-present entries we do nothing; if it does cache them, we flush
585          * the entries of domain 0 (the domain id used to tag any cached
586          * non-present entries)
587          */
588         if (non_present_entry_flush) {
589                 if (!cap_caching_mode(iommu->cap))
590                         return 1;
591                 else
592                         did = 0;
593         }
594
595         switch (type) {
596         case DMA_TLB_GLOBAL_FLUSH:
597                 /* a global flush doesn't need to set IVA_REG */
598                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
599                 break;
600         case DMA_TLB_DSI_FLUSH:
601                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
602                 break;
603         case DMA_TLB_PSI_FLUSH:
604                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
605                 /* Note: always flush non-leaf currently */
606                 val_iva = size_order | addr;
607                 break;
608         default:
609                 BUG();
610         }
611         /* Note: set drain read/write */
612 #if 0
613         /*
614          * This is probably just being extra safe; it looks like we can
615          * omit it without any impact.
616          */
617         if (cap_read_drain(iommu->cap))
618                 val |= DMA_TLB_READ_DRAIN;
619 #endif
620         if (cap_write_drain(iommu->cap))
621                 val |= DMA_TLB_WRITE_DRAIN;
622
623         spin_lock_irqsave(&iommu->register_lock, flag);
624         /* Note: Only uses first TLB reg currently */
625         if (val_iva)
626                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
627         dmar_writeq(iommu->reg + tlb_offset + 8, val);
628
629         /* Make sure hardware completes it */
630         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
631                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
632
633         spin_unlock_irqrestore(&iommu->register_lock, flag);
634
635         /* check IOTLB invalidation granularity */
636         if (DMA_TLB_IAIG(val) == 0)
637                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
638         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
639                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
640                         (unsigned long long)DMA_TLB_IIRG(type),
641                         (unsigned long long)DMA_TLB_IAIG(val));
642         /* flushing the iotlb entry implicitly flushes the write buffer */
643         return 0;
644 }
645
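/*
 * Page-selective invalidation needs a power-of-two number of pages whose
 * base address is naturally aligned to that size, so the page count is
 * rounded up to 2^mask below.  If the hardware lacks PSI support, or the
 * resulting mask is larger than it can handle, fall back to a
 * domain-selective flush.
 */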
646 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
647         u64 addr, unsigned int pages, int non_present_entry_flush)
648 {
649         unsigned int mask;
650
651         BUG_ON(addr & (~VTD_PAGE_MASK));
652         BUG_ON(pages == 0);
653
654         /* Fallback to domain selective flush if no PSI support */
655         if (!cap_pgsel_inv(iommu->cap))
656                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
657                                                 DMA_TLB_DSI_FLUSH,
658                                                 non_present_entry_flush);
659
660         /*
661          * PSI requires page size to be 2 ^ x, and the base address is naturally
662          * aligned to the size
663          */
664         mask = ilog2(__roundup_pow_of_two(pages));
665         /* Fallback to domain selective flush if size is too big */
666         if (mask > cap_max_amask_val(iommu->cap))
667                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
668                         DMA_TLB_DSI_FLUSH, non_present_entry_flush);
669
670         return iommu->flush.flush_iotlb(iommu, did, addr, mask,
671                                         DMA_TLB_PSI_FLUSH,
672                                         non_present_entry_flush);
673 }
674
675 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
676 {
677         u32 pmen;
678         unsigned long flags;
679
680         spin_lock_irqsave(&iommu->register_lock, flags);
681         pmen = readl(iommu->reg + DMAR_PMEN_REG);
682         pmen &= ~DMA_PMEN_EPM;
683         writel(pmen, iommu->reg + DMAR_PMEN_REG);
684
685         /* wait for the protected region status bit to clear */
686         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
687                 readl, !(pmen & DMA_PMEN_PRS), pmen);
688
689         spin_unlock_irqrestore(&iommu->register_lock, flags);
690 }
691
692 static int iommu_enable_translation(struct intel_iommu *iommu)
693 {
694         u32 sts;
695         unsigned long flags;
696
697         spin_lock_irqsave(&iommu->register_lock, flags);
698         writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
699
700         /* Make sure hardware completes it */
701         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
702                 readl, (sts & DMA_GSTS_TES), sts);
703
704         iommu->gcmd |= DMA_GCMD_TE;
705         spin_unlock_irqrestore(&iommu->register_lock, flags);
706         return 0;
707 }
708
709 static int iommu_disable_translation(struct intel_iommu *iommu)
710 {
711         u32 sts;
712         unsigned long flag;
713
714         spin_lock_irqsave(&iommu->register_lock, flag);
715         iommu->gcmd &= ~DMA_GCMD_TE;
716         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
717
718         /* Make sure hardware completes it */
719         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
720                 readl, (!(sts & DMA_GSTS_TES)), sts);
721
722         spin_unlock_irqrestore(&iommu->register_lock, flag);
723         return 0;
724 }
725
726 /* iommu interrupt handling. Most of it is MSI-like. */
727
728 static const char *fault_reason_strings[] =
729 {
730         "Software",
731         "Present bit in root entry is clear",
732         "Present bit in context entry is clear",
733         "Invalid context entry",
734         "Access beyond MGAW",
735         "PTE Write access is not set",
736         "PTE Read access is not set",
737         "Next page table ptr is invalid",
738         "Root table address invalid",
739         "Context table ptr is invalid",
740         "non-zero reserved fields in RTP",
741         "non-zero reserved fields in CTP",
742         "non-zero reserved fields in PTE",
743 };
744 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
745
746 const char *dmar_get_fault_reason(u8 fault_reason)
747 {
748         if (fault_reason > MAX_FAULT_REASON_IDX)
749                 return "Unknown";
750         else
751                 return fault_reason_strings[fault_reason];
752 }
753
754 void dmar_msi_unmask(unsigned int irq)
755 {
756         struct intel_iommu *iommu = get_irq_data(irq);
757         unsigned long flag;
758
759         /* unmask it */
760         spin_lock_irqsave(&iommu->register_lock, flag);
761         writel(0, iommu->reg + DMAR_FECTL_REG);
762         /* Read back a register to flush the posted write */
763         readl(iommu->reg + DMAR_FECTL_REG);
764         spin_unlock_irqrestore(&iommu->register_lock, flag);
765 }
766
767 void dmar_msi_mask(unsigned int irq)
768 {
769         unsigned long flag;
770         struct intel_iommu *iommu = get_irq_data(irq);
771
772         /* mask it */
773         spin_lock_irqsave(&iommu->register_lock, flag);
774         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
775         /* Read back a register to flush the posted write */
776         readl(iommu->reg + DMAR_FECTL_REG);
777         spin_unlock_irqrestore(&iommu->register_lock, flag);
778 }
779
780 void dmar_msi_write(int irq, struct msi_msg *msg)
781 {
782         struct intel_iommu *iommu = get_irq_data(irq);
783         unsigned long flag;
784
785         spin_lock_irqsave(&iommu->register_lock, flag);
786         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
787         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
788         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
789         spin_unlock_irqrestore(&iommu->register_lock, flag);
790 }
791
792 void dmar_msi_read(int irq, struct msi_msg *msg)
793 {
794         struct intel_iommu *iommu = get_irq_data(irq);
795         unsigned long flag;
796
797         spin_lock_irqsave(&iommu->register_lock, flag);
798         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
799         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
800         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
801         spin_unlock_irqrestore(&iommu->register_lock, flag);
802 }
803
804 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
805                 u8 fault_reason, u16 source_id, unsigned long long addr)
806 {
807         const char *reason;
808
809         reason = dmar_get_fault_reason(fault_reason);
810
811         printk(KERN_ERR
812                 "DMAR:[%s] Request device [%02x:%02x.%d] "
813                 "fault addr %llx \n"
814                 "DMAR:[fault reason %02d] %s\n",
815                 (type ? "DMA Read" : "DMA Write"),
816                 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
817                 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
818         return 0;
819 }
820
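/*
 * Primary fault handling: each fault recording register is 16 bytes.  The
 * handler below walks the records starting at the index reported in the
 * fault status register, logs reason/source-id/address for every record
 * with its F (fault) bit set, clears that bit, and finally clears any
 * primary fault overflow condition.
 */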
821 #define PRIMARY_FAULT_REG_LEN (16)
822 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
823 {
824         struct intel_iommu *iommu = dev_id;
825         int reg, fault_index;
826         u32 fault_status;
827         unsigned long flag;
828
829         spin_lock_irqsave(&iommu->register_lock, flag);
830         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
831
832         /* TBD: ignore advanced fault log currently */
833         if (!(fault_status & DMA_FSTS_PPF))
834                 goto clear_overflow;
835
836         fault_index = dma_fsts_fault_record_index(fault_status);
837         reg = cap_fault_reg_offset(iommu->cap);
838         while (1) {
839                 u8 fault_reason;
840                 u16 source_id;
841                 u64 guest_addr;
842                 int type;
843                 u32 data;
844
845                 /* highest 32 bits */
846                 data = readl(iommu->reg + reg +
847                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
848                 if (!(data & DMA_FRCD_F))
849                         break;
850
851                 fault_reason = dma_frcd_fault_reason(data);
852                 type = dma_frcd_type(data);
853
854                 data = readl(iommu->reg + reg +
855                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
856                 source_id = dma_frcd_source_id(data);
857
858                 guest_addr = dmar_readq(iommu->reg + reg +
859                                 fault_index * PRIMARY_FAULT_REG_LEN);
860                 guest_addr = dma_frcd_page_addr(guest_addr);
861                 /* clear the fault */
862                 writel(DMA_FRCD_F, iommu->reg + reg +
863                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
864
865                 spin_unlock_irqrestore(&iommu->register_lock, flag);
866
867                 iommu_page_fault_do_one(iommu, type, fault_reason,
868                                 source_id, guest_addr);
869
870                 fault_index++;
871                 if (fault_index >= cap_num_fault_regs(iommu->cap))
872                         fault_index = 0;
873                 spin_lock_irqsave(&iommu->register_lock, flag);
874         }
875 clear_overflow:
876         /* clear primary fault overflow */
877         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
878         if (fault_status & DMA_FSTS_PFO)
879                 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
880
881         spin_unlock_irqrestore(&iommu->register_lock, flag);
882         return IRQ_HANDLED;
883 }
884
885 int dmar_set_interrupt(struct intel_iommu *iommu)
886 {
887         int irq, ret;
888
889         irq = create_irq();
890         if (!irq) {
891                 printk(KERN_ERR "IOMMU: no free vectors\n");
892                 return -EINVAL;
893         }
894
895         set_irq_data(irq, iommu);
896         iommu->irq = irq;
897
898         ret = arch_setup_dmar_msi(irq);
899         if (ret) {
900                 set_irq_data(irq, NULL);
901                 iommu->irq = 0;
902                 destroy_irq(irq);
903                 return ret;
904         }
905
906         /* Make sure the fault registers are clear before requesting the irq */
907         iommu_page_fault(irq, iommu);
908
909         ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
910         if (ret)
911                 printk(KERN_ERR "IOMMU: can't request irq\n");
912         return ret;
913 }
914
915 static int iommu_init_domains(struct intel_iommu *iommu)
916 {
917         unsigned long ndomains;
918         unsigned long nlongs;
919
920         ndomains = cap_ndoms(iommu->cap);
921         pr_debug("Number of Domains supported <%ld>\n", ndomains);
922         nlongs = BITS_TO_LONGS(ndomains);
923
924         /* TBD: there might be 64K domains,
925          * consider other allocation for future chip
926          */
927         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
928         if (!iommu->domain_ids) {
929                 printk(KERN_ERR "Allocating domain id array failed\n");
930                 return -ENOMEM;
931         }
932         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
933                         GFP_KERNEL);
934         if (!iommu->domains) {
935                 printk(KERN_ERR "Allocating domain array failed\n");
936                 kfree(iommu->domain_ids);
937                 return -ENOMEM;
938         }
939
940         spin_lock_init(&iommu->lock);
941
942         /*
943          * if Caching mode is set, then invalid translations are tagged
944          * with domainid 0. Hence we need to pre-allocate it.
945          */
946         if (cap_caching_mode(iommu->cap))
947                 set_bit(0, iommu->domain_ids);
948         return 0;
949 }
950
951
952 static void domain_exit(struct dmar_domain *domain);
953
954 void free_dmar_iommu(struct intel_iommu *iommu)
955 {
956         struct dmar_domain *domain;
957         int i;
958
959         i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
960         for (; i < cap_ndoms(iommu->cap); ) {
961                 domain = iommu->domains[i];
962                 clear_bit(i, iommu->domain_ids);
963                 domain_exit(domain);
964                 i = find_next_bit(iommu->domain_ids,
965                         cap_ndoms(iommu->cap), i+1);
966         }
967
968         if (iommu->gcmd & DMA_GCMD_TE)
969                 iommu_disable_translation(iommu);
970
971         if (iommu->irq) {
972                 set_irq_data(iommu->irq, NULL);
973                 /* This will mask the irq */
974                 free_irq(iommu->irq, iommu);
975                 destroy_irq(iommu->irq);
976         }
977
978         kfree(iommu->domains);
979         kfree(iommu->domain_ids);
980
981         /* free context mapping */
982         free_context_table(iommu);
983 }
984
985 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
986 {
987         unsigned long num;
988         unsigned long ndomains;
989         struct dmar_domain *domain;
990         unsigned long flags;
991
992         domain = alloc_domain_mem();
993         if (!domain)
994                 return NULL;
995
996         ndomains = cap_ndoms(iommu->cap);
997
998         spin_lock_irqsave(&iommu->lock, flags);
999         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1000         if (num >= ndomains) {
1001                 spin_unlock_irqrestore(&iommu->lock, flags);
1002                 free_domain_mem(domain);
1003                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1004                 return NULL;
1005         }
1006
1007         set_bit(num, iommu->domain_ids);
1008         domain->id = num;
1009         domain->iommu = iommu;
1010         iommu->domains[num] = domain;
1011         spin_unlock_irqrestore(&iommu->lock, flags);
1012
1013         return domain;
1014 }
1015
1016 static void iommu_free_domain(struct dmar_domain *domain)
1017 {
1018         unsigned long flags;
1019
1020         spin_lock_irqsave(&domain->iommu->lock, flags);
1021         clear_bit(domain->id, domain->iommu->domain_ids);
1022         spin_unlock_irqrestore(&domain->iommu->lock, flags);
1023 }
1024
1025 static struct iova_domain reserved_iova_list;
1026 static struct lock_class_key reserved_alloc_key;
1027 static struct lock_class_key reserved_rbtree_key;
1028
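/*
 * IOVA ranges that must never be handed out for DMA: the IOAPIC MMIO window
 * and every PCI device's MMIO resources (so remapped DMA cannot collide
 * with peer-to-peer MMIO addresses).  They are reserved once in this global
 * iova domain and later copied into every DMA domain by
 * domain_reserve_special_ranges().
 */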
1029 static void dmar_init_reserved_ranges(void)
1030 {
1031         struct pci_dev *pdev = NULL;
1032         struct iova *iova;
1033         int i;
1034         u64 addr, size;
1035
1036         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1037
1038         lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1039                 &reserved_alloc_key);
1040         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1041                 &reserved_rbtree_key);
1042
1043         /* IOAPIC ranges shouldn't be accessed by DMA */
1044         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1045                 IOVA_PFN(IOAPIC_RANGE_END));
1046         if (!iova)
1047                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1048
1049         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1050         for_each_pci_dev(pdev) {
1051                 struct resource *r;
1052
1053                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1054                         r = &pdev->resource[i];
1055                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1056                                 continue;
1057                         addr = r->start;
1058                         addr &= PAGE_MASK;
1059                         size = r->end - addr;
1060                         size = PAGE_ALIGN(size);
1061                         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1062                                 IOVA_PFN(size + addr) - 1);
1063                         if (!iova)
1064                                 printk(KERN_ERR "Reserve iova failed\n");
1065                 }
1066         }
1067
1068 }
1069
1070 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1071 {
1072         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1073 }
1074
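/*
 * The adjusted guest address width must leave a whole number of 9-bit
 * page-table levels above the 12-bit page offset.  For example, a requested
 * gaw of 36 gives r = (36 - 12) % 9 = 6, so the width is rounded up to
 * 36 + 9 - 6 = 39 bits (12 + 3 * 9).
 */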
1075 static inline int guestwidth_to_adjustwidth(int gaw)
1076 {
1077         int agaw;
1078         int r = (gaw - 12) % 9;
1079
1080         if (r == 0)
1081                 agaw = gaw;
1082         else
1083                 agaw = gaw + 9 - r;
1084         if (agaw > 64)
1085                 agaw = 64;
1086         return agaw;
1087 }
1088
1089 static int domain_init(struct dmar_domain *domain, int guest_width)
1090 {
1091         struct intel_iommu *iommu;
1092         int adjust_width, agaw;
1093         unsigned long sagaw;
1094
1095         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1096         spin_lock_init(&domain->mapping_lock);
1097
1098         domain_reserve_special_ranges(domain);
1099
1100         /* calculate AGAW */
1101         iommu = domain->iommu;
1102         if (guest_width > cap_mgaw(iommu->cap))
1103                 guest_width = cap_mgaw(iommu->cap);
1104         domain->gaw = guest_width;
1105         adjust_width = guestwidth_to_adjustwidth(guest_width);
1106         agaw = width_to_agaw(adjust_width);
1107         sagaw = cap_sagaw(iommu->cap);
1108         if (!test_bit(agaw, &sagaw)) {
1109                 /* hardware doesn't support it, choose a bigger one */
1110                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1111                 agaw = find_next_bit(&sagaw, 5, agaw);
1112                 if (agaw >= 5)
1113                         return -ENODEV;
1114         }
1115         domain->agaw = agaw;
1116         INIT_LIST_HEAD(&domain->devices);
1117
1118         /* always allocate the top pgd */
1119         domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1120         if (!domain->pgd)
1121                 return -ENOMEM;
1122         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1123         return 0;
1124 }
1125
1126 static void domain_exit(struct dmar_domain *domain)
1127 {
1128         u64 end;
1129
1130         /* Domain 0 is reserved, so don't process it */
1131         if (!domain)
1132                 return;
1133
1134         domain_remove_dev_info(domain);
1135         /* destroy iovas */
1136         put_iova_domain(&domain->iovad);
1137         end = DOMAIN_MAX_ADDR(domain->gaw);
1138         end = end & (~PAGE_MASK);
1139
1140         /* clear ptes */
1141         dma_pte_clear_range(domain, 0, end);
1142
1143         /* free page tables */
1144         dma_pte_free_pagetable(domain, 0, end);
1145
1146         iommu_free_domain(domain);
1147         free_domain_mem(domain);
1148 }
1149
1150 static int domain_context_mapping_one(struct dmar_domain *domain,
1151                 u8 bus, u8 devfn)
1152 {
1153         struct context_entry *context;
1154         struct intel_iommu *iommu = domain->iommu;
1155         unsigned long flags;
1156
1157         pr_debug("Set context mapping for %02x:%02x.%d\n",
1158                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1159         BUG_ON(!domain->pgd);
1160         context = device_to_context_entry(iommu, bus, devfn);
1161         if (!context)
1162                 return -ENOMEM;
1163         spin_lock_irqsave(&iommu->lock, flags);
1164         if (context_present(*context)) {
1165                 spin_unlock_irqrestore(&iommu->lock, flags);
1166                 return 0;
1167         }
1168
1169         context_set_domain_id(*context, domain->id);
1170         context_set_address_width(*context, domain->agaw);
1171         context_set_address_root(*context, virt_to_phys(domain->pgd));
1172         context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1173         context_set_fault_enable(*context);
1174         context_set_present(*context);
1175         __iommu_flush_cache(iommu, context, sizeof(*context));
1176
1177         /* it's a non-present to present mapping */
1178         if (iommu->flush.flush_context(iommu, domain->id,
1179                 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1180                 DMA_CCMD_DEVICE_INVL, 1))
1181                 iommu_flush_write_buffer(iommu);
1182         else
1183                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1184
1185         spin_unlock_irqrestore(&iommu->lock, flags);
1186         return 0;
1187 }
1188
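/*
 * Install the context entry for the device itself and, when it sits behind
 * a PCIe-to-PCI bridge, for every bridge on the upstream path as well,
 * since DMA coming through a conventional PCI bridge is tagged with the
 * bridge's requester id rather than the device's own.
 */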
1189 static int
1190 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1191 {
1192         int ret;
1193         struct pci_dev *tmp, *parent;
1194
1195         ret = domain_context_mapping_one(domain, pdev->bus->number,
1196                 pdev->devfn);
1197         if (ret)
1198                 return ret;
1199
1200         /* dependent device mapping */
1201         tmp = pci_find_upstream_pcie_bridge(pdev);
1202         if (!tmp)
1203                 return 0;
1204         /* Secondary interface's bus number and devfn 0 */
1205         parent = pdev->bus->self;
1206         while (parent != tmp) {
1207                 ret = domain_context_mapping_one(domain, parent->bus->number,
1208                         parent->devfn);
1209                 if (ret)
1210                         return ret;
1211                 parent = parent->bus->self;
1212         }
1213         if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1214                 return domain_context_mapping_one(domain,
1215                         tmp->subordinate->number, 0);
1216         else /* this is a legacy PCI bridge */
1217                 return domain_context_mapping_one(domain,
1218                         tmp->bus->number, tmp->devfn);
1219 }
1220
1221 static int domain_context_mapped(struct dmar_domain *domain,
1222         struct pci_dev *pdev)
1223 {
1224         int ret;
1225         struct pci_dev *tmp, *parent;
1226
1227         ret = device_context_mapped(domain->iommu,
1228                 pdev->bus->number, pdev->devfn);
1229         if (!ret)
1230                 return ret;
1231         /* dependent device mapping */
1232         tmp = pci_find_upstream_pcie_bridge(pdev);
1233         if (!tmp)
1234                 return ret;
1235         /* Secondary interface's bus number and devfn 0 */
1236         parent = pdev->bus->self;
1237         while (parent != tmp) {
1238                 ret = device_context_mapped(domain->iommu, parent->bus->number,
1239                         parent->devfn);
1240                 if (!ret)
1241                         return ret;
1242                 parent = parent->bus->self;
1243         }
1244         if (tmp->is_pcie)
1245                 return device_context_mapped(domain->iommu,
1246                         tmp->subordinate->number, 0);
1247         else
1248                 return device_context_mapped(domain->iommu,
1249                         tmp->bus->number, tmp->devfn);
1250 }
1251
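/*
 * Map the host physical range [hpa, hpa + size) at IO virtual address iova,
 * one 4KB page at a time, filling in last-level PTEs with the requested
 * read/write protection.  Any required IOTLB flush is left to the caller.
 */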
1252 static int
1253 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1254                         u64 hpa, size_t size, int prot)
1255 {
1256         u64 start_pfn, end_pfn;
1257         struct dma_pte *pte;
1258         int index;
1259         int addr_width = agaw_to_width(domain->agaw);
1260
1261         hpa &= (((u64)1) << addr_width) - 1;
1262
1263         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1264                 return -EINVAL;
1265         iova &= PAGE_MASK;
1266         start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1267         end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1268         index = 0;
1269         while (start_pfn < end_pfn) {
1270                 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1271                 if (!pte)
1272                         return -ENOMEM;
1273                 /* We don't need lock here, nobody else
1274                  * touches the iova range
1275                  */
1276                 BUG_ON(dma_pte_addr(*pte));
1277                 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
1278                 dma_set_pte_prot(*pte, prot);
1279                 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1280                 start_pfn++;
1281                 index++;
1282         }
1283         return 0;
1284 }
1285
1286 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1287 {
1288         clear_context_table(domain->iommu, bus, devfn);
1289         domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1290                                            DMA_CCMD_GLOBAL_INVL, 0);
1291         domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1292                                          DMA_TLB_GLOBAL_FLUSH, 0);
1293 }
1294
1295 static void domain_remove_dev_info(struct dmar_domain *domain)
1296 {
1297         struct device_domain_info *info;
1298         unsigned long flags;
1299
1300         spin_lock_irqsave(&device_domain_lock, flags);
1301         while (!list_empty(&domain->devices)) {
1302                 info = list_entry(domain->devices.next,
1303                         struct device_domain_info, link);
1304                 list_del(&info->link);
1305                 list_del(&info->global);
1306                 if (info->dev)
1307                         info->dev->dev.archdata.iommu = NULL;
1308                 spin_unlock_irqrestore(&device_domain_lock, flags);
1309
1310                 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1311                 free_devinfo_mem(info);
1312
1313                 spin_lock_irqsave(&device_domain_lock, flags);
1314         }
1315         spin_unlock_irqrestore(&device_domain_lock, flags);
1316 }
1317
1318 /*
1319  * find_domain
1320  * Note: the domain info is stored in struct pci_dev->dev.archdata.iommu
1321  */
1322 static struct dmar_domain *
1323 find_domain(struct pci_dev *pdev)
1324 {
1325         struct device_domain_info *info;
1326
1327         /* No lock here, assumes no domain exit in normal case */
1328         info = pdev->dev.archdata.iommu;
1329         if (info)
1330                 return info->domain;
1331         return NULL;
1332 }
1333
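/*
 * get_domain_for_dev() flow: reuse the domain cached in
 * pdev->dev.archdata.iommu if there is one; otherwise, if the device sits
 * behind a PCIe-to-PCI bridge, share the domain already attached to that
 * bridge; otherwise allocate a fresh domain from the IOMMU of the matching
 * DRHD unit.  A racing caller is detected under device_domain_lock and the
 * duplicate domain is dropped.
 */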
1334 /* domain is initialized */
1335 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1336 {
1337         struct dmar_domain *domain, *found = NULL;
1338         struct intel_iommu *iommu;
1339         struct dmar_drhd_unit *drhd;
1340         struct device_domain_info *info, *tmp;
1341         struct pci_dev *dev_tmp;
1342         unsigned long flags;
1343         int bus = 0, devfn = 0;
1344
1345         domain = find_domain(pdev);
1346         if (domain)
1347                 return domain;
1348
1349         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1350         if (dev_tmp) {
1351                 if (dev_tmp->is_pcie) {
1352                         bus = dev_tmp->subordinate->number;
1353                         devfn = 0;
1354                 } else {
1355                         bus = dev_tmp->bus->number;
1356                         devfn = dev_tmp->devfn;
1357                 }
1358                 spin_lock_irqsave(&device_domain_lock, flags);
1359                 list_for_each_entry(info, &device_domain_list, global) {
1360                         if (info->bus == bus && info->devfn == devfn) {
1361                                 found = info->domain;
1362                                 break;
1363                         }
1364                 }
1365                 spin_unlock_irqrestore(&device_domain_lock, flags);
1366                 /* the pcie-pci bridge already has a domain, use it */
1367                 if (found) {
1368                         domain = found;
1369                         goto found_domain;
1370                 }
1371         }
1372
1373         /* Allocate new domain for the device */
1374         drhd = dmar_find_matched_drhd_unit(pdev);
1375         if (!drhd) {
1376                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1377                         pci_name(pdev));
1378                 return NULL;
1379         }
1380         iommu = drhd->iommu;
1381
1382         domain = iommu_alloc_domain(iommu);
1383         if (!domain)
1384                 goto error;
1385
1386         if (domain_init(domain, gaw)) {
1387                 domain_exit(domain);
1388                 goto error;
1389         }
1390
1391         /* register pcie-to-pci device */
1392         if (dev_tmp) {
1393                 info = alloc_devinfo_mem();
1394                 if (!info) {
1395                         domain_exit(domain);
1396                         goto error;
1397                 }
1398                 info->bus = bus;
1399                 info->devfn = devfn;
1400                 info->dev = NULL;
1401                 info->domain = domain;
1402                 /* This domain is shared by devices under p2p bridge */
1403                 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1404
1405                 /* the pcie-to-pci bridge already has a domain, use it */
1406                 found = NULL;
1407                 spin_lock_irqsave(&device_domain_lock, flags);
1408                 list_for_each_entry(tmp, &device_domain_list, global) {
1409                         if (tmp->bus == bus && tmp->devfn == devfn) {
1410                                 found = tmp->domain;
1411                                 break;
1412                         }
1413                 }
1414                 if (found) {
1415                         free_devinfo_mem(info);
1416                         domain_exit(domain);
1417                         domain = found;
1418                 } else {
1419                         list_add(&info->link, &domain->devices);
1420                         list_add(&info->global, &device_domain_list);
1421                 }
1422                 spin_unlock_irqrestore(&device_domain_lock, flags);
1423         }
1424
1425 found_domain:
1426         info = alloc_devinfo_mem();
1427         if (!info)
1428                 goto error;
1429         info->bus = pdev->bus->number;
1430         info->devfn = pdev->devfn;
1431         info->dev = pdev;
1432         info->domain = domain;
1433         spin_lock_irqsave(&device_domain_lock, flags);
1434         /* somebody else was faster and already set up a domain */
1435         found = find_domain(pdev);
1436         if (found != NULL) {
1437                 spin_unlock_irqrestore(&device_domain_lock, flags);
1438                 if (found != domain) {
1439                         domain_exit(domain);
1440                         domain = found;
1441                 }
1442                 free_devinfo_mem(info);
1443                 return domain;
1444         }
1445         list_add(&info->link, &domain->devices);
1446         list_add(&info->global, &device_domain_list);
1447         pdev->dev.archdata.iommu = info;
1448         spin_unlock_irqrestore(&device_domain_lock, flags);
1449         return domain;
1450 error:
1451         /* recheck it here, maybe others set it */
1452         return find_domain(pdev);
1453 }
1454
1455 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1456                                       unsigned long long start,
1457                                       unsigned long long end)
1458 {
1459         struct dmar_domain *domain;
1460         unsigned long size;
1461         unsigned long long base;
1462         int ret;
1463
1464         printk(KERN_INFO
1465                 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1466                 pci_name(pdev), start, end);
1467         /* page table init */
1468         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1469         if (!domain)
1470                 return -ENOMEM;
1471
1472         /* The address might not be aligned */
1473         base = start & PAGE_MASK;
1474         size = end - base;
1475         size = PAGE_ALIGN(size);
1476         if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1477                         IOVA_PFN(base + size) - 1)) {
1478                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1479                 ret = -ENOMEM;
1480                 goto error;
1481         }
1482
1483         pr_debug("Mapping reserved region %lx@%llx for %s\n",
1484                 size, base, pci_name(pdev));
1485         /*
1486          * RMRR range might have overlap with physical memory range,
1487          * clear it first
1488          */
1489         dma_pte_clear_range(domain, base, base + size);
1490
1491         ret = domain_page_mapping(domain, base, base, size,
1492                 DMA_PTE_READ|DMA_PTE_WRITE);
1493         if (ret)
1494                 goto error;
1495
1496         /* context entry init */
1497         ret = domain_context_mapping(domain, pdev);
1498         if (!ret)
1499                 return 0;
1500 error:
1501         domain_exit(domain);
1502         return ret;
1503
1504 }
1505
1506 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1507         struct pci_dev *pdev)
1508 {
1509         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1510                 return 0;
1511         return iommu_prepare_identity_map(pdev, rmrr->base_address,
1512                 rmrr->end_address + 1);
1513 }
1514
1515 #ifdef CONFIG_DMAR_GFX_WA
1516 struct iommu_prepare_data {
1517         struct pci_dev *pdev;
1518         int ret;
1519 };
1520
1521 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1522                                          unsigned long end_pfn, void *datax)
1523 {
1524         struct iommu_prepare_data *data;
1525
1526         data = (struct iommu_prepare_data *)datax;
1527
1528         data->ret = iommu_prepare_identity_map(data->pdev,
1529                                 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1530         return data->ret;
1531
1532 }
1533
1534 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1535 {
1536         int nid;
1537         struct iommu_prepare_data data;
1538
1539         data.pdev = pdev;
1540         data.ret = 0;
1541
1542         for_each_online_node(nid) {
1543                 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1544                 if (data.ret)
1545                         return data.ret;
1546         }
1547         return data.ret;
1548 }
1549
1550 static void __init iommu_prepare_gfx_mapping(void)
1551 {
1552         struct pci_dev *pdev = NULL;
1553         int ret;
1554
1555         for_each_pci_dev(pdev) {
1556                 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1557                                 !IS_GFX_DEVICE(pdev))
1558                         continue;
1559                 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1560                         pci_name(pdev));
1561                 ret = iommu_prepare_with_active_regions(pdev);
1562                 if (ret)
1563                         printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1564         }
1565 }
1566 #endif
1567
1568 #ifdef CONFIG_DMAR_FLOPPY_WA
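/*
 * Floppy workaround: identity-map the first 16MB for the ISA/LPC
 * bridge so that legacy ISA DMA (e.g. the floppy controller) keeps
 * working behind the IOMMU.
 */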
1569 static inline void iommu_prepare_isa(void)
1570 {
1571         struct pci_dev *pdev;
1572         int ret;
1573
1574         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1575         if (!pdev)
1576                 return;
1577
1578         printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1579         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1580
1581         if (ret)
1582                 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1583                         "floppy might not work\n");
1584
1585 }
1586 #else
1587 static inline void iommu_prepare_isa(void)
1588 {
1589         return;
1590 }
1591 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1592
1593 int __init init_dmars(void)
1594 {
1595         struct dmar_drhd_unit *drhd;
1596         struct dmar_rmrr_unit *rmrr;
1597         struct pci_dev *pdev;
1598         struct intel_iommu *iommu;
1599         int i, ret, unit = 0;
1600
1601         /*
1602          * for each drhd
1603          *    allocate root
1604          *    initialize and program root entry to not present
1605          * endfor
1606          */
1607         for_each_drhd_unit(drhd) {
1608                 g_num_of_iommus++;
1609                 /*
1610                  * lock not needed as this is only incremented in the
1611                  * single-threaded kernel __init code path; all other
1612                  * accesses are read-only
1613                  */
1614         }
1615
1616         deferred_flush = kzalloc(g_num_of_iommus *
1617                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1618         if (!deferred_flush) {
1619                 ret = -ENOMEM;
1620                 goto error;
1621         }
1622
1623         for_each_drhd_unit(drhd) {
1624                 if (drhd->ignored)
1625                         continue;
1626
1627                 iommu = drhd->iommu;
1628
1629                 ret = iommu_init_domains(iommu);
1630                 if (ret)
1631                         goto error;
1632
1633                 /*
1634                  * TBD:
1635                  * we could share the same root & context tables
1636                  * among all IOMMUs.  Need to split it later.
1637                  */
1638                 ret = iommu_alloc_root_entry(iommu);
1639                 if (ret) {
1640                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1641                         goto error;
1642                 }
1643         }
1644
1645         for_each_drhd_unit(drhd) {
1646                 if (drhd->ignored)
1647                         continue;
1648
1649                 iommu = drhd->iommu;
1650                 if (dmar_enable_qi(iommu)) {
1651                         /*
1652                          * Queued Invalidation not enabled; use Register-Based
1653                          * Invalidation instead
1654                          */
1655                         iommu->flush.flush_context = __iommu_flush_context;
1656                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1657                         printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1658                                "invalidation\n", drhd->reg_base_addr);
1659                 } else {
1660                         iommu->flush.flush_context = qi_flush_context;
1661                         iommu->flush.flush_iotlb = qi_flush_iotlb;
1662                         printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1663                                "invalidation\n", drhd->reg_base_addr);
1664                 }
1665         }
1666
1667         /*
1668          * For each rmrr
1669          *   for each dev attached to rmrr
1670          *   do
1671          *     locate drhd for dev, alloc domain for dev
1672          *     allocate free domain
1673          *     allocate page table entries for rmrr
1674          *     if context not allocated for bus
1675          *           allocate and init context
1676          *           set present in root table for this bus
1677          *     init context with domain, translation etc
1678          *    endfor
1679          * endfor
1680          */
1681         for_each_rmrr_units(rmrr) {
1682                 for (i = 0; i < rmrr->devices_cnt; i++) {
1683                         pdev = rmrr->devices[i];
1684                         /* some BIOSes list non-existent devices in the DMAR table */
1685                         if (!pdev)
1686                                 continue;
1687                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1688                         if (ret)
1689                                 printk(KERN_ERR
1690                                  "IOMMU: mapping reserved region failed\n");
1691                 }
1692         }
1693
1694         iommu_prepare_gfx_mapping();
1695
1696         iommu_prepare_isa();
1697
1698         /*
1699          * for each drhd
1700          *   enable fault log
1701          *   global invalidate context cache
1702          *   global invalidate iotlb
1703          *   enable translation
1704          */
1705         for_each_drhd_unit(drhd) {
1706                 if (drhd->ignored)
1707                         continue;
1708                 iommu = drhd->iommu;
1709                 sprintf(iommu->name, "dmar%d", unit++);
1710
1711                 iommu_flush_write_buffer(iommu);
1712
1713                 ret = dmar_set_interrupt(iommu);
1714                 if (ret)
1715                         goto error;
1716
1717                 iommu_set_root_entry(iommu);
1718
1719                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1720                                            0);
1721                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1722                                          0);
1723                 iommu_disable_protect_mem_regions(iommu);
1724
1725                 ret = iommu_enable_translation(iommu);
1726                 if (ret)
1727                         goto error;
1728         }
1729
1730         return 0;
1731 error:
1732         for_each_drhd_unit(drhd) {
1733                 if (drhd->ignored)
1734                         continue;
1735                 iommu = drhd->iommu;
1736                 free_iommu(iommu);
1737         }
1738         return ret;
1739 }
1740
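/*
 * Return the size, rounded up to page granularity, of the region that
 * must be mapped to cover host_addr..host_addr+size (including the
 * offset of host_addr within its first page).
 */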
1741 static inline u64 aligned_size(u64 host_addr, size_t size)
1742 {
1743         u64 addr;
1744         addr = (host_addr & (~PAGE_MASK)) + size;
1745         return PAGE_ALIGN(addr);
1746 }
1747
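/*
 * Allocate an IOVA range of @size bytes below @end, clamped to the
 * domain's guest address width; returns NULL if no space is available.
 */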
1748 struct iova *
1749 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1750 {
1751         struct iova *piova;
1752
1753         /* Make sure it's in range */
1754         end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1755         if (!size || (IOVA_START_ADDR + size > end))
1756                 return NULL;
1757
1758         piova = alloc_iova(&domain->iovad,
1759                         size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1760         return piova;
1761 }
1762
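/*
 * Allocate an IOVA honoring the device's DMA mask: for masks of 32 bits
 * or less (or with dmar_forcedac set) allocate directly under the mask,
 * otherwise prefer an address below 4GB and fall back to the full mask
 * only if that fails.
 */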
1763 static struct iova *
1764 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1765                    size_t size, u64 dma_mask)
1766 {
1767         struct pci_dev *pdev = to_pci_dev(dev);
1768         struct iova *iova = NULL;
1769
1770         if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1771                 iova = iommu_alloc_iova(domain, size, dma_mask);
1772         else {
1773                 /*
1774                  * First try to allocate an io virtual address in
1775                  * DMA_32BIT_MASK and if that fails then try allocating
1776                  * from higher range
1777                  */
1778                 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1779                 if (!iova)
1780                         iova = iommu_alloc_iova(domain, size, dma_mask);
1781         }
1782
1783         if (!iova) {
1784                 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
1785                 return NULL;
1786         }
1787
1788         return iova;
1789 }
1790
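/*
 * Find (or create) the DMA remapping domain for @pdev and make sure its
 * context entry is programmed; returns NULL on failure.
 */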
1791 static struct dmar_domain *
1792 get_valid_domain_for_dev(struct pci_dev *pdev)
1793 {
1794         struct dmar_domain *domain;
1795         int ret;
1796
1797         domain = get_domain_for_dev(pdev,
1798                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
1799         if (!domain) {
1800                 printk(KERN_ERR
1801                         "Allocating domain for %s failed\n", pci_name(pdev));
1802                 return NULL;
1803         }
1804
1805         /* make sure context mapping is ok */
1806         if (unlikely(!domain_context_mapped(domain, pdev))) {
1807                 ret = domain_context_mapping(domain, pdev);
1808                 if (ret) {
1809                         printk(KERN_ERR
1810                                 "Domain context map for %s failed\n",
1811                                 pci_name(pdev));
1812                         return NULL;
1813                 }
1814         }
1815
1816         return domain;
1817 }
1818
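/*
 * Core of the map_single path: devices marked DUMMY_DEVICE_DOMAIN_INFO
 * bypass translation and get back the physical address unchanged.
 * Otherwise allocate an IOVA, map it to the page-aligned physical range
 * with permissions derived from @dir, perform the required IOTLB or
 * write-buffer flush, and return the resulting bus address (0 on failure).
 */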
1819 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1820                                      size_t size, int dir, u64 dma_mask)
1821 {
1822         struct pci_dev *pdev = to_pci_dev(hwdev);
1823         struct dmar_domain *domain;
1824         phys_addr_t start_paddr;
1825         struct iova *iova;
1826         int prot = 0;
1827         int ret;
1828
1829         BUG_ON(dir == DMA_NONE);
1830         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1831                 return paddr;
1832
1833         domain = get_valid_domain_for_dev(pdev);
1834         if (!domain)
1835                 return 0;
1836
1837         size = aligned_size((u64)paddr, size);
1838
1839         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
1840         if (!iova)
1841                 goto error;
1842
1843         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
1844
1845         /*
1846          * Check if DMAR supports zero-length reads on write-only
1847          * mappings.
1848          */
1849         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
1850                         !cap_zlr(domain->iommu->cap))
1851                 prot |= DMA_PTE_READ;
1852         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1853                 prot |= DMA_PTE_WRITE;
1854         /*
1855          * paddr to (paddr + size) might cover only part of a page, so we
1856          * map the whole page.  Note: if two parts of one page are mapped
1857          * separately, we might have two guest addresses mapping to the
1858          * same host paddr, but this is not a big problem.
1859          */
1860         ret = domain_page_mapping(domain, start_paddr,
1861                 ((u64)paddr) & PAGE_MASK, size, prot);
1862         if (ret)
1863                 goto error;
1864
1865         /* it's a non-present to present mapping */
1866         ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1867                         start_paddr, size >> VTD_PAGE_SHIFT, 1);
1868         if (ret)
1869                 iommu_flush_write_buffer(domain->iommu);
1870
1871         return start_paddr + ((u64)paddr & (~PAGE_MASK));
1872
1873 error:
1874         if (iova)
1875                 __free_iova(&domain->iovad, iova);
1876         printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
1877                 pci_name(pdev), size, (unsigned long long)paddr, dir);
1878         return 0;
1879 }
1880
1881 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1882                             size_t size, int dir)
1883 {
1884         return __intel_map_single(hwdev, paddr, size, dir,
1885                                   to_pci_dev(hwdev)->dma_mask);
1886 }
1887
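/*
 * Flush the IOTLB of every IOMMU that has deferred unmaps pending and
 * free the queued IOVAs.  Called with async_umap_flush_lock held.
 */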
1888 static void flush_unmaps(void)
1889 {
1890         int i, j;
1891
1892         timer_on = 0;
1893
1894         /* just flush them all */
1895         for (i = 0; i < g_num_of_iommus; i++) {
1896                 if (deferred_flush[i].next) {
1897                         struct intel_iommu *iommu =
1898                                 deferred_flush[i].domain[0]->iommu;
1899
1900                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1901                                                  DMA_TLB_GLOBAL_FLUSH, 0);
1902                         for (j = 0; j < deferred_flush[i].next; j++) {
1903                                 __free_iova(&deferred_flush[i].domain[j]->iovad,
1904                                                 deferred_flush[i].iova[j]);
1905                         }
1906                         deferred_flush[i].next = 0;
1907                 }
1908         }
1909
1910         list_size = 0;
1911 }
1912
1913 static void flush_unmaps_timeout(unsigned long data)
1914 {
1915         unsigned long flags;
1916
1917         spin_lock_irqsave(&async_umap_flush_lock, flags);
1918         flush_unmaps();
1919         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1920 }
1921
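/*
 * Queue an IOVA for deferred freeing on its IOMMU, flushing everything
 * first if the queue has reached HIGH_WATER_MARK, and arm the 10ms
 * unmap timer if it is not already running.
 */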
1922 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1923 {
1924         unsigned long flags;
1925         int next, iommu_id;
1926
1927         spin_lock_irqsave(&async_umap_flush_lock, flags);
1928         if (list_size == HIGH_WATER_MARK)
1929                 flush_unmaps();
1930
1931         iommu_id = dom->iommu->seq_id;
1932
1933         next = deferred_flush[iommu_id].next;
1934         deferred_flush[iommu_id].domain[next] = dom;
1935         deferred_flush[iommu_id].iova[next] = iova;
1936         deferred_flush[iommu_id].next++;
1937
1938         if (!timer_on) {
1939                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1940                 timer_on = 1;
1941         }
1942         list_size++;
1943         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1944 }
1945
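/*
 * Tear down a single mapping: clear the PTEs and free the page tables
 * covering the range, then either flush the IOTLB and release the IOVA
 * synchronously (intel_iommu_strict) or defer the IOVA release via
 * add_unmap().
 */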
1946 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
1947                         int dir)
1948 {
1949         struct pci_dev *pdev = to_pci_dev(dev);
1950         struct dmar_domain *domain;
1951         unsigned long start_addr;
1952         struct iova *iova;
1953
1954         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1955                 return;
1956         domain = find_domain(pdev);
1957         BUG_ON(!domain);
1958
1959         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1960         if (!iova)
1961                 return;
1962
1963         start_addr = iova->pfn_lo << PAGE_SHIFT;
1964         size = aligned_size((u64)dev_addr, size);
1965
1966         pr_debug("Device %s unmapping: %lx@%llx\n",
1967                 pci_name(pdev), size, (unsigned long long)start_addr);
1968
1969         /*  clear the whole page */
1970         dma_pte_clear_range(domain, start_addr, start_addr + size);
1971         /* free page tables */
1972         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1973         if (intel_iommu_strict) {
1974                 if (iommu_flush_iotlb_psi(domain->iommu,
1975                         domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
1976                         iommu_flush_write_buffer(domain->iommu);
1977                 /* free iova */
1978                 __free_iova(&domain->iovad, iova);
1979         } else {
1980                 add_unmap(domain, iova);
1981                 /*
1982                  * queue up the release of the unmap to save the 1/6th of the
1983                  * cpu time used up by the iotlb flush operation...
1984                  */
1985         }
1986 }
1987
1988 void *intel_alloc_coherent(struct device *hwdev, size_t size,
1989                            dma_addr_t *dma_handle, gfp_t flags)
1990 {
1991         void *vaddr;
1992         int order;
1993
1994         size = PAGE_ALIGN(size);
1995         order = get_order(size);
1996         flags &= ~(GFP_DMA | GFP_DMA32);
1997
1998         vaddr = (void *)__get_free_pages(flags, order);
1999         if (!vaddr)
2000                 return NULL;
2001         memset(vaddr, 0, size);
2002
2003         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2004                                          DMA_BIDIRECTIONAL,
2005                                          hwdev->coherent_dma_mask);
2006         if (*dma_handle)
2007                 return vaddr;
2008         free_pages((unsigned long)vaddr, order);
2009         return NULL;
2010 }
2011
2012 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2013                          dma_addr_t dma_handle)
2014 {
2015         int order;
2016
2017         size = PAGE_ALIGN(size);
2018         order = get_order(size);
2019
2020         intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2021         free_pages((unsigned long)vaddr, order);
2022 }
2023
2024 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2025
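/*
 * Tear down a scatterlist mapping created by intel_map_sg(): compute
 * the total aligned length, clear the PTEs and page tables, flush the
 * IOTLB, and release the IOVA.
 */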
2026 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2027                     int nelems, int dir)
2028 {
2029         int i;
2030         struct pci_dev *pdev = to_pci_dev(hwdev);
2031         struct dmar_domain *domain;
2032         unsigned long start_addr;
2033         struct iova *iova;
2034         size_t size = 0;
2035         void *addr;
2036         struct scatterlist *sg;
2037
2038         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2039                 return;
2040
2041         domain = find_domain(pdev);
2042
2043         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2044         if (!iova)
2045                 return;
2046         for_each_sg(sglist, sg, nelems, i) {
2047                 addr = SG_ENT_VIRT_ADDRESS(sg);
2048                 size += aligned_size((u64)addr, sg->length);
2049         }
2050
2051         start_addr = iova->pfn_lo << PAGE_SHIFT;
2052
2053         /*  clear the whole page */
2054         dma_pte_clear_range(domain, start_addr, start_addr + size);
2055         /* free page tables */
2056         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2057
2058         if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2059                         size >> VTD_PAGE_SHIFT, 0))
2060                 iommu_flush_write_buffer(domain->iommu);
2061
2062         /* free iova */
2063         __free_iova(&domain->iovad, iova);
2064 }
2065
2066 static int intel_nontranslate_map_sg(struct device *hwdev,
2067         struct scatterlist *sglist, int nelems, int dir)
2068 {
2069         int i;
2070         struct scatterlist *sg;
2071
2072         for_each_sg(sglist, sg, nelems, i) {
2073                 BUG_ON(!sg_page(sg));
2074                 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2075                 sg->dma_length = sg->length;
2076         }
2077         return nelems;
2078 }
2079
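/*
 * Map a scatterlist into one contiguous IOVA range: the total aligned
 * size is computed first, a single IOVA allocation covers it, and each
 * segment is then mapped at its running offset.  On any mapping failure
 * everything mapped so far is torn down and 0 is returned.
 */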
2080 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2081                  int dir)
2082 {
2083         void *addr;
2084         int i;
2085         struct pci_dev *pdev = to_pci_dev(hwdev);
2086         struct dmar_domain *domain;
2087         size_t size = 0;
2088         int prot = 0;
2089         size_t offset = 0;
2090         struct iova *iova = NULL;
2091         int ret;
2092         struct scatterlist *sg;
2093         unsigned long start_addr;
2094
2095         BUG_ON(dir == DMA_NONE);
2096         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2097                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2098
2099         domain = get_valid_domain_for_dev(pdev);
2100         if (!domain)
2101                 return 0;
2102
2103         for_each_sg(sglist, sg, nelems, i) {
2104                 addr = SG_ENT_VIRT_ADDRESS(sg);
2105                 addr = (void *)virt_to_phys(addr);
2106                 size += aligned_size((u64)addr, sg->length);
2107         }
2108
2109         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2110         if (!iova) {
2111                 sglist->dma_length = 0;
2112                 return 0;
2113         }
2114
2115         /*
2116          * Check if DMAR supports zero-length reads on write-only
2117          * mappings.
2118          */
2119         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2120                         !cap_zlr(domain->iommu->cap))
2121                 prot |= DMA_PTE_READ;
2122         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2123                 prot |= DMA_PTE_WRITE;
2124
2125         start_addr = iova->pfn_lo << PAGE_SHIFT;
2126         offset = 0;
2127         for_each_sg(sglist, sg, nelems, i) {
2128                 addr = SG_ENT_VIRT_ADDRESS(sg);
2129                 addr = (void *)virt_to_phys(addr);
2130                 size = aligned_size((u64)addr, sg->length);
2131                 ret = domain_page_mapping(domain, start_addr + offset,
2132                         ((u64)addr) & PAGE_MASK,
2133                         size, prot);
2134                 if (ret) {
2135                         /*  clear the page */
2136                         dma_pte_clear_range(domain, start_addr,
2137                                   start_addr + offset);
2138                         /* free page tables */
2139                         dma_pte_free_pagetable(domain, start_addr,
2140                                   start_addr + offset);
2141                         /* free iova */
2142                         __free_iova(&domain->iovad, iova);
2143                         return 0;
2144                 }
2145                 sg->dma_address = start_addr + offset +
2146                                 ((u64)addr & (~PAGE_MASK));
2147                 sg->dma_length = sg->length;
2148                 offset += size;
2149         }
2150
2151         /* it's a non-present to present mapping */
2152         if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2153                         start_addr, offset >> VTD_PAGE_SHIFT, 1))
2154                 iommu_flush_write_buffer(domain->iommu);
2155         return nelems;
2156 }
2157
2158 static struct dma_mapping_ops intel_dma_ops = {
2159         .alloc_coherent = intel_alloc_coherent,
2160         .free_coherent = intel_free_coherent,
2161         .map_single = intel_map_single,
2162         .unmap_single = intel_unmap_single,
2163         .map_sg = intel_map_sg,
2164         .unmap_sg = intel_unmap_sg,
2165 };
2166
2167 static inline int iommu_domain_cache_init(void)
2168 {
2169         int ret = 0;
2170
2171         iommu_domain_cache = kmem_cache_create("iommu_domain",
2172                                          sizeof(struct dmar_domain),
2173                                          0,
2174                                          SLAB_HWCACHE_ALIGN,
2175                                          NULL);
2177         if (!iommu_domain_cache) {
2178                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2179                 ret = -ENOMEM;
2180         }
2181
2182         return ret;
2183 }
2184
2185 static inline int iommu_devinfo_cache_init(void)
2186 {
2187         int ret = 0;
2188
2189         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2190                                          sizeof(struct device_domain_info),
2191                                          0,
2192                                          SLAB_HWCACHE_ALIGN,
2193                                          NULL);
2194         if (!iommu_devinfo_cache) {
2195                 printk(KERN_ERR "Couldn't create devinfo cache\n");
2196                 ret = -ENOMEM;
2197         }
2198
2199         return ret;
2200 }
2201
2202 static inline int iommu_iova_cache_init(void)
2203 {
2204         int ret = 0;
2205
2206         iommu_iova_cache = kmem_cache_create("iommu_iova",
2207                                          sizeof(struct iova),
2208                                          0,
2209                                          SLAB_HWCACHE_ALIGN,
2210                                          NULL);
2211         if (!iommu_iova_cache) {
2212                 printk(KERN_ERR "Couldn't create iova cache\n");
2213                 ret = -ENOMEM;
2214         }
2215
2216         return ret;
2217 }
2218
2219 static int __init iommu_init_mempool(void)
2220 {
2221         int ret;
2222         ret = iommu_iova_cache_init();
2223         if (ret)
2224                 return ret;
2225
2226         ret = iommu_domain_cache_init();
2227         if (ret)
2228                 goto domain_error;
2229
2230         ret = iommu_devinfo_cache_init();
2231         if (!ret)
2232                 return ret;
2233
2234         kmem_cache_destroy(iommu_domain_cache);
2235 domain_error:
2236         kmem_cache_destroy(iommu_iova_cache);
2237
2238         return -ENOMEM;
2239 }
2240
2241 static void __init iommu_exit_mempool(void)
2242 {
2243         kmem_cache_destroy(iommu_devinfo_cache);
2244         kmem_cache_destroy(iommu_domain_cache);
2245         kmem_cache_destroy(iommu_iova_cache);
2246
2247 }
2248
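/*
 * Mark DRHD units that have no PCI devices attached as ignored.  When
 * graphics mapping is disabled (dmar_map_gfx == 0), also ignore units
 * that cover only graphics devices and let those devices bypass
 * translation entirely.
 */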
2249 static void __init init_no_remapping_devices(void)
2250 {
2251         struct dmar_drhd_unit *drhd;
2252
2253         for_each_drhd_unit(drhd) {
2254                 if (!drhd->include_all) {
2255                         int i;
2256                         for (i = 0; i < drhd->devices_cnt; i++)
2257                                 if (drhd->devices[i] != NULL)
2258                                         break;
2259                         /* ignore DMAR unit if no PCI devices exist */
2260                         if (i == drhd->devices_cnt)
2261                                 drhd->ignored = 1;
2262                 }
2263         }
2264
2265         if (dmar_map_gfx)
2266                 return;
2267
2268         for_each_drhd_unit(drhd) {
2269                 int i;
2270                 if (drhd->ignored || drhd->include_all)
2271                         continue;
2272
2273                 for (i = 0; i < drhd->devices_cnt; i++)
2274                         if (drhd->devices[i] &&
2275                                 !IS_GFX_DEVICE(drhd->devices[i]))
2276                                 break;
2277
2278                 if (i < drhd->devices_cnt)
2279                         continue;
2280
2281                 /* bypass IOMMU if it is just for gfx devices */
2282                 drhd->ignored = 1;
2283                 for (i = 0; i < drhd->devices_cnt; i++) {
2284                         if (!drhd->devices[i])
2285                                 continue;
2286                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2287                 }
2288         }
2289 }
2290
2291 int __init intel_iommu_init(void)
2292 {
2293         int ret = 0;
2294
2295         if (dmar_table_init())
2296                 return  -ENODEV;
2297
2298         if (dmar_dev_scope_init())
2299                 return  -ENODEV;
2300
2301         /*
2302          * Check the need for DMA-remapping initialization now.
2303          * The initialization above is also used by interrupt remapping.
2304          */
2305         if (no_iommu || swiotlb || dmar_disabled)
2306                 return -ENODEV;
2307
2308         iommu_init_mempool();
2309         dmar_init_reserved_ranges();
2310
2311         init_no_remapping_devices();
2312
2313         ret = init_dmars();
2314         if (ret) {
2315                 printk(KERN_ERR "IOMMU: dmar init failed\n");
2316                 put_iova_domain(&reserved_iova_list);
2317                 iommu_exit_mempool();
2318                 return ret;
2319         }
2320         printk(KERN_INFO
2321         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2322
2323         init_timer(&unmap_timer);
2324         force_iommu = 1;
2325         dma_ops = &intel_dma_ops;
2326         return 0;
2327 }
2328
2329 void intel_iommu_domain_exit(struct dmar_domain *domain)
2330 {
2331         u64 end;
2332
2333         /* Domain 0 is reserved, so don't process it */
2334         if (!domain)
2335                 return;
2336
2337         end = DOMAIN_MAX_ADDR(domain->gaw);
2338         end = end & (~VTD_PAGE_MASK);
2339
2340         /* clear ptes */
2341         dma_pte_clear_range(domain, 0, end);
2342
2343         /* free page tables */
2344         dma_pte_free_pagetable(domain, 0, end);
2345
2346         iommu_free_domain(domain);
2347         free_domain_mem(domain);
2348 }
2349 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2350
2351 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2352 {
2353         struct dmar_drhd_unit *drhd;
2354         struct dmar_domain *domain;
2355         struct intel_iommu *iommu;
2356
2357         drhd = dmar_find_matched_drhd_unit(pdev);
2358         if (!drhd) {
2359                 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2360                 return NULL;
2361         }
2362
2363         iommu = drhd->iommu;
2364         if (!iommu) {
2365                 printk(KERN_ERR
2366                         "intel_iommu_domain_alloc: iommu == NULL\n");
2367                 return NULL;
2368         }
2369         domain = iommu_alloc_domain(iommu);
2370         if (!domain) {
2371                 printk(KERN_ERR
2372                         "intel_iommu_domain_alloc: domain == NULL\n");
2373                 return NULL;
2374         }
2375         if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2376                 printk(KERN_ERR
2377                         "intel_iommu_domain_alloc: domain_init() failed\n");
2378                 intel_iommu_domain_exit(domain);
2379                 return NULL;
2380         }
2381         return domain;
2382 }
2383 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2384
2385 int intel_iommu_context_mapping(
2386         struct dmar_domain *domain, struct pci_dev *pdev)
2387 {
2388         return domain_context_mapping(domain, pdev);
2391 }
2392 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2393
2394 int intel_iommu_page_mapping(
2395         struct dmar_domain *domain, dma_addr_t iova,
2396         u64 hpa, size_t size, int prot)
2397 {
2398         return domain_page_mapping(domain, iova, hpa, size, prot);
2401 }
2402 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2403
2404 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2405 {
2406         detach_domain_for_dev(domain, bus, devfn);
2407 }
2408 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2409
2410 struct dmar_domain *
2411 intel_iommu_find_domain(struct pci_dev *pdev)
2412 {
2413         return find_domain(pdev);
2414 }
2415 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2416
2417 int intel_iommu_found(void)
2418 {
2419         return g_num_of_iommus;
2420 }
2421 EXPORT_SYMBOL_GPL(intel_iommu_found);
2422
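/*
 * Look up @iova in @domain's page table and return the host page frame
 * number it maps to, or 0 if no mapping exists.
 */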
2423 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2424 {
2425         struct dma_pte *pte;
2426         u64 pfn;
2427
2428         pfn = 0;
2429         pte = addr_to_dma_pte(domain, iova);
2430
2431         if (pte)
2432                 pfn = dma_pte_addr(*pte);
2433
2434         return pfn >> VTD_PAGE_SHIFT;
2435 }
2436 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);