Merge branch 'linus' into x86/core
[linux-2.6] / arch / x86 / kernel / amd_iommu.c
1 /*
2  * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
3  * Author: Joerg Roedel <joerg.roedel@amd.com>
4  *         Leo Duran <leo.duran@amd.com>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
18  */
19
20 #include <linux/pci.h>
21 #include <linux/gfp.h>
22 #include <linux/bitops.h>
23 #include <linux/scatterlist.h>
24 #include <linux/iommu-helper.h>
25 #include <asm/proto.h>
26 #include <asm/gart.h>
27 #include <asm/amd_iommu_types.h>
28 #include <asm/amd_iommu.h>
29
30 #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
31
32 #define to_pages(addr, size) \
33          (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
34
35 static DEFINE_RWLOCK(amd_iommu_devtable_lock);
36
37 struct command {
38         u32 data[4];
39 };
40
41 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
42                              struct unity_map_entry *e);
43
44 static int iommu_has_npcache(struct amd_iommu *iommu)
45 {
46         return iommu->cap & IOMMU_CAP_NPCACHE;
47 }
48
49 static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
50 {
51         u32 tail, head;
52         u8 *target;
53
54         tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
55         target = (iommu->cmd_buf + tail);
56         memcpy_toio(target, cmd, sizeof(*cmd));
57         tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
58         head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
59         if (tail == head)
60                 return -ENOMEM;
61         writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
62
63         return 0;
64 }
65
66 static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
67 {
68         unsigned long flags;
69         int ret;
70
71         spin_lock_irqsave(&iommu->lock, flags);
72         ret = __iommu_queue_command(iommu, cmd);
73         spin_unlock_irqrestore(&iommu->lock, flags);
74
75         return ret;
76 }
77
78 static int iommu_completion_wait(struct amd_iommu *iommu)
79 {
80         int ret;
81         struct command cmd;
82         volatile u64 ready = 0;
83         unsigned long ready_phys = virt_to_phys(&ready);
84
85         memset(&cmd, 0, sizeof(cmd));
86         cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
87         cmd.data[1] = HIGH_U32(ready_phys);
88         cmd.data[2] = 1; /* value written to 'ready' */
89         CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
90
91         iommu->need_sync = 0;
92
93         ret = iommu_queue_command(iommu, &cmd);
94
95         if (ret)
96                 return ret;
97
98         while (!ready)
99                 cpu_relax();
100
101         return 0;
102 }
103
104 static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
105 {
106         struct command cmd;
107
108         BUG_ON(iommu == NULL);
109
110         memset(&cmd, 0, sizeof(cmd));
111         CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
112         cmd.data[0] = devid;
113
114         iommu->need_sync = 1;
115
116         return iommu_queue_command(iommu, &cmd);
117 }
118
119 static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
120                 u64 address, u16 domid, int pde, int s)
121 {
122         struct command cmd;
123
124         memset(&cmd, 0, sizeof(cmd));
125         address &= PAGE_MASK;
126         CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
127         cmd.data[1] |= domid;
128         cmd.data[2] = LOW_U32(address);
129         cmd.data[3] = HIGH_U32(address);
130         if (s)
131                 cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
132         if (pde)
133                 cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
134
135         iommu->need_sync = 1;
136
137         return iommu_queue_command(iommu, &cmd);
138 }
139
140 static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
141                 u64 address, size_t size)
142 {
143         int s = 0;
144         unsigned pages = to_pages(address, size);
145
146         address &= PAGE_MASK;
147
148         if (pages > 1) {
149                 /*
150                  * If we have to flush more than one page, flush all
151                  * TLB entries for this domain
152                  */
153                 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
154                 s = 1;
155         }
156
157         iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
158
159         return 0;
160 }
161
162 static int iommu_map(struct protection_domain *dom,
163                      unsigned long bus_addr,
164                      unsigned long phys_addr,
165                      int prot)
166 {
167         u64 __pte, *pte, *page;
168
169         bus_addr  = PAGE_ALIGN(bus_addr);
170         phys_addr = PAGE_ALIGN(bus_addr);
171
172         /* only support 512GB address spaces for now */
173         if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
174                 return -EINVAL;
175
176         pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
177
178         if (!IOMMU_PTE_PRESENT(*pte)) {
179                 page = (u64 *)get_zeroed_page(GFP_KERNEL);
180                 if (!page)
181                         return -ENOMEM;
182                 *pte = IOMMU_L2_PDE(virt_to_phys(page));
183         }
184
185         pte = IOMMU_PTE_PAGE(*pte);
186         pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
187
188         if (!IOMMU_PTE_PRESENT(*pte)) {
189                 page = (u64 *)get_zeroed_page(GFP_KERNEL);
190                 if (!page)
191                         return -ENOMEM;
192                 *pte = IOMMU_L1_PDE(virt_to_phys(page));
193         }
194
195         pte = IOMMU_PTE_PAGE(*pte);
196         pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
197
198         if (IOMMU_PTE_PRESENT(*pte))
199                 return -EBUSY;
200
201         __pte = phys_addr | IOMMU_PTE_P;
202         if (prot & IOMMU_PROT_IR)
203                 __pte |= IOMMU_PTE_IR;
204         if (prot & IOMMU_PROT_IW)
205                 __pte |= IOMMU_PTE_IW;
206
207         *pte = __pte;
208
209         return 0;
210 }
211
212 static int iommu_for_unity_map(struct amd_iommu *iommu,
213                                struct unity_map_entry *entry)
214 {
215         u16 bdf, i;
216
217         for (i = entry->devid_start; i <= entry->devid_end; ++i) {
218                 bdf = amd_iommu_alias_table[i];
219                 if (amd_iommu_rlookup_table[bdf] == iommu)
220                         return 1;
221         }
222
223         return 0;
224 }
225
226 static int iommu_init_unity_mappings(struct amd_iommu *iommu)
227 {
228         struct unity_map_entry *entry;
229         int ret;
230
231         list_for_each_entry(entry, &amd_iommu_unity_map, list) {
232                 if (!iommu_for_unity_map(iommu, entry))
233                         continue;
234                 ret = dma_ops_unity_map(iommu->default_dom, entry);
235                 if (ret)
236                         return ret;
237         }
238
239         return 0;
240 }
241
242 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
243                              struct unity_map_entry *e)
244 {
245         u64 addr;
246         int ret;
247
248         for (addr = e->address_start; addr < e->address_end;
249              addr += PAGE_SIZE) {
250                 ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
251                 if (ret)
252                         return ret;
253                 /*
254                  * if unity mapping is in aperture range mark the page
255                  * as allocated in the aperture
256                  */
257                 if (addr < dma_dom->aperture_size)
258                         __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
259         }
260
261         return 0;
262 }
263
264 static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
265                                           u16 devid)
266 {
267         struct unity_map_entry *e;
268         int ret;
269
270         list_for_each_entry(e, &amd_iommu_unity_map, list) {
271                 if (!(devid >= e->devid_start && devid <= e->devid_end))
272                         continue;
273                 ret = dma_ops_unity_map(dma_dom, e);
274                 if (ret)
275                         return ret;
276         }
277
278         return 0;
279 }
280
281 static unsigned long dma_mask_to_pages(unsigned long mask)
282 {
283         return (mask >> PAGE_SHIFT) +
284                 (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
285 }
286
287 static unsigned long dma_ops_alloc_addresses(struct device *dev,
288                                              struct dma_ops_domain *dom,
289                                              unsigned int pages)
290 {
291         unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
292         unsigned long address;
293         unsigned long size = dom->aperture_size >> PAGE_SHIFT;
294         unsigned long boundary_size;
295
296         boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
297                         PAGE_SIZE) >> PAGE_SHIFT;
298         limit = limit < size ? limit : size;
299
300         if (dom->next_bit >= limit)
301                 dom->next_bit = 0;
302
303         address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
304                         0 , boundary_size, 0);
305         if (address == -1)
306                 address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
307                                 0, boundary_size, 0);
308
309         if (likely(address != -1)) {
310                 dom->next_bit = address + pages;
311                 address <<= PAGE_SHIFT;
312         } else
313                 address = bad_dma_address;
314
315         WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
316
317         return address;
318 }
319
320 static void dma_ops_free_addresses(struct dma_ops_domain *dom,
321                                    unsigned long address,
322                                    unsigned int pages)
323 {
324         address >>= PAGE_SHIFT;
325         iommu_area_free(dom->bitmap, address, pages);
326 }
327
328 static u16 domain_id_alloc(void)
329 {
330         unsigned long flags;
331         int id;
332
333         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
334         id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
335         BUG_ON(id == 0);
336         if (id > 0 && id < MAX_DOMAIN_ID)
337                 __set_bit(id, amd_iommu_pd_alloc_bitmap);
338         else
339                 id = 0;
340         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
341
342         return id;
343 }
344
345 static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
346                                       unsigned long start_page,
347                                       unsigned int pages)
348 {
349         unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
350
351         if (start_page + pages > last_page)
352                 pages = last_page - start_page;
353
354         set_bit_string(dom->bitmap, start_page, pages);
355 }
356
357 static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
358 {
359         int i, j;
360         u64 *p1, *p2, *p3;
361
362         p1 = dma_dom->domain.pt_root;
363
364         if (!p1)
365                 return;
366
367         for (i = 0; i < 512; ++i) {
368                 if (!IOMMU_PTE_PRESENT(p1[i]))
369                         continue;
370
371                 p2 = IOMMU_PTE_PAGE(p1[i]);
372                 for (j = 0; j < 512; ++i) {
373                         if (!IOMMU_PTE_PRESENT(p2[j]))
374                                 continue;
375                         p3 = IOMMU_PTE_PAGE(p2[j]);
376                         free_page((unsigned long)p3);
377                 }
378
379                 free_page((unsigned long)p2);
380         }
381
382         free_page((unsigned long)p1);
383 }
384
385 static void dma_ops_domain_free(struct dma_ops_domain *dom)
386 {
387         if (!dom)
388                 return;
389
390         dma_ops_free_pagetable(dom);
391
392         kfree(dom->pte_pages);
393
394         kfree(dom->bitmap);
395
396         kfree(dom);
397 }
398
399 static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
400                                                    unsigned order)
401 {
402         struct dma_ops_domain *dma_dom;
403         unsigned i, num_pte_pages;
404         u64 *l2_pde;
405         u64 address;
406
407         /*
408          * Currently the DMA aperture must be between 32 MB and 1GB in size
409          */
410         if ((order < 25) || (order > 30))
411                 return NULL;
412
413         dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
414         if (!dma_dom)
415                 return NULL;
416
417         spin_lock_init(&dma_dom->domain.lock);
418
419         dma_dom->domain.id = domain_id_alloc();
420         if (dma_dom->domain.id == 0)
421                 goto free_dma_dom;
422         dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
423         dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
424         dma_dom->domain.priv = dma_dom;
425         if (!dma_dom->domain.pt_root)
426                 goto free_dma_dom;
427         dma_dom->aperture_size = (1ULL << order);
428         dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
429                                   GFP_KERNEL);
430         if (!dma_dom->bitmap)
431                 goto free_dma_dom;
432         /*
433          * mark the first page as allocated so we never return 0 as
434          * a valid dma-address. So we can use 0 as error value
435          */
436         dma_dom->bitmap[0] = 1;
437         dma_dom->next_bit = 0;
438
439         if (iommu->exclusion_start &&
440             iommu->exclusion_start < dma_dom->aperture_size) {
441                 unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
442                 int pages = to_pages(iommu->exclusion_start,
443                                 iommu->exclusion_length);
444                 dma_ops_reserve_addresses(dma_dom, startpage, pages);
445         }
446
447         num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
448         dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
449                         GFP_KERNEL);
450         if (!dma_dom->pte_pages)
451                 goto free_dma_dom;
452
453         l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
454         if (l2_pde == NULL)
455                 goto free_dma_dom;
456
457         dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
458
459         for (i = 0; i < num_pte_pages; ++i) {
460                 dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
461                 if (!dma_dom->pte_pages[i])
462                         goto free_dma_dom;
463                 address = virt_to_phys(dma_dom->pte_pages[i]);
464                 l2_pde[i] = IOMMU_L1_PDE(address);
465         }
466
467         return dma_dom;
468
469 free_dma_dom:
470         dma_ops_domain_free(dma_dom);
471
472         return NULL;
473 }
474
475 static struct protection_domain *domain_for_device(u16 devid)
476 {
477         struct protection_domain *dom;
478         unsigned long flags;
479
480         read_lock_irqsave(&amd_iommu_devtable_lock, flags);
481         dom = amd_iommu_pd_table[devid];
482         read_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
483
484         return dom;
485 }
486
487 static void set_device_domain(struct amd_iommu *iommu,
488                               struct protection_domain *domain,
489                               u16 devid)
490 {
491         unsigned long flags;
492
493         u64 pte_root = virt_to_phys(domain->pt_root);
494
495         pte_root |= (domain->mode & 0x07) << 9;
496         pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;
497
498         write_lock_irqsave(&amd_iommu_devtable_lock, flags);
499         amd_iommu_dev_table[devid].data[0] = pte_root;
500         amd_iommu_dev_table[devid].data[1] = pte_root >> 32;
501         amd_iommu_dev_table[devid].data[2] = domain->id;
502
503         amd_iommu_pd_table[devid] = domain;
504         write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
505
506         iommu_queue_inv_dev_entry(iommu, devid);
507
508         iommu->need_sync = 1;
509 }
510
511 static int get_device_resources(struct device *dev,
512                                 struct amd_iommu **iommu,
513                                 struct protection_domain **domain,
514                                 u16 *bdf)
515 {
516         struct dma_ops_domain *dma_dom;
517         struct pci_dev *pcidev;
518         u16 _bdf;
519
520         BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
521
522         pcidev = to_pci_dev(dev);
523         _bdf = (pcidev->bus->number << 8) | pcidev->devfn;
524
525         if (_bdf >= amd_iommu_last_bdf) {
526                 *iommu = NULL;
527                 *domain = NULL;
528                 *bdf = 0xffff;
529                 return 0;
530         }
531
532         *bdf = amd_iommu_alias_table[_bdf];
533
534         *iommu = amd_iommu_rlookup_table[*bdf];
535         if (*iommu == NULL)
536                 return 0;
537         dma_dom = (*iommu)->default_dom;
538         *domain = domain_for_device(*bdf);
539         if (*domain == NULL) {
540                 *domain = &dma_dom->domain;
541                 set_device_domain(*iommu, *domain, *bdf);
542                 printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
543                                 "device ", (*domain)->id);
544                 print_devid(_bdf, 1);
545         }
546
547         return 1;
548 }
549
550 static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
551                                      struct dma_ops_domain *dom,
552                                      unsigned long address,
553                                      phys_addr_t paddr,
554                                      int direction)
555 {
556         u64 *pte, __pte;
557
558         WARN_ON(address > dom->aperture_size);
559
560         paddr &= PAGE_MASK;
561
562         pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
563         pte += IOMMU_PTE_L0_INDEX(address);
564
565         __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
566
567         if (direction == DMA_TO_DEVICE)
568                 __pte |= IOMMU_PTE_IR;
569         else if (direction == DMA_FROM_DEVICE)
570                 __pte |= IOMMU_PTE_IW;
571         else if (direction == DMA_BIDIRECTIONAL)
572                 __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
573
574         WARN_ON(*pte);
575
576         *pte = __pte;
577
578         return (dma_addr_t)address;
579 }
580
581 static void dma_ops_domain_unmap(struct amd_iommu *iommu,
582                                  struct dma_ops_domain *dom,
583                                  unsigned long address)
584 {
585         u64 *pte;
586
587         if (address >= dom->aperture_size)
588                 return;
589
590         WARN_ON(address & 0xfffULL || address > dom->aperture_size);
591
592         pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
593         pte += IOMMU_PTE_L0_INDEX(address);
594
595         WARN_ON(!*pte);
596
597         *pte = 0ULL;
598 }
599
600 static dma_addr_t __map_single(struct device *dev,
601                                struct amd_iommu *iommu,
602                                struct dma_ops_domain *dma_dom,
603                                phys_addr_t paddr,
604                                size_t size,
605                                int dir)
606 {
607         dma_addr_t offset = paddr & ~PAGE_MASK;
608         dma_addr_t address, start;
609         unsigned int pages;
610         int i;
611
612         pages = to_pages(paddr, size);
613         paddr &= PAGE_MASK;
614
615         address = dma_ops_alloc_addresses(dev, dma_dom, pages);
616         if (unlikely(address == bad_dma_address))
617                 goto out;
618
619         start = address;
620         for (i = 0; i < pages; ++i) {
621                 dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
622                 paddr += PAGE_SIZE;
623                 start += PAGE_SIZE;
624         }
625         address += offset;
626
627 out:
628         return address;
629 }
630
631 static void __unmap_single(struct amd_iommu *iommu,
632                            struct dma_ops_domain *dma_dom,
633                            dma_addr_t dma_addr,
634                            size_t size,
635                            int dir)
636 {
637         dma_addr_t i, start;
638         unsigned int pages;
639
640         if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
641                 return;
642
643         pages = to_pages(dma_addr, size);
644         dma_addr &= PAGE_MASK;
645         start = dma_addr;
646
647         for (i = 0; i < pages; ++i) {
648                 dma_ops_domain_unmap(iommu, dma_dom, start);
649                 start += PAGE_SIZE;
650         }
651
652         dma_ops_free_addresses(dma_dom, dma_addr, pages);
653 }
654
655 static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
656                              size_t size, int dir)
657 {
658         unsigned long flags;
659         struct amd_iommu *iommu;
660         struct protection_domain *domain;
661         u16 devid;
662         dma_addr_t addr;
663
664         get_device_resources(dev, &iommu, &domain, &devid);
665
666         if (iommu == NULL || domain == NULL)
667                 return (dma_addr_t)paddr;
668
669         spin_lock_irqsave(&domain->lock, flags);
670         addr = __map_single(dev, iommu, domain->priv, paddr, size, dir);
671         if (addr == bad_dma_address)
672                 goto out;
673
674         if (iommu_has_npcache(iommu))
675                 iommu_flush_pages(iommu, domain->id, addr, size);
676
677         if (iommu->need_sync)
678                 iommu_completion_wait(iommu);
679
680 out:
681         spin_unlock_irqrestore(&domain->lock, flags);
682
683         return addr;
684 }
685
686 static void unmap_single(struct device *dev, dma_addr_t dma_addr,
687                          size_t size, int dir)
688 {
689         unsigned long flags;
690         struct amd_iommu *iommu;
691         struct protection_domain *domain;
692         u16 devid;
693
694         if (!get_device_resources(dev, &iommu, &domain, &devid))
695                 return;
696
697         spin_lock_irqsave(&domain->lock, flags);
698
699         __unmap_single(iommu, domain->priv, dma_addr, size, dir);
700
701         iommu_flush_pages(iommu, domain->id, dma_addr, size);
702
703         if (iommu->need_sync)
704                 iommu_completion_wait(iommu);
705
706         spin_unlock_irqrestore(&domain->lock, flags);
707 }
708
709 static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
710                            int nelems, int dir)
711 {
712         struct scatterlist *s;
713         int i;
714
715         for_each_sg(sglist, s, nelems, i) {
716                 s->dma_address = (dma_addr_t)sg_phys(s);
717                 s->dma_length  = s->length;
718         }
719
720         return nelems;
721 }
722
723 static int map_sg(struct device *dev, struct scatterlist *sglist,
724                   int nelems, int dir)
725 {
726         unsigned long flags;
727         struct amd_iommu *iommu;
728         struct protection_domain *domain;
729         u16 devid;
730         int i;
731         struct scatterlist *s;
732         phys_addr_t paddr;
733         int mapped_elems = 0;
734
735         get_device_resources(dev, &iommu, &domain, &devid);
736
737         if (!iommu || !domain)
738                 return map_sg_no_iommu(dev, sglist, nelems, dir);
739
740         spin_lock_irqsave(&domain->lock, flags);
741
742         for_each_sg(sglist, s, nelems, i) {
743                 paddr = sg_phys(s);
744
745                 s->dma_address = __map_single(dev, iommu, domain->priv,
746                                               paddr, s->length, dir);
747
748                 if (s->dma_address) {
749                         s->dma_length = s->length;
750                         mapped_elems++;
751                 } else
752                         goto unmap;
753                 if (iommu_has_npcache(iommu))
754                         iommu_flush_pages(iommu, domain->id, s->dma_address,
755                                           s->dma_length);
756         }
757
758         if (iommu->need_sync)
759                 iommu_completion_wait(iommu);
760
761 out:
762         spin_unlock_irqrestore(&domain->lock, flags);
763
764         return mapped_elems;
765 unmap:
766         for_each_sg(sglist, s, mapped_elems, i) {
767                 if (s->dma_address)
768                         __unmap_single(iommu, domain->priv, s->dma_address,
769                                        s->dma_length, dir);
770                 s->dma_address = s->dma_length = 0;
771         }
772
773         mapped_elems = 0;
774
775         goto out;
776 }
777
778 static void unmap_sg(struct device *dev, struct scatterlist *sglist,
779                      int nelems, int dir)
780 {
781         unsigned long flags;
782         struct amd_iommu *iommu;
783         struct protection_domain *domain;
784         struct scatterlist *s;
785         u16 devid;
786         int i;
787
788         if (!get_device_resources(dev, &iommu, &domain, &devid))
789                 return;
790
791         spin_lock_irqsave(&domain->lock, flags);
792
793         for_each_sg(sglist, s, nelems, i) {
794                 __unmap_single(iommu, domain->priv, s->dma_address,
795                                s->dma_length, dir);
796                 iommu_flush_pages(iommu, domain->id, s->dma_address,
797                                   s->dma_length);
798                 s->dma_address = s->dma_length = 0;
799         }
800
801         if (iommu->need_sync)
802                 iommu_completion_wait(iommu);
803
804         spin_unlock_irqrestore(&domain->lock, flags);
805 }
806
807 static void *alloc_coherent(struct device *dev, size_t size,
808                             dma_addr_t *dma_addr, gfp_t flag)
809 {
810         unsigned long flags;
811         void *virt_addr;
812         struct amd_iommu *iommu;
813         struct protection_domain *domain;
814         u16 devid;
815         phys_addr_t paddr;
816
817         virt_addr = (void *)__get_free_pages(flag, get_order(size));
818         if (!virt_addr)
819                 return 0;
820
821         memset(virt_addr, 0, size);
822         paddr = virt_to_phys(virt_addr);
823
824         get_device_resources(dev, &iommu, &domain, &devid);
825
826         if (!iommu || !domain) {
827                 *dma_addr = (dma_addr_t)paddr;
828                 return virt_addr;
829         }
830
831         spin_lock_irqsave(&domain->lock, flags);
832
833         *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
834                                  size, DMA_BIDIRECTIONAL);
835
836         if (*dma_addr == bad_dma_address) {
837                 free_pages((unsigned long)virt_addr, get_order(size));
838                 virt_addr = NULL;
839                 goto out;
840         }
841
842         if (iommu_has_npcache(iommu))
843                 iommu_flush_pages(iommu, domain->id, *dma_addr, size);
844
845         if (iommu->need_sync)
846                 iommu_completion_wait(iommu);
847
848 out:
849         spin_unlock_irqrestore(&domain->lock, flags);
850
851         return virt_addr;
852 }
853
854 static void free_coherent(struct device *dev, size_t size,
855                           void *virt_addr, dma_addr_t dma_addr)
856 {
857         unsigned long flags;
858         struct amd_iommu *iommu;
859         struct protection_domain *domain;
860         u16 devid;
861
862         get_device_resources(dev, &iommu, &domain, &devid);
863
864         if (!iommu || !domain)
865                 goto free_mem;
866
867         spin_lock_irqsave(&domain->lock, flags);
868
869         __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
870         iommu_flush_pages(iommu, domain->id, dma_addr, size);
871
872         if (iommu->need_sync)
873                 iommu_completion_wait(iommu);
874
875         spin_unlock_irqrestore(&domain->lock, flags);
876
877 free_mem:
878         free_pages((unsigned long)virt_addr, get_order(size));
879 }
880
881 /*
882  * If the driver core informs the DMA layer if a driver grabs a device
883  * we don't need to preallocate the protection domains anymore.
884  * For now we have to.
885  */
886 void prealloc_protection_domains(void)
887 {
888         struct pci_dev *dev = NULL;
889         struct dma_ops_domain *dma_dom;
890         struct amd_iommu *iommu;
891         int order = amd_iommu_aperture_order;
892         u16 devid;
893
894         while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
895                 devid = (dev->bus->number << 8) | dev->devfn;
896                 if (devid >= amd_iommu_last_bdf)
897                         continue;
898                 devid = amd_iommu_alias_table[devid];
899                 if (domain_for_device(devid))
900                         continue;
901                 iommu = amd_iommu_rlookup_table[devid];
902                 if (!iommu)
903                         continue;
904                 dma_dom = dma_ops_domain_alloc(iommu, order);
905                 if (!dma_dom)
906                         continue;
907                 init_unity_mappings_for_device(dma_dom, devid);
908                 set_device_domain(iommu, &dma_dom->domain, devid);
909                 printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
910                        dma_dom->domain.id);
911                 print_devid(devid, 1);
912         }
913 }
914
915 static struct dma_mapping_ops amd_iommu_dma_ops = {
916         .alloc_coherent = alloc_coherent,
917         .free_coherent = free_coherent,
918         .map_single = map_single,
919         .unmap_single = unmap_single,
920         .map_sg = map_sg,
921         .unmap_sg = unmap_sg,
922 };
923
924 int __init amd_iommu_init_dma_ops(void)
925 {
926         struct amd_iommu *iommu;
927         int order = amd_iommu_aperture_order;
928         int ret;
929
930         list_for_each_entry(iommu, &amd_iommu_list, list) {
931                 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
932                 if (iommu->default_dom == NULL)
933                         return -ENOMEM;
934                 ret = iommu_init_unity_mappings(iommu);
935                 if (ret)
936                         goto free_domains;
937         }
938
939         if (amd_iommu_isolate)
940                 prealloc_protection_domains();
941
942         iommu_detected = 1;
943         force_iommu = 1;
944         bad_dma_address = 0;
945 #ifdef CONFIG_GART_IOMMU
946         gart_iommu_aperture_disabled = 1;
947         gart_iommu_aperture = 0;
948 #endif
949
950         dma_ops = &amd_iommu_dma_ops;
951
952         return 0;
953
954 free_domains:
955
956         list_for_each_entry(iommu, &amd_iommu_list, list) {
957                 if (iommu->default_dom)
958                         dma_ops_domain_free(iommu->default_dom);
959         }
960
961         return ret;
962 }