Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux...
[linux-2.6] / drivers / pci / dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
40 /* No locks are needed as DMA remapping hardware unit
41  * list is constructed at boot time and hotplug of
42  * these units are not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so scan the list will find it at
53          * the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-exist devices in DMAR table, just
79                  * ignore it
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path ++;
95                 count --;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
106                         pdev->subordinate) || (scope->entry_type == \
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 pci_dev_put(pdev);
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                          pci_name(pdev));
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index ++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure which uniquely represent one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
177         if (!dmaru)
178                 return -ENOMEM;
179
180         dmaru->hdr = header;
181         drhd = (struct acpi_dmar_hardware_unit *)header;
182         dmaru->reg_base_addr = drhd->address;
183         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184
185         ret = alloc_iommu(dmaru);
186         if (ret) {
187                 kfree(dmaru);
188                 return ret;
189         }
190         dmar_register_drhd_unit(dmaru);
191         return 0;
192 }
193
194 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
195 {
196         struct acpi_dmar_hardware_unit *drhd;
197         int ret = 0;
198
199         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
200
201         if (dmaru->include_all)
202                 return 0;
203
204         ret = dmar_parse_dev_scope((void *)(drhd + 1),
205                                 ((void *)drhd) + drhd->header.length,
206                                 &dmaru->devices_cnt, &dmaru->devices,
207                                 drhd->segment);
208         if (ret) {
209                 list_del(&dmaru->list);
210                 kfree(dmaru);
211         }
212         return ret;
213 }
214
215 #ifdef CONFIG_DMAR
216 LIST_HEAD(dmar_rmrr_units);
217
218 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
219 {
220         list_add(&rmrr->list, &dmar_rmrr_units);
221 }
222
223
224 static int __init
225 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
226 {
227         struct acpi_dmar_reserved_memory *rmrr;
228         struct dmar_rmrr_unit *rmrru;
229
230         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
231         if (!rmrru)
232                 return -ENOMEM;
233
234         rmrru->hdr = header;
235         rmrr = (struct acpi_dmar_reserved_memory *)header;
236         rmrru->base_address = rmrr->base_address;
237         rmrru->end_address = rmrr->end_address;
238
239         dmar_register_rmrr_unit(rmrru);
240         return 0;
241 }
242
243 static int __init
244 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
245 {
246         struct acpi_dmar_reserved_memory *rmrr;
247         int ret;
248
249         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
250         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
251                 ((void *)rmrr) + rmrr->header.length,
252                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
253
254         if (ret || (rmrru->devices_cnt == 0)) {
255                 list_del(&rmrru->list);
256                 kfree(rmrru);
257         }
258         return ret;
259 }
260 #endif
261
262 static void __init
263 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
264 {
265         struct acpi_dmar_hardware_unit *drhd;
266         struct acpi_dmar_reserved_memory *rmrr;
267
268         switch (header->type) {
269         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
270                 drhd = (struct acpi_dmar_hardware_unit *)header;
271                 printk (KERN_INFO PREFIX
272                         "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
273                         drhd->flags, (unsigned long long)drhd->address);
274                 break;
275         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
276                 rmrr = (struct acpi_dmar_reserved_memory *)header;
277
278                 printk (KERN_INFO PREFIX
279                         "RMRR base: 0x%016Lx end: 0x%016Lx\n",
280                         (unsigned long long)rmrr->base_address,
281                         (unsigned long long)rmrr->end_address);
282                 break;
283         }
284 }
285
286 /**
287  * dmar_table_detect - checks to see if the platform supports DMAR devices
288  */
289 static int __init dmar_table_detect(void)
290 {
291         acpi_status status = AE_OK;
292
293         /* if we could find DMAR table, then there are DMAR devices */
294         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
295                                 (struct acpi_table_header **)&dmar_tbl,
296                                 &dmar_tbl_size);
297
298         if (ACPI_SUCCESS(status) && !dmar_tbl) {
299                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
300                 status = AE_NOT_FOUND;
301         }
302
303         return (ACPI_SUCCESS(status) ? 1 : 0);
304 }
305
306 /**
307  * parse_dmar_table - parses the DMA reporting table
308  */
309 static int __init
310 parse_dmar_table(void)
311 {
312         struct acpi_table_dmar *dmar;
313         struct acpi_dmar_header *entry_header;
314         int ret = 0;
315
316         /*
317          * Do it again, earlier dmar_tbl mapping could be mapped with
318          * fixed map.
319          */
320         dmar_table_detect();
321
322         dmar = (struct acpi_table_dmar *)dmar_tbl;
323         if (!dmar)
324                 return -ENODEV;
325
326         if (dmar->width < PAGE_SHIFT - 1) {
327                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
328                 return -EINVAL;
329         }
330
331         printk (KERN_INFO PREFIX "Host address width %d\n",
332                 dmar->width + 1);
333
334         entry_header = (struct acpi_dmar_header *)(dmar + 1);
335         while (((unsigned long)entry_header) <
336                         (((unsigned long)dmar) + dmar_tbl->length)) {
337                 /* Avoid looping forever on bad ACPI tables */
338                 if (entry_header->length == 0) {
339                         printk(KERN_WARNING PREFIX
340                                 "Invalid 0-length structure\n");
341                         ret = -EINVAL;
342                         break;
343                 }
344
345                 dmar_table_print_dmar_entry(entry_header);
346
347                 switch (entry_header->type) {
348                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
349                         ret = dmar_parse_one_drhd(entry_header);
350                         break;
351                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
352 #ifdef CONFIG_DMAR
353                         ret = dmar_parse_one_rmrr(entry_header);
354 #endif
355                         break;
356                 default:
357                         printk(KERN_WARNING PREFIX
358                                 "Unknown DMAR structure type\n");
359                         ret = 0; /* for forward compatibility */
360                         break;
361                 }
362                 if (ret)
363                         break;
364
365                 entry_header = ((void *)entry_header + entry_header->length);
366         }
367         return ret;
368 }
369
370 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
371                           struct pci_dev *dev)
372 {
373         int index;
374
375         while (dev) {
376                 for (index = 0; index < cnt; index++)
377                         if (dev == devices[index])
378                                 return 1;
379
380                 /* Check our parent */
381                 dev = dev->bus->self;
382         }
383
384         return 0;
385 }
386
387 struct dmar_drhd_unit *
388 dmar_find_matched_drhd_unit(struct pci_dev *dev)
389 {
390         struct dmar_drhd_unit *dmaru = NULL;
391         struct acpi_dmar_hardware_unit *drhd;
392
393         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
394                 drhd = container_of(dmaru->hdr,
395                                     struct acpi_dmar_hardware_unit,
396                                     header);
397
398                 if (dmaru->include_all &&
399                     drhd->segment == pci_domain_nr(dev->bus))
400                         return dmaru;
401
402                 if (dmar_pci_device_match(dmaru->devices,
403                                           dmaru->devices_cnt, dev))
404                         return dmaru;
405         }
406
407         return NULL;
408 }
409
410 int __init dmar_dev_scope_init(void)
411 {
412         struct dmar_drhd_unit *drhd, *drhd_n;
413         int ret = -ENODEV;
414
415         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
416                 ret = dmar_parse_dev(drhd);
417                 if (ret)
418                         return ret;
419         }
420
421 #ifdef CONFIG_DMAR
422         {
423                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
424                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
425                         ret = rmrr_parse_dev(rmrr);
426                         if (ret)
427                                 return ret;
428                 }
429         }
430 #endif
431
432         return ret;
433 }
434
435
436 int __init dmar_table_init(void)
437 {
438         static int dmar_table_initialized;
439         int ret;
440
441         if (dmar_table_initialized)
442                 return 0;
443
444         dmar_table_initialized = 1;
445
446         ret = parse_dmar_table();
447         if (ret) {
448                 if (ret != -ENODEV)
449                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
450                 return ret;
451         }
452
453         if (list_empty(&dmar_drhd_units)) {
454                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
455                 return -ENODEV;
456         }
457
458 #ifdef CONFIG_DMAR
459         if (list_empty(&dmar_rmrr_units))
460                 printk(KERN_INFO PREFIX "No RMRR found\n");
461 #endif
462
463 #ifdef CONFIG_INTR_REMAP
464         parse_ioapics_under_ir();
465 #endif
466         return 0;
467 }
468
469 void __init detect_intel_iommu(void)
470 {
471         int ret;
472
473         ret = dmar_table_detect();
474
475         {
476 #ifdef CONFIG_INTR_REMAP
477                 struct acpi_table_dmar *dmar;
478                 /*
479                  * for now we will disable dma-remapping when interrupt
480                  * remapping is enabled.
481                  * When support for queued invalidation for IOTLB invalidation
482                  * is added, we will not need this any more.
483                  */
484                 dmar = (struct acpi_table_dmar *) dmar_tbl;
485                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
486                         printk(KERN_INFO
487                                "Queued invalidation will be enabled to support "
488                                "x2apic and Intr-remapping.\n");
489 #endif
490 #ifdef CONFIG_DMAR
491                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
492                     !dmar_disabled)
493                         iommu_detected = 1;
494 #endif
495         }
496         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
497         dmar_tbl = NULL;
498 }
499
500
501 int alloc_iommu(struct dmar_drhd_unit *drhd)
502 {
503         struct intel_iommu *iommu;
504         int map_size;
505         u32 ver;
506         static int iommu_allocated = 0;
507         int agaw = 0;
508
509         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
510         if (!iommu)
511                 return -ENOMEM;
512
513         iommu->seq_id = iommu_allocated++;
514         sprintf (iommu->name, "dmar%d", iommu->seq_id);
515
516         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
517         if (!iommu->reg) {
518                 printk(KERN_ERR "IOMMU: can't map the region\n");
519                 goto error;
520         }
521         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
522         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
523
524 #ifdef CONFIG_DMAR
525         agaw = iommu_calculate_agaw(iommu);
526         if (agaw < 0) {
527                 printk(KERN_ERR
528                         "Cannot get a valid agaw for iommu (seq_id = %d)\n",
529                         iommu->seq_id);
530                 goto error;
531         }
532 #endif
533         iommu->agaw = agaw;
534
535         /* the registers might be more than one page */
536         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
537                 cap_max_fault_reg_offset(iommu->cap));
538         map_size = VTD_PAGE_ALIGN(map_size);
539         if (map_size > VTD_PAGE_SIZE) {
540                 iounmap(iommu->reg);
541                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
542                 if (!iommu->reg) {
543                         printk(KERN_ERR "IOMMU: can't map the region\n");
544                         goto error;
545                 }
546         }
547
548         ver = readl(iommu->reg + DMAR_VER_REG);
549         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
550                 (unsigned long long)drhd->reg_base_addr,
551                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
552                 (unsigned long long)iommu->cap,
553                 (unsigned long long)iommu->ecap);
554
555         spin_lock_init(&iommu->register_lock);
556
557         drhd->iommu = iommu;
558         return 0;
559 error:
560         kfree(iommu);
561         return -1;
562 }
563
564 void free_iommu(struct intel_iommu *iommu)
565 {
566         if (!iommu)
567                 return;
568
569 #ifdef CONFIG_DMAR
570         free_dmar_iommu(iommu);
571 #endif
572
573         if (iommu->reg)
574                 iounmap(iommu->reg);
575         kfree(iommu);
576 }
577
578 /*
579  * Reclaim all the submitted descriptors which have completed its work.
580  */
581 static inline void reclaim_free_desc(struct q_inval *qi)
582 {
583         while (qi->desc_status[qi->free_tail] == QI_DONE) {
584                 qi->desc_status[qi->free_tail] = QI_FREE;
585                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
586                 qi->free_cnt++;
587         }
588 }
589
590 static int qi_check_fault(struct intel_iommu *iommu, int index)
591 {
592         u32 fault;
593         int head;
594         struct q_inval *qi = iommu->qi;
595         int wait_index = (index + 1) % QI_LENGTH;
596
597         fault = readl(iommu->reg + DMAR_FSTS_REG);
598
599         /*
600          * If IQE happens, the head points to the descriptor associated
601          * with the error. No new descriptors are fetched until the IQE
602          * is cleared.
603          */
604         if (fault & DMA_FSTS_IQE) {
605                 head = readl(iommu->reg + DMAR_IQH_REG);
606                 if ((head >> 4) == index) {
607                         memcpy(&qi->desc[index], &qi->desc[wait_index],
608                                         sizeof(struct qi_desc));
609                         __iommu_flush_cache(iommu, &qi->desc[index],
610                                         sizeof(struct qi_desc));
611                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
612                         return -EINVAL;
613                 }
614         }
615
616         return 0;
617 }
618
619 /*
620  * Submit the queued invalidation descriptor to the remapping
621  * hardware unit and wait for its completion.
622  */
623 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
624 {
625         int rc = 0;
626         struct q_inval *qi = iommu->qi;
627         struct qi_desc *hw, wait_desc;
628         int wait_index, index;
629         unsigned long flags;
630
631         if (!qi)
632                 return 0;
633
634         hw = qi->desc;
635
636         spin_lock_irqsave(&qi->q_lock, flags);
637         while (qi->free_cnt < 3) {
638                 spin_unlock_irqrestore(&qi->q_lock, flags);
639                 cpu_relax();
640                 spin_lock_irqsave(&qi->q_lock, flags);
641         }
642
643         index = qi->free_head;
644         wait_index = (index + 1) % QI_LENGTH;
645
646         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
647
648         hw[index] = *desc;
649
650         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
651                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
652         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
653
654         hw[wait_index] = wait_desc;
655
656         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
657         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
658
659         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
660         qi->free_cnt -= 2;
661
662         /*
663          * update the HW tail register indicating the presence of
664          * new descriptors.
665          */
666         writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
667
668         while (qi->desc_status[wait_index] != QI_DONE) {
669                 /*
670                  * We will leave the interrupts disabled, to prevent interrupt
671                  * context to queue another cmd while a cmd is already submitted
672                  * and waiting for completion on this cpu. This is to avoid
673                  * a deadlock where the interrupt context can wait indefinitely
674                  * for free slots in the queue.
675                  */
676                 rc = qi_check_fault(iommu, index);
677                 if (rc)
678                         goto out;
679
680                 spin_unlock(&qi->q_lock);
681                 cpu_relax();
682                 spin_lock(&qi->q_lock);
683         }
684 out:
685         qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
686
687         reclaim_free_desc(qi);
688         spin_unlock_irqrestore(&qi->q_lock, flags);
689
690         return rc;
691 }
692
693 /*
694  * Flush the global interrupt entry cache.
695  */
696 void qi_global_iec(struct intel_iommu *iommu)
697 {
698         struct qi_desc desc;
699
700         desc.low = QI_IEC_TYPE;
701         desc.high = 0;
702
703         /* should never fail */
704         qi_submit_sync(&desc, iommu);
705 }
706
707 int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
708                      u64 type, int non_present_entry_flush)
709 {
710         struct qi_desc desc;
711
712         if (non_present_entry_flush) {
713                 if (!cap_caching_mode(iommu->cap))
714                         return 1;
715                 else
716                         did = 0;
717         }
718
719         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
720                         | QI_CC_GRAN(type) | QI_CC_TYPE;
721         desc.high = 0;
722
723         return qi_submit_sync(&desc, iommu);
724 }
725
726 int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
727                    unsigned int size_order, u64 type,
728                    int non_present_entry_flush)
729 {
730         u8 dw = 0, dr = 0;
731
732         struct qi_desc desc;
733         int ih = 0;
734
735         if (non_present_entry_flush) {
736                 if (!cap_caching_mode(iommu->cap))
737                         return 1;
738                 else
739                         did = 0;
740         }
741
742         if (cap_write_drain(iommu->cap))
743                 dw = 1;
744
745         if (cap_read_drain(iommu->cap))
746                 dr = 1;
747
748         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
749                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
750         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
751                 | QI_IOTLB_AM(size_order);
752
753         return qi_submit_sync(&desc, iommu);
754 }
755
756 /*
757  * Disable Queued Invalidation interface.
758  */
759 void dmar_disable_qi(struct intel_iommu *iommu)
760 {
761         unsigned long flags;
762         u32 sts;
763         cycles_t start_time = get_cycles();
764
765         if (!ecap_qis(iommu->ecap))
766                 return;
767
768         spin_lock_irqsave(&iommu->register_lock, flags);
769
770         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
771         if (!(sts & DMA_GSTS_QIES))
772                 goto end;
773
774         /*
775          * Give a chance to HW to complete the pending invalidation requests.
776          */
777         while ((readl(iommu->reg + DMAR_IQT_REG) !=
778                 readl(iommu->reg + DMAR_IQH_REG)) &&
779                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
780                 cpu_relax();
781
782         iommu->gcmd &= ~DMA_GCMD_QIE;
783
784         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
785
786         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
787                       !(sts & DMA_GSTS_QIES), sts);
788 end:
789         spin_unlock_irqrestore(&iommu->register_lock, flags);
790 }
791
792 /*
793  * Enable Queued Invalidation interface. This is a must to support
794  * interrupt-remapping. Also used by DMA-remapping, which replaces
795  * register based IOTLB invalidation.
796  */
797 int dmar_enable_qi(struct intel_iommu *iommu)
798 {
799         u32 cmd, sts;
800         unsigned long flags;
801         struct q_inval *qi;
802
803         if (!ecap_qis(iommu->ecap))
804                 return -ENOENT;
805
806         /*
807          * queued invalidation is already setup and enabled.
808          */
809         if (iommu->qi)
810                 return 0;
811
812         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
813         if (!iommu->qi)
814                 return -ENOMEM;
815
816         qi = iommu->qi;
817
818         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
819         if (!qi->desc) {
820                 kfree(qi);
821                 iommu->qi = 0;
822                 return -ENOMEM;
823         }
824
825         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
826         if (!qi->desc_status) {
827                 free_page((unsigned long) qi->desc);
828                 kfree(qi);
829                 iommu->qi = 0;
830                 return -ENOMEM;
831         }
832
833         qi->free_head = qi->free_tail = 0;
834         qi->free_cnt = QI_LENGTH;
835
836         spin_lock_init(&qi->q_lock);
837
838         spin_lock_irqsave(&iommu->register_lock, flags);
839         /* write zero to the tail reg */
840         writel(0, iommu->reg + DMAR_IQT_REG);
841
842         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
843
844         cmd = iommu->gcmd | DMA_GCMD_QIE;
845         iommu->gcmd |= DMA_GCMD_QIE;
846         writel(cmd, iommu->reg + DMAR_GCMD_REG);
847
848         /* Make sure hardware complete it */
849         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
850         spin_unlock_irqrestore(&iommu->register_lock, flags);
851
852         return 0;
853 }
854
855 /* iommu interrupt handling. Most stuff are MSI-like. */
856
857 enum faulttype {
858         DMA_REMAP,
859         INTR_REMAP,
860         UNKNOWN,
861 };
862
863 static const char *dma_remap_fault_reasons[] =
864 {
865         "Software",
866         "Present bit in root entry is clear",
867         "Present bit in context entry is clear",
868         "Invalid context entry",
869         "Access beyond MGAW",
870         "PTE Write access is not set",
871         "PTE Read access is not set",
872         "Next page table ptr is invalid",
873         "Root table address invalid",
874         "Context table ptr is invalid",
875         "non-zero reserved fields in RTP",
876         "non-zero reserved fields in CTP",
877         "non-zero reserved fields in PTE",
878 };
879
880 static const char *intr_remap_fault_reasons[] =
881 {
882         "Detected reserved fields in the decoded interrupt-remapped request",
883         "Interrupt index exceeded the interrupt-remapping table size",
884         "Present field in the IRTE entry is clear",
885         "Error accessing interrupt-remapping table pointed by IRTA_REG",
886         "Detected reserved fields in the IRTE entry",
887         "Blocked a compatibility format interrupt request",
888         "Blocked an interrupt request due to source-id verification failure",
889 };
890
891 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
892
893 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
894 {
895         if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
896                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
897                 *fault_type = INTR_REMAP;
898                 return intr_remap_fault_reasons[fault_reason - 0x20];
899         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
900                 *fault_type = DMA_REMAP;
901                 return dma_remap_fault_reasons[fault_reason];
902         } else {
903                 *fault_type = UNKNOWN;
904                 return "Unknown";
905         }
906 }
907
908 void dmar_msi_unmask(unsigned int irq)
909 {
910         struct intel_iommu *iommu = get_irq_data(irq);
911         unsigned long flag;
912
913         /* unmask it */
914         spin_lock_irqsave(&iommu->register_lock, flag);
915         writel(0, iommu->reg + DMAR_FECTL_REG);
916         /* Read a reg to force flush the post write */
917         readl(iommu->reg + DMAR_FECTL_REG);
918         spin_unlock_irqrestore(&iommu->register_lock, flag);
919 }
920
921 void dmar_msi_mask(unsigned int irq)
922 {
923         unsigned long flag;
924         struct intel_iommu *iommu = get_irq_data(irq);
925
926         /* mask it */
927         spin_lock_irqsave(&iommu->register_lock, flag);
928         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
929         /* Read a reg to force flush the post write */
930         readl(iommu->reg + DMAR_FECTL_REG);
931         spin_unlock_irqrestore(&iommu->register_lock, flag);
932 }
933
934 void dmar_msi_write(int irq, struct msi_msg *msg)
935 {
936         struct intel_iommu *iommu = get_irq_data(irq);
937         unsigned long flag;
938
939         spin_lock_irqsave(&iommu->register_lock, flag);
940         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
941         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
942         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
943         spin_unlock_irqrestore(&iommu->register_lock, flag);
944 }
945
946 void dmar_msi_read(int irq, struct msi_msg *msg)
947 {
948         struct intel_iommu *iommu = get_irq_data(irq);
949         unsigned long flag;
950
951         spin_lock_irqsave(&iommu->register_lock, flag);
952         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
953         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
954         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
955         spin_unlock_irqrestore(&iommu->register_lock, flag);
956 }
957
958 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
959                 u8 fault_reason, u16 source_id, unsigned long long addr)
960 {
961         const char *reason;
962         int fault_type;
963
964         reason = dmar_get_fault_reason(fault_reason, &fault_type);
965
966         if (fault_type == INTR_REMAP)
967                 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
968                        "fault index %llx\n"
969                         "INTR-REMAP:[fault reason %02d] %s\n",
970                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
971                         PCI_FUNC(source_id & 0xFF), addr >> 48,
972                         fault_reason, reason);
973         else
974                 printk(KERN_ERR
975                        "DMAR:[%s] Request device [%02x:%02x.%d] "
976                        "fault addr %llx \n"
977                        "DMAR:[fault reason %02d] %s\n",
978                        (type ? "DMA Read" : "DMA Write"),
979                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
980                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
981         return 0;
982 }
983
984 #define PRIMARY_FAULT_REG_LEN (16)
985 irqreturn_t dmar_fault(int irq, void *dev_id)
986 {
987         struct intel_iommu *iommu = dev_id;
988         int reg, fault_index;
989         u32 fault_status;
990         unsigned long flag;
991
992         spin_lock_irqsave(&iommu->register_lock, flag);
993         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
994         if (fault_status)
995                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
996                        fault_status);
997
998         /* TBD: ignore advanced fault log currently */
999         if (!(fault_status & DMA_FSTS_PPF))
1000                 goto clear_rest;
1001
1002         fault_index = dma_fsts_fault_record_index(fault_status);
1003         reg = cap_fault_reg_offset(iommu->cap);
1004         while (1) {
1005                 u8 fault_reason;
1006                 u16 source_id;
1007                 u64 guest_addr;
1008                 int type;
1009                 u32 data;
1010
1011                 /* highest 32 bits */
1012                 data = readl(iommu->reg + reg +
1013                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1014                 if (!(data & DMA_FRCD_F))
1015                         break;
1016
1017                 fault_reason = dma_frcd_fault_reason(data);
1018                 type = dma_frcd_type(data);
1019
1020                 data = readl(iommu->reg + reg +
1021                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1022                 source_id = dma_frcd_source_id(data);
1023
1024                 guest_addr = dmar_readq(iommu->reg + reg +
1025                                 fault_index * PRIMARY_FAULT_REG_LEN);
1026                 guest_addr = dma_frcd_page_addr(guest_addr);
1027                 /* clear the fault */
1028                 writel(DMA_FRCD_F, iommu->reg + reg +
1029                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1030
1031                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1032
1033                 dmar_fault_do_one(iommu, type, fault_reason,
1034                                 source_id, guest_addr);
1035
1036                 fault_index++;
1037                 if (fault_index > cap_num_fault_regs(iommu->cap))
1038                         fault_index = 0;
1039                 spin_lock_irqsave(&iommu->register_lock, flag);
1040         }
1041 clear_rest:
1042         /* clear all the other faults */
1043         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1044         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1045
1046         spin_unlock_irqrestore(&iommu->register_lock, flag);
1047         return IRQ_HANDLED;
1048 }
1049
1050 int dmar_set_interrupt(struct intel_iommu *iommu)
1051 {
1052         int irq, ret;
1053
1054         /*
1055          * Check if the fault interrupt is already initialized.
1056          */
1057         if (iommu->irq)
1058                 return 0;
1059
1060         irq = create_irq();
1061         if (!irq) {
1062                 printk(KERN_ERR "IOMMU: no free vectors\n");
1063                 return -EINVAL;
1064         }
1065
1066         set_irq_data(irq, iommu);
1067         iommu->irq = irq;
1068
1069         ret = arch_setup_dmar_msi(irq);
1070         if (ret) {
1071                 set_irq_data(irq, NULL);
1072                 iommu->irq = 0;
1073                 destroy_irq(irq);
1074                 return 0;
1075         }
1076
1077         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1078         if (ret)
1079                 printk(KERN_ERR "IOMMU: can't request irq\n");
1080         return ret;
1081 }
1082
1083 int __init enable_drhd_fault_handling(void)
1084 {
1085         struct dmar_drhd_unit *drhd;
1086
1087         /*
1088          * Enable fault control interrupt.
1089          */
1090         for_each_drhd_unit(drhd) {
1091                 int ret;
1092                 struct intel_iommu *iommu = drhd->iommu;
1093                 ret = dmar_set_interrupt(iommu);
1094
1095                 if (ret) {
1096                         printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1097                                " interrupt, ret %d\n",
1098                                (unsigned long long)drhd->reg_base_addr, ret);
1099                         return -1;
1100                 }
1101         }
1102
1103         return 0;
1104 }