drivers/pci/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
40 /* No locks are needed: the DMA remapping hardware unit
41  * list is constructed at boot time, and hotplug of
42  * these units is not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * Add INCLUDE_ALL units at the tail, so a scan of the list will
53          * find them at the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
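/*
 * Resolve a single ACPI device-scope entry to a pci_dev.  The PCI path
 * entries that follow the scope header are walked hop by hop from the
 * start bus, descending into subordinate buses at each bridge; a
 * reference to the device found at the end of the path is returned in
 * *dev (NULL if the BIOS listed a device that does not exist).
 */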
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-existent devices in the DMAR table;
79                  * just ignore them.
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path++;
95                 count--;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
106                         pdev->subordinate) || (scope->entry_type ==
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 printk(KERN_WARNING PREFIX
109                         "Device scope type does not match for %s\n",
110                         pci_name(pdev));
111                 pci_dev_put(pdev);
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
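/*
 * Parse all device-scope entries between start and end.  A first pass
 * counts the endpoint and bridge entries, then a pci_dev array of that
 * size is allocated and each entry is resolved with
 * dmar_parse_one_dev_scope().
 */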
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure, which uniquely represents one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
177         if (!dmaru)
178                 return -ENOMEM;
179
180         dmaru->hdr = header;
181         drhd = (struct acpi_dmar_hardware_unit *)header;
182         dmaru->reg_base_addr = drhd->address;
183         dmaru->segment = drhd->segment;
184         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
185
186         ret = alloc_iommu(dmaru);
187         if (ret) {
188                 kfree(dmaru);
189                 return ret;
190         }
191         dmar_register_drhd_unit(dmaru);
192         return 0;
193 }
194
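/*
 * Resolve the device scope of one DRHD unit into pci_dev pointers.
 * INCLUDE_ALL units carry no explicit scope and are skipped; on failure
 * the unit is removed from dmar_drhd_units and freed.
 */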
195 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
196 {
197         struct acpi_dmar_hardware_unit *drhd;
198         int ret = 0;
199
200         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
201
202         if (dmaru->include_all)
203                 return 0;
204
205         ret = dmar_parse_dev_scope((void *)(drhd + 1),
206                                 ((void *)drhd) + drhd->header.length,
207                                 &dmaru->devices_cnt, &dmaru->devices,
208                                 drhd->segment);
209         if (ret) {
210                 list_del(&dmaru->list);
211                 kfree(dmaru);
212         }
213         return ret;
214 }
215
216 #ifdef CONFIG_DMAR
217 LIST_HEAD(dmar_rmrr_units);
218
219 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
220 {
221         list_add(&rmrr->list, &dmar_rmrr_units);
222 }
223
224
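/*
 * Parse one Reserved Memory Region Reporting (RMRR) structure and add it
 * to dmar_rmrr_units.  The device scope is resolved later, in
 * rmrr_parse_dev().
 */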
225 static int __init
226 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
227 {
228         struct acpi_dmar_reserved_memory *rmrr;
229         struct dmar_rmrr_unit *rmrru;
230
231         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
232         if (!rmrru)
233                 return -ENOMEM;
234
235         rmrru->hdr = header;
236         rmrr = (struct acpi_dmar_reserved_memory *)header;
237         rmrru->base_address = rmrr->base_address;
238         rmrru->end_address = rmrr->end_address;
239
240         dmar_register_rmrr_unit(rmrru);
241         return 0;
242 }
243
244 static int __init
245 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
246 {
247         struct acpi_dmar_reserved_memory *rmrr;
248         int ret;
249
250         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
251         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
252                 ((void *)rmrr) + rmrr->header.length,
253                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
254
255         if (ret || (rmrru->devices_cnt == 0)) {
256                 list_del(&rmrru->list);
257                 kfree(rmrru);
258         }
259         return ret;
260 }
261 #endif
262
263 static void __init
264 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
265 {
266         struct acpi_dmar_hardware_unit *drhd;
267         struct acpi_dmar_reserved_memory *rmrr;
268
269         switch (header->type) {
270         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
271                 drhd = (struct acpi_dmar_hardware_unit *)header;
272                 printk (KERN_INFO PREFIX
273                         "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
274                         drhd->flags, (unsigned long long)drhd->address);
275                 break;
276         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
277                 rmrr = (struct acpi_dmar_reserved_memory *)header;
278
279                 printk (KERN_INFO PREFIX
280                         "RMRR base: 0x%016Lx end: 0x%016Lx\n",
281                         (unsigned long long)rmrr->base_address,
282                         (unsigned long long)rmrr->end_address);
283                 break;
284         }
285 }
286
287 /**
288  * dmar_table_detect - checks to see if the platform supports DMAR devices
289  */
290 static int __init dmar_table_detect(void)
291 {
292         acpi_status status = AE_OK;
293
294         /* if we can find the DMAR table, then there are DMAR devices */
295         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
296                                 (struct acpi_table_header **)&dmar_tbl,
297                                 &dmar_tbl_size);
298
299         if (ACPI_SUCCESS(status) && !dmar_tbl) {
300                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
301                 status = AE_NOT_FOUND;
302         }
303
304         return (ACPI_SUCCESS(status) ? 1 : 0);
305 }
306
307 /**
308  * parse_dmar_table - parses the DMA reporting table
309  */
310 static int __init
311 parse_dmar_table(void)
312 {
313         struct acpi_table_dmar *dmar;
314         struct acpi_dmar_header *entry_header;
315         int ret = 0;
316
317         /*
318          * Do it again; the earlier dmar_tbl mapping could have been done
319          * with the fixed map.
320          */
321         dmar_table_detect();
322
323         dmar = (struct acpi_table_dmar *)dmar_tbl;
324         if (!dmar)
325                 return -ENODEV;
326
327         if (dmar->width < PAGE_SHIFT - 1) {
328                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
329                 return -EINVAL;
330         }
331
332         printk (KERN_INFO PREFIX "Host address width %d\n",
333                 dmar->width + 1);
334
335         entry_header = (struct acpi_dmar_header *)(dmar + 1);
336         while (((unsigned long)entry_header) <
337                         (((unsigned long)dmar) + dmar_tbl->length)) {
338                 /* Avoid looping forever on bad ACPI tables */
339                 if (entry_header->length == 0) {
340                         printk(KERN_WARNING PREFIX
341                                 "Invalid 0-length structure\n");
342                         ret = -EINVAL;
343                         break;
344                 }
345
346                 dmar_table_print_dmar_entry(entry_header);
347
348                 switch (entry_header->type) {
349                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
350                         ret = dmar_parse_one_drhd(entry_header);
351                         break;
352                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
353 #ifdef CONFIG_DMAR
354                         ret = dmar_parse_one_rmrr(entry_header);
355 #endif
356                         break;
357                 default:
358                         printk(KERN_WARNING PREFIX
359                                 "Unknown DMAR structure type\n");
360                         ret = 0; /* for forward compatibility */
361                         break;
362                 }
363                 if (ret)
364                         break;
365
366                 entry_header = ((void *)entry_header + entry_header->length);
367         }
368         return ret;
369 }
370
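/*
 * Return 1 if @dev, or any PCI bridge above it, appears in the given
 * device array of a remapping unit's device scope.
 */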
371 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
372                           struct pci_dev *dev)
373 {
374         int index;
375
376         while (dev) {
377                 for (index = 0; index < cnt; index++)
378                         if (dev == devices[index])
379                                 return 1;
380
381                 /* Check our parent */
382                 dev = dev->bus->self;
383         }
384
385         return 0;
386 }
387
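/*
 * Find the DRHD unit that covers @dev: either a unit whose device scope
 * contains the device (or one of its parent bridges), or the INCLUDE_ALL
 * unit of the device's segment.  INCLUDE_ALL units are kept at the tail
 * of the list, so explicitly scoped units are matched first.
 */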
388 struct dmar_drhd_unit *
389 dmar_find_matched_drhd_unit(struct pci_dev *dev)
390 {
391         struct dmar_drhd_unit *dmaru = NULL;
392         struct acpi_dmar_hardware_unit *drhd;
393
394         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
395                 drhd = container_of(dmaru->hdr,
396                                     struct acpi_dmar_hardware_unit,
397                                     header);
398
399                 if (dmaru->include_all &&
400                     drhd->segment == pci_domain_nr(dev->bus))
401                         return dmaru;
402
403                 if (dmar_pci_device_match(dmaru->devices,
404                                           dmaru->devices_cnt, dev))
405                         return dmaru;
406         }
407
408         return NULL;
409 }
410
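/*
 * Second-stage initialization: resolve the device scope of every DRHD
 * (and, with CONFIG_DMAR, every RMRR) unit into pci_dev pointers.  This
 * needs the PCI buses to have been enumerated already.
 */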
411 int __init dmar_dev_scope_init(void)
412 {
413         struct dmar_drhd_unit *drhd, *drhd_n;
414         int ret = -ENODEV;
415
416         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
417                 ret = dmar_parse_dev(drhd);
418                 if (ret)
419                         return ret;
420         }
421
422 #ifdef CONFIG_DMAR
423         {
424                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
425                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
426                         ret = rmrr_parse_dev(rmrr);
427                         if (ret)
428                                 return ret;
429                 }
430         }
431 #endif
432
433         return ret;
434 }
435
436
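/*
 * Parse the DMAR ACPI table once and report what was found.  Returns
 * -ENODEV if there is no table or no DRHD units, a negative error for a
 * malformed table, and 0 on success.
 */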
437 int __init dmar_table_init(void)
438 {
439         static int dmar_table_initialized;
440         int ret;
441
442         if (dmar_table_initialized)
443                 return 0;
444
445         dmar_table_initialized = 1;
446
447         ret = parse_dmar_table();
448         if (ret) {
449                 if (ret != -ENODEV)
450                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
451                 return ret;
452         }
453
454         if (list_empty(&dmar_drhd_units)) {
455                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
456                 return -ENODEV;
457         }
458
459 #ifdef CONFIG_DMAR
460         if (list_empty(&dmar_rmrr_units))
461                 printk(KERN_INFO PREFIX "No RMRR found\n");
462 #endif
463
464 #ifdef CONFIG_INTR_REMAP
465         parse_ioapics_under_ir();
466 #endif
467         return 0;
468 }
469
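/*
 * Early detection of DMAR units: map the DMAR table just long enough to
 * decide whether remapping hardware is present (setting iommu_detected
 * for the DMA-remapping case), then unmap it again.
 */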
470 void __init detect_intel_iommu(void)
471 {
472         int ret;
473
474         ret = dmar_table_detect();
475
476         {
477 #ifdef CONFIG_INTR_REMAP
478                 struct acpi_table_dmar *dmar;
479                 /*
480                  * For now we disable DMA remapping when interrupt
481                  * remapping is enabled.
482                  * Once queued invalidation support for IOTLB invalidation
483                  * is added, this will no longer be needed.
484                  */
485                 dmar = (struct acpi_table_dmar *) dmar_tbl;
486                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
487                         printk(KERN_INFO
488                                "Queued invalidation will be enabled to support "
489                                "x2apic and Intr-remapping.\n");
490 #endif
491 #ifdef CONFIG_DMAR
492                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
493                     !dmar_disabled)
494                         iommu_detected = 1;
495 #endif
496         }
497         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
498         dmar_tbl = NULL;
499 }
500
501
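/*
 * Allocate and minimally initialize an intel_iommu for one DRHD unit:
 * map its register range (growing the mapping if the capability
 * registers say it is larger than one page) and cache the CAP/ECAP
 * register values.  Full DMA-remapping setup happens later.
 */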
502 int alloc_iommu(struct dmar_drhd_unit *drhd)
503 {
504         struct intel_iommu *iommu;
505         int map_size;
506         u32 ver;
507         static int iommu_allocated = 0;
508         int agaw = 0;
509
510         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
511         if (!iommu)
512                 return -ENOMEM;
513
514         iommu->seq_id = iommu_allocated++;
515         sprintf (iommu->name, "dmar%d", iommu->seq_id);
516
517         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
518         if (!iommu->reg) {
519                 printk(KERN_ERR "IOMMU: can't map the region\n");
520                 goto error;
521         }
522         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
523         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
524
525 #ifdef CONFIG_DMAR
526         agaw = iommu_calculate_agaw(iommu);
527         if (agaw < 0) {
528                 printk(KERN_ERR
529                         "Cannot get a valid agaw for iommu (seq_id = %d)\n",
530                         iommu->seq_id);
531                 goto error;
532         }
533 #endif
534         iommu->agaw = agaw;
535
536         /* the registers might be more than one page */
537         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
538                 cap_max_fault_reg_offset(iommu->cap));
539         map_size = VTD_PAGE_ALIGN(map_size);
540         if (map_size > VTD_PAGE_SIZE) {
541                 iounmap(iommu->reg);
542                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
543                 if (!iommu->reg) {
544                         printk(KERN_ERR "IOMMU: can't map the region\n");
545                         goto error;
546                 }
547         }
548
549         ver = readl(iommu->reg + DMAR_VER_REG);
550         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
551                 (unsigned long long)drhd->reg_base_addr,
552                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
553                 (unsigned long long)iommu->cap,
554                 (unsigned long long)iommu->ecap);
555
556         spin_lock_init(&iommu->register_lock);
557
558         drhd->iommu = iommu;
559         return 0;
560 error:
561         kfree(iommu);
562         return -1;
563 }
564
565 void free_iommu(struct intel_iommu *iommu)
566 {
567         if (!iommu)
568                 return;
569
570 #ifdef CONFIG_DMAR
571         free_dmar_iommu(iommu);
572 #endif
573
574         if (iommu->reg)
575                 iounmap(iommu->reg);
576         kfree(iommu);
577 }
578
579 /*
580  * Reclaim all the submitted descriptors which have completed their work.
581  */
582 static inline void reclaim_free_desc(struct q_inval *qi)
583 {
584         while (qi->desc_status[qi->free_tail] == QI_DONE) {
585                 qi->desc_status[qi->free_tail] = QI_FREE;
586                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
587                 qi->free_cnt++;
588         }
589 }
590
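/*
 * Check whether the descriptor at @index hit an Invalidation Queue Error.
 * If so, overwrite the offending descriptor with the following wait
 * descriptor so that hardware can make forward progress, clear the IQE
 * bit and return -EINVAL so the caller does not spin forever.
 */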
591 static int qi_check_fault(struct intel_iommu *iommu, int index)
592 {
593         u32 fault;
594         int head;
595         struct q_inval *qi = iommu->qi;
596         int wait_index = (index + 1) % QI_LENGTH;
597
598         fault = readl(iommu->reg + DMAR_FSTS_REG);
599
600         /*
601          * If IQE happens, the head points to the descriptor associated
602          * with the error. No new descriptors are fetched until the IQE
603          * is cleared.
604          */
605         if (fault & DMA_FSTS_IQE) {
606                 head = readl(iommu->reg + DMAR_IQH_REG);
607                 if ((head >> 4) == index) {
608                         memcpy(&qi->desc[index], &qi->desc[wait_index],
609                                         sizeof(struct qi_desc));
610                         __iommu_flush_cache(iommu, &qi->desc[index],
611                                         sizeof(struct qi_desc));
612                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
613                         return -EINVAL;
614                 }
615         }
616
617         return 0;
618 }
619
620 /*
621  * Submit the queued invalidation descriptor to the remapping
622  * hardware unit and wait for its completion.
623  */
624 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
625 {
626         int rc = 0;
627         struct q_inval *qi = iommu->qi;
628         struct qi_desc *hw, wait_desc;
629         int wait_index, index;
630         unsigned long flags;
631
632         if (!qi)
633                 return 0;
634
635         hw = qi->desc;
636
637         spin_lock_irqsave(&qi->q_lock, flags);
638         while (qi->free_cnt < 3) {
639                 spin_unlock_irqrestore(&qi->q_lock, flags);
640                 cpu_relax();
641                 spin_lock_irqsave(&qi->q_lock, flags);
642         }
643
644         index = qi->free_head;
645         wait_index = (index + 1) % QI_LENGTH;
646
647         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
648
649         hw[index] = *desc;
650
651         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
652                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
653         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
654
655         hw[wait_index] = wait_desc;
656
657         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
658         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
659
660         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
661         qi->free_cnt -= 2;
662
663         /*
664          * update the HW tail register indicating the presence of
665          * new descriptors.
666          */
667         writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
668
669         while (qi->desc_status[wait_index] != QI_DONE) {
670                 /*
671                  * We leave interrupts disabled to prevent the interrupt
672                  * context from queueing another cmd while a cmd is already
673                  * submitted and waiting for completion on this cpu. This
674                  * avoids a deadlock where the interrupt context could wait
675                  * indefinitely for free slots in the queue.
676                  */
677                 rc = qi_check_fault(iommu, index);
678                 if (rc)
679                         goto out;
680
681                 spin_unlock(&qi->q_lock);
682                 cpu_relax();
683                 spin_lock(&qi->q_lock);
684         }
685 out:
686         qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
687
688         reclaim_free_desc(qi);
689         spin_unlock_irqrestore(&qi->q_lock, flags);
690
691         return rc;
692 }
693
694 /*
695  * Flush the global interrupt entry cache.
696  */
697 void qi_global_iec(struct intel_iommu *iommu)
698 {
699         struct qi_desc desc;
700
701         desc.low = QI_IEC_TYPE;
702         desc.high = 0;
703
704         /* should never fail */
705         qi_submit_sync(&desc, iommu);
706 }
707
708 int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
709                      u64 type, int non_present_entry_flush)
710 {
711         struct qi_desc desc;
712
713         if (non_present_entry_flush) {
714                 if (!cap_caching_mode(iommu->cap))
715                         return 1;
716                 else
717                         did = 0;
718         }
719
720         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
721                         | QI_CC_GRAN(type) | QI_CC_TYPE;
722         desc.high = 0;
723
724         return qi_submit_sync(&desc, iommu);
725 }
726
727 int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
728                    unsigned int size_order, u64 type,
729                    int non_present_entry_flush)
730 {
731         u8 dw = 0, dr = 0;
732
733         struct qi_desc desc;
734         int ih = 0;
735
736         if (non_present_entry_flush) {
737                 if (!cap_caching_mode(iommu->cap))
738                         return 1;
739                 else
740                         did = 0;
741         }
742
743         if (cap_write_drain(iommu->cap))
744                 dw = 1;
745
746         if (cap_read_drain(iommu->cap))
747                 dr = 1;
748
749         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
750                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
751         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
752                 | QI_IOTLB_AM(size_order);
753
754         return qi_submit_sync(&desc, iommu);
755 }
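
/*
 * Example (illustrative sketch only, not used in this file): flushing all
 * IOTLB entries of domain 'did' with domain-selective granularity,
 * assuming the DMA_TLB_DSI_FLUSH granularity constant from
 * <linux/intel-iommu.h>:
 *
 *	qi_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, 0);
 */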
756
757 /*
758  * Disable Queued Invalidation interface.
759  */
760 void dmar_disable_qi(struct intel_iommu *iommu)
761 {
762         unsigned long flags;
763         u32 sts;
764         cycles_t start_time = get_cycles();
765
766         if (!ecap_qis(iommu->ecap))
767                 return;
768
769         spin_lock_irqsave(&iommu->register_lock, flags);
770
771         sts = readl(iommu->reg + DMAR_GSTS_REG);
772         if (!(sts & DMA_GSTS_QIES))
773                 goto end;
774
775         /*
776          * Give the HW a chance to complete the pending invalidation requests.
777          */
778         while ((readl(iommu->reg + DMAR_IQT_REG) !=
779                 readl(iommu->reg + DMAR_IQH_REG)) &&
780                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
781                 cpu_relax();
782
783         iommu->gcmd &= ~DMA_GCMD_QIE;
784
785         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
786
787         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
788                       !(sts & DMA_GSTS_QIES), sts);
789 end:
790         spin_unlock_irqrestore(&iommu->register_lock, flags);
791 }
792
793 /*
794  * Enable queued invalidation.
795  */
796 static void __dmar_enable_qi(struct intel_iommu *iommu)
797 {
798         u32 cmd, sts;
799         unsigned long flags;
800         struct q_inval *qi = iommu->qi;
801
802         qi->free_head = qi->free_tail = 0;
803         qi->free_cnt = QI_LENGTH;
804
805         spin_lock_irqsave(&iommu->register_lock, flags);
806
807         /* write zero to the tail reg */
808         writel(0, iommu->reg + DMAR_IQT_REG);
809
810         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
811
812         cmd = iommu->gcmd | DMA_GCMD_QIE;
813         iommu->gcmd |= DMA_GCMD_QIE;
814         writel(cmd, iommu->reg + DMAR_GCMD_REG);
815
816         /* Make sure hardware complete it */
817         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
818
819         spin_unlock_irqrestore(&iommu->register_lock, flags);
820 }
821
822 /*
823  * Enable Queued Invalidation interface. This is required to support
824  * interrupt-remapping. Also used by DMA-remapping, which replaces
825  * register-based IOTLB invalidation.
826  */
827 int dmar_enable_qi(struct intel_iommu *iommu)
828 {
829         struct q_inval *qi;
830
831         if (!ecap_qis(iommu->ecap))
832                 return -ENOENT;
833
834         /*
835          * Queued invalidation is already set up and enabled.
836          */
837         if (iommu->qi)
838                 return 0;
839
840         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
841         if (!iommu->qi)
842                 return -ENOMEM;
843
844         qi = iommu->qi;
845
846         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
847         if (!qi->desc) {
848                 kfree(qi);
849                 iommu->qi = NULL;
850                 return -ENOMEM;
851         }
852
853         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
854         if (!qi->desc_status) {
855                 free_page((unsigned long) qi->desc);
856                 kfree(qi);
857                 iommu->qi = NULL;
858                 return -ENOMEM;
859         }
860
861         qi->free_head = qi->free_tail = 0;
862         qi->free_cnt = QI_LENGTH;
863
864         spin_lock_init(&qi->q_lock);
865
866         __dmar_enable_qi(iommu);
867
868         return 0;
869 }
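
/*
 * Typical usage (sketch): once a unit advertises QI support, the caller
 * enables the interface and can then submit invalidation requests, e.g.
 *
 *	if (ecap_qis(iommu->ecap) && !dmar_enable_qi(iommu))
 *		qi_global_iec(iommu);
 *
 * qi_global_iec() is the interrupt-entry-cache flush defined above.
 */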
870
871 /* iommu interrupt handling. Most of it is MSI-like. */
872
873 enum faulttype {
874         DMA_REMAP,
875         INTR_REMAP,
876         UNKNOWN,
877 };
878
879 static const char *dma_remap_fault_reasons[] =
880 {
881         "Software",
882         "Present bit in root entry is clear",
883         "Present bit in context entry is clear",
884         "Invalid context entry",
885         "Access beyond MGAW",
886         "PTE Write access is not set",
887         "PTE Read access is not set",
888         "Next page table ptr is invalid",
889         "Root table address invalid",
890         "Context table ptr is invalid",
891         "non-zero reserved fields in RTP",
892         "non-zero reserved fields in CTP",
893         "non-zero reserved fields in PTE",
894 };
895
896 static const char *intr_remap_fault_reasons[] =
897 {
898         "Detected reserved fields in the decoded interrupt-remapped request",
899         "Interrupt index exceeded the interrupt-remapping table size",
900         "Present field in the IRTE entry is clear",
901         "Error accessing interrupt-remapping table pointed by IRTA_REG",
902         "Detected reserved fields in the IRTE entry",
903         "Blocked a compatibility format interrupt request",
904         "Blocked an interrupt request due to source-id verification failure",
905 };
906
907 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
908
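/*
 * Map a hardware fault reason code to a human-readable string and its
 * fault type.  Codes below ARRAY_SIZE(dma_remap_fault_reasons) are DMA
 * remapping faults; codes starting at 0x20 are interrupt remapping
 * faults; everything else is reported as unknown.
 */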
909 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
910 {
911         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
912                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
913                 *fault_type = INTR_REMAP;
914                 return intr_remap_fault_reasons[fault_reason - 0x20];
915         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
916                 *fault_type = DMA_REMAP;
917                 return dma_remap_fault_reasons[fault_reason];
918         } else {
919                 *fault_type = UNKNOWN;
920                 return "Unknown";
921         }
922 }
923
924 void dmar_msi_unmask(unsigned int irq)
925 {
926         struct intel_iommu *iommu = get_irq_data(irq);
927         unsigned long flag;
928
929         /* unmask it */
930         spin_lock_irqsave(&iommu->register_lock, flag);
931         writel(0, iommu->reg + DMAR_FECTL_REG);
932         /* Read a reg to force flush the post write */
933         readl(iommu->reg + DMAR_FECTL_REG);
934         spin_unlock_irqrestore(&iommu->register_lock, flag);
935 }
936
937 void dmar_msi_mask(unsigned int irq)
938 {
939         unsigned long flag;
940         struct intel_iommu *iommu = get_irq_data(irq);
941
942         /* mask it */
943         spin_lock_irqsave(&iommu->register_lock, flag);
944         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
945         /* Read a reg to force flush the post write */
946         readl(iommu->reg + DMAR_FECTL_REG);
947         spin_unlock_irqrestore(&iommu->register_lock, flag);
948 }
949
950 void dmar_msi_write(int irq, struct msi_msg *msg)
951 {
952         struct intel_iommu *iommu = get_irq_data(irq);
953         unsigned long flag;
954
955         spin_lock_irqsave(&iommu->register_lock, flag);
956         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
957         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
958         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
959         spin_unlock_irqrestore(&iommu->register_lock, flag);
960 }
961
962 void dmar_msi_read(int irq, struct msi_msg *msg)
963 {
964         struct intel_iommu *iommu = get_irq_data(irq);
965         unsigned long flag;
966
967         spin_lock_irqsave(&iommu->register_lock, flag);
968         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
969         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
970         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
971         spin_unlock_irqrestore(&iommu->register_lock, flag);
972 }
973
974 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
975                 u8 fault_reason, u16 source_id, unsigned long long addr)
976 {
977         const char *reason;
978         int fault_type;
979
980         reason = dmar_get_fault_reason(fault_reason, &fault_type);
981
982         if (fault_type == INTR_REMAP)
983                 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
984                        "fault index %llx\n"
985                         "INTR-REMAP:[fault reason %02d] %s\n",
986                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
987                         PCI_FUNC(source_id & 0xFF), addr >> 48,
988                         fault_reason, reason);
989         else
990                 printk(KERN_ERR
991                        "DMAR:[%s] Request device [%02x:%02x.%d] "
992                        "fault addr %llx \n"
993                        "DMAR:[fault reason %02d] %s\n",
994                        (type ? "DMA Read" : "DMA Write"),
995                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
996                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
997         return 0;
998 }
999
1000 #define PRIMARY_FAULT_REG_LEN (16)
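/*
 * Fault interrupt handler: walk the primary fault recording registers
 * starting at the index reported in the fault status register, log each
 * recorded fault, clear it, and finally clear the remaining status bits.
 */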
1001 irqreturn_t dmar_fault(int irq, void *dev_id)
1002 {
1003         struct intel_iommu *iommu = dev_id;
1004         int reg, fault_index;
1005         u32 fault_status;
1006         unsigned long flag;
1007
1008         spin_lock_irqsave(&iommu->register_lock, flag);
1009         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1010         if (fault_status)
1011                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1012                        fault_status);
1013
1014         /* TBD: ignore advanced fault log currently */
1015         if (!(fault_status & DMA_FSTS_PPF))
1016                 goto clear_rest;
1017
1018         fault_index = dma_fsts_fault_record_index(fault_status);
1019         reg = cap_fault_reg_offset(iommu->cap);
1020         while (1) {
1021                 u8 fault_reason;
1022                 u16 source_id;
1023                 u64 guest_addr;
1024                 int type;
1025                 u32 data;
1026
1027                 /* highest 32 bits */
1028                 data = readl(iommu->reg + reg +
1029                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1030                 if (!(data & DMA_FRCD_F))
1031                         break;
1032
1033                 fault_reason = dma_frcd_fault_reason(data);
1034                 type = dma_frcd_type(data);
1035
1036                 data = readl(iommu->reg + reg +
1037                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1038                 source_id = dma_frcd_source_id(data);
1039
1040                 guest_addr = dmar_readq(iommu->reg + reg +
1041                                 fault_index * PRIMARY_FAULT_REG_LEN);
1042                 guest_addr = dma_frcd_page_addr(guest_addr);
1043                 /* clear the fault */
1044                 writel(DMA_FRCD_F, iommu->reg + reg +
1045                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1046
1047                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1048
1049                 dmar_fault_do_one(iommu, type, fault_reason,
1050                                 source_id, guest_addr);
1051
1052                 fault_index++;
1053                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1054                         fault_index = 0;
1055                 spin_lock_irqsave(&iommu->register_lock, flag);
1056         }
1057 clear_rest:
1058         /* clear all the other faults */
1059         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1060         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1061
1062         spin_unlock_irqrestore(&iommu->register_lock, flag);
1063         return IRQ_HANDLED;
1064 }
1065
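/*
 * Allocate an irq for the fault event interrupt of @iommu, program the
 * fault event MSI registers through the architecture hook and install
 * dmar_fault() as the handler.
 */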
1066 int dmar_set_interrupt(struct intel_iommu *iommu)
1067 {
1068         int irq, ret;
1069
1070         /*
1071          * Check if the fault interrupt is already initialized.
1072          */
1073         if (iommu->irq)
1074                 return 0;
1075
1076         irq = create_irq();
1077         if (!irq) {
1078                 printk(KERN_ERR "IOMMU: no free vectors\n");
1079                 return -EINVAL;
1080         }
1081
1082         set_irq_data(irq, iommu);
1083         iommu->irq = irq;
1084
1085         ret = arch_setup_dmar_msi(irq);
1086         if (ret) {
1087                 set_irq_data(irq, NULL);
1088                 iommu->irq = 0;
1089                 destroy_irq(irq);
1090                 return ret;
1091         }
1092
1093         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1094         if (ret)
1095                 printk(KERN_ERR "IOMMU: can't request irq\n");
1096         return ret;
1097 }
1098
1099 int __init enable_drhd_fault_handling(void)
1100 {
1101         struct dmar_drhd_unit *drhd;
1102
1103         /*
1104          * Enable fault control interrupt.
1105          */
1106         for_each_drhd_unit(drhd) {
1107                 int ret;
1108                 struct intel_iommu *iommu = drhd->iommu;
1109                 ret = dmar_set_interrupt(iommu);
1110
1111                 if (ret) {
1112                         printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1113                                " interrupt, ret %d\n",
1114                                (unsigned long long)drhd->reg_base_addr, ret);
1115                         return -1;
1116                 }
1117         }
1118
1119         return 0;
1120 }
1121
1122 /*
1123  * Re-enable Queued Invalidation interface.
1124  */
1125 int dmar_reenable_qi(struct intel_iommu *iommu)
1126 {
1127         if (!ecap_qis(iommu->ecap))
1128                 return -ENOENT;
1129
1130         if (!iommu->qi)
1131                 return -ENOENT;
1132
1133         /*
1134          * First disable queued invalidation.
1135          */
1136         dmar_disable_qi(iommu);
1137         /*
1138          * Then enable queued invalidation again. Since there is no pending
1139          * invalidation requests now, it's safe to re-enable queued
1140          * invalidation.
1141          */
1142         __dmar_enable_qi(iommu);
1143
1144         return 0;
1145 }