intel-iommu: set compatibility format interrupt
[linux-2.6] drivers/pci/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
40 /* No locks are needed, as the DMA remapping hardware unit
41  * list is constructed at boot time and hotplug of
42  * these units is not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so that scanning the list will
53          * find it at the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-existent devices in the DMAR table;
79                  * just ignore them
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path++;
95                 count--;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
106                         pdev->subordinate) || (scope->entry_type ==
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 printk(KERN_WARNING PREFIX
109                         "Device scope type does not match for %s\n",
110                          pci_name(pdev));
111                 pci_dev_put(pdev);
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
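
/*
 * A device scope entry, as parsed above, is a variable-length structure: an
 * acpi_dmar_device_scope header (entry type, length, start bus number)
 * followed by one acpi_dmar_pci_path element per hop, roughly:
 *
 *        scope header (bus B)
 *        path[0] = (dev, fn)    bridge at B:dev.fn
 *        path[1] = (dev, fn)    device on that bridge's subordinate bus
 *
 * dmar_parse_one_dev_scope() follows one hop per path element, descending
 * through pdev->subordinate, and finally checks that the device it found
 * matches the declared type (endpoint vs. bridge).
 */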
117
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure which uniquely represents one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
177         if (!dmaru)
178                 return -ENOMEM;
179
180         dmaru->hdr = header;
181         drhd = (struct acpi_dmar_hardware_unit *)header;
182         dmaru->reg_base_addr = drhd->address;
183         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184
185         ret = alloc_iommu(dmaru);
186         if (ret) {
187                 kfree(dmaru);
188                 return ret;
189         }
190         dmar_register_drhd_unit(dmaru);
191         return 0;
192 }
193
194 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
195 {
196         struct acpi_dmar_hardware_unit *drhd;
197         int ret = 0;
198
199         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
200
201         if (dmaru->include_all)
202                 return 0;
203
204         ret = dmar_parse_dev_scope((void *)(drhd + 1),
205                                 ((void *)drhd) + drhd->header.length,
206                                 &dmaru->devices_cnt, &dmaru->devices,
207                                 drhd->segment);
208         if (ret) {
209                 list_del(&dmaru->list);
210                 kfree(dmaru);
211         }
212         return ret;
213 }
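
/*
 * DRHD parsing is done in two phases: dmar_parse_one_drhd() runs while the
 * DMAR table is walked early at boot and only records the register base
 * address and the INCLUDE_ALL flag, while dmar_parse_dev() is called later,
 * from dmar_dev_scope_init(), once the PCI core can be queried and the
 * device scope entries can be resolved to actual pci_dev pointers.
 */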
214
215 #ifdef CONFIG_DMAR
216 LIST_HEAD(dmar_rmrr_units);
217
218 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
219 {
220         list_add(&rmrr->list, &dmar_rmrr_units);
221 }
222
223
224 static int __init
225 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
226 {
227         struct acpi_dmar_reserved_memory *rmrr;
228         struct dmar_rmrr_unit *rmrru;
229
230         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
231         if (!rmrru)
232                 return -ENOMEM;
233
234         rmrru->hdr = header;
235         rmrr = (struct acpi_dmar_reserved_memory *)header;
236         rmrru->base_address = rmrr->base_address;
237         rmrru->end_address = rmrr->end_address;
238
239         dmar_register_rmrr_unit(rmrru);
240         return 0;
241 }
242
243 static int __init
244 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
245 {
246         struct acpi_dmar_reserved_memory *rmrr;
247         int ret;
248
249         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
250         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
251                 ((void *)rmrr) + rmrr->header.length,
252                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
253
254         if (ret || (rmrru->devices_cnt == 0)) {
255                 list_del(&rmrru->list);
256                 kfree(rmrru);
257         }
258         return ret;
259 }
260 #endif
261
262 static void __init
263 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
264 {
265         struct acpi_dmar_hardware_unit *drhd;
266         struct acpi_dmar_reserved_memory *rmrr;
267
268         switch (header->type) {
269         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
270                 drhd = (struct acpi_dmar_hardware_unit *)header;
271                 printk (KERN_INFO PREFIX
272                         "DRHD (flags: 0x%08x) base: 0x%016Lx\n",
273                         drhd->flags, (unsigned long long)drhd->address);
274                 break;
275         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
276                 rmrr = (struct acpi_dmar_reserved_memory *)header;
277
278                 printk (KERN_INFO PREFIX
279                         "RMRR base: 0x%016Lx end: 0x%016Lx\n",
280                         (unsigned long long)rmrr->base_address,
281                         (unsigned long long)rmrr->end_address);
282                 break;
283         }
284 }
285
286 /**
287  * dmar_table_detect - checks to see if the platform supports DMAR devices
288  */
289 static int __init dmar_table_detect(void)
290 {
291         acpi_status status = AE_OK;
292
293         /* if we can find the DMAR table, then there are DMAR devices */
294         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
295                                 (struct acpi_table_header **)&dmar_tbl,
296                                 &dmar_tbl_size);
297
298         if (ACPI_SUCCESS(status) && !dmar_tbl) {
299                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
300                 status = AE_NOT_FOUND;
301         }
302
303         return (ACPI_SUCCESS(status) ? 1 : 0);
304 }
305
306 /**
307  * parse_dmar_table - parses the DMA reporting table
308  */
309 static int __init
310 parse_dmar_table(void)
311 {
312         struct acpi_table_dmar *dmar;
313         struct acpi_dmar_header *entry_header;
314         int ret = 0;
315
316         /*
317          * Do it again; the earlier dmar_tbl mapping could have been
318          * created with the fixed map.
319          */
320         dmar_table_detect();
321
322         dmar = (struct acpi_table_dmar *)dmar_tbl;
323         if (!dmar)
324                 return -ENODEV;
325
326         if (dmar->width < PAGE_SHIFT - 1) {
327                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
328                 return -EINVAL;
329         }
330
331         printk (KERN_INFO PREFIX "Host address width %d\n",
332                 dmar->width + 1);
333
334         entry_header = (struct acpi_dmar_header *)(dmar + 1);
335         while (((unsigned long)entry_header) <
336                         (((unsigned long)dmar) + dmar_tbl->length)) {
337                 /* Avoid looping forever on bad ACPI tables */
338                 if (entry_header->length == 0) {
339                         printk(KERN_WARNING PREFIX
340                                 "Invalid 0-length structure\n");
341                         ret = -EINVAL;
342                         break;
343                 }
344
345                 dmar_table_print_dmar_entry(entry_header);
346
347                 switch (entry_header->type) {
348                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
349                         ret = dmar_parse_one_drhd(entry_header);
350                         break;
351                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
352 #ifdef CONFIG_DMAR
353                         ret = dmar_parse_one_rmrr(entry_header);
354 #endif
355                         break;
356                 default:
357                         printk(KERN_WARNING PREFIX
358                                 "Unknown DMAR structure type\n");
359                         ret = 0; /* for forward compatibility */
360                         break;
361                 }
362                 if (ret)
363                         break;
364
365                 entry_header = ((void *)entry_header + entry_header->length);
366         }
367         return ret;
368 }
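
/*
 * The DMAR table walked above consists of an acpi_table_dmar header (host
 * address width and feature flags) followed by a sequence of variable-length
 * remapping structures, each starting with an acpi_dmar_header that gives
 * its type and length.  Only the DRHD and RMRR types are handled here;
 * unknown types are skipped for forward compatibility.
 */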
369
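/*
 * Returns 1 if @dev, or any PCI bridge upstream of it, appears in the
 * @devices array of a remapping unit's device scope.  Walking up through
 * dev->bus->self means that a device sitting behind a bridge listed in the
 * scope is matched as well.
 */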
370 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
371                           struct pci_dev *dev)
372 {
373         int index;
374
375         while (dev) {
376                 for (index = 0; index < cnt; index++)
377                         if (dev == devices[index])
378                                 return 1;
379
380                 /* Check our parent */
381                 dev = dev->bus->self;
382         }
383
384         return 0;
385 }
386
387 struct dmar_drhd_unit *
388 dmar_find_matched_drhd_unit(struct pci_dev *dev)
389 {
390         struct dmar_drhd_unit *dmaru = NULL;
391         struct acpi_dmar_hardware_unit *drhd;
392
393         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
394                 drhd = container_of(dmaru->hdr,
395                                     struct acpi_dmar_hardware_unit,
396                                     header);
397
398                 if (dmaru->include_all &&
399                     drhd->segment == pci_domain_nr(dev->bus))
400                         return dmaru;
401
402                 if (dmar_pci_device_match(dmaru->devices,
403                                           dmaru->devices_cnt, dev))
404                         return dmaru;
405         }
406
407         return NULL;
408 }
409
410 int __init dmar_dev_scope_init(void)
411 {
412         struct dmar_drhd_unit *drhd, *drhd_n;
413         int ret = -ENODEV;
414
415         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
416                 ret = dmar_parse_dev(drhd);
417                 if (ret)
418                         return ret;
419         }
420
421 #ifdef CONFIG_DMAR
422         {
423                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
424                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
425                         ret = rmrr_parse_dev(rmrr);
426                         if (ret)
427                                 return ret;
428                 }
429         }
430 #endif
431
432         return ret;
433 }
434
435
436 int __init dmar_table_init(void)
437 {
438         static int dmar_table_initialized;
439         int ret;
440
441         if (dmar_table_initialized)
442                 return 0;
443
444         dmar_table_initialized = 1;
445
446         ret = parse_dmar_table();
447         if (ret) {
448                 if (ret != -ENODEV)
449                         printk(KERN_INFO PREFIX "Failed to parse DMAR table\n");
450                 return ret;
451         }
452
453         if (list_empty(&dmar_drhd_units)) {
454                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
455                 return -ENODEV;
456         }
457
458 #ifdef CONFIG_DMAR
459         if (list_empty(&dmar_rmrr_units))
460                 printk(KERN_INFO PREFIX "No RMRR found\n");
461 #endif
462
463 #ifdef CONFIG_INTR_REMAP
464         parse_ioapics_under_ir();
465 #endif
466         return 0;
467 }
468
469 void __init detect_intel_iommu(void)
470 {
471         int ret;
472
473         ret = dmar_table_detect();
474
475         {
476 #ifdef CONFIG_INTR_REMAP
477                 struct acpi_table_dmar *dmar;
478                 /*
479                  * for now we will disable dma-remapping when interrupt
480                  * remapping is enabled.
481                  * When support for queued invalidation for IOTLB invalidation
482                  * is added, we will not need this any more.
483                  */
484                 dmar = (struct acpi_table_dmar *) dmar_tbl;
485                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
486                         printk(KERN_INFO
487                                "Queued invalidation will be enabled to support "
488                                "x2apic and Intr-remapping.\n");
489 #endif
490 #ifdef CONFIG_DMAR
491                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
492                     !dmar_disabled)
493                         iommu_detected = 1;
494 #endif
495         }
496         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
497         dmar_tbl = NULL;
498 }
499
500
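/*
 * Allocate and minimally initialize an intel_iommu for one DRHD: map the
 * register window (starting with a single VT-d page, then remapping it
 * larger if the capability registers indicate that the IOTLB or
 * fault-recording registers live beyond the first page), cache the CAP and
 * ECAP registers and, for DMA remapping, compute the adjusted guest address
 * width (agaw).
 */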
501 int alloc_iommu(struct dmar_drhd_unit *drhd)
502 {
503         struct intel_iommu *iommu;
504         int map_size;
505         u32 ver;
506         static int iommu_allocated = 0;
507         int agaw = 0;
508
509         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
510         if (!iommu)
511                 return -ENOMEM;
512
513         iommu->seq_id = iommu_allocated++;
514         sprintf (iommu->name, "dmar%d", iommu->seq_id);
515
516         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
517         if (!iommu->reg) {
518                 printk(KERN_ERR "IOMMU: can't map the region\n");
519                 goto error;
520         }
521         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
522         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
523
524 #ifdef CONFIG_DMAR
525         agaw = iommu_calculate_agaw(iommu);
526         if (agaw < 0) {
527                 printk(KERN_ERR
528                         "Cannot get a valid agaw for iommu (seq_id = %d)\n",
529                         iommu->seq_id);
530                 goto error;
531         }
532 #endif
533         iommu->agaw = agaw;
534
535         /* the registers might be more than one page */
536         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
537                 cap_max_fault_reg_offset(iommu->cap));
538         map_size = VTD_PAGE_ALIGN(map_size);
539         if (map_size > VTD_PAGE_SIZE) {
540                 iounmap(iommu->reg);
541                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
542                 if (!iommu->reg) {
543                         printk(KERN_ERR "IOMMU: can't map the region\n");
544                         goto error;
545                 }
546         }
547
548         ver = readl(iommu->reg + DMAR_VER_REG);
549         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
550                 (unsigned long long)drhd->reg_base_addr,
551                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
552                 (unsigned long long)iommu->cap,
553                 (unsigned long long)iommu->ecap);
554
555         spin_lock_init(&iommu->register_lock);
556
557         drhd->iommu = iommu;
558         return 0;
559 error:
560         kfree(iommu);
561         return -1;
562 }
563
564 void free_iommu(struct intel_iommu *iommu)
565 {
566         if (!iommu)
567                 return;
568
569 #ifdef CONFIG_DMAR
570         free_dmar_iommu(iommu);
571 #endif
572
573         if (iommu->reg)
574                 iounmap(iommu->reg);
575         kfree(iommu);
576 }
577
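/*
 * The invalidation queue is a ring of QI_LENGTH 16-byte descriptors (struct
 * qi_desc), shadowed by a software desc_status[] array.  free_head is the
 * slot where the next descriptor is placed, free_tail trails behind it
 * reclaiming slots once they reach QI_DONE, and free_cnt tracks the slack
 * between them.  The hardware head/tail registers (DMAR_IQH_REG and
 * DMAR_IQT_REG) are programmed with the slot index shifted left by 4, i.e.
 * the byte offset of the 16-byte descriptor.
 */
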
578 /*
579  * Reclaim all the submitted descriptors which have completed their work.
580  */
581 static inline void reclaim_free_desc(struct q_inval *qi)
582 {
583         while (qi->desc_status[qi->free_tail] == QI_DONE) {
584                 qi->desc_status[qi->free_tail] = QI_FREE;
585                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
586                 qi->free_cnt++;
587         }
588 }
589
590 static int qi_check_fault(struct intel_iommu *iommu, int index)
591 {
592         u32 fault;
593         int head;
594         struct q_inval *qi = iommu->qi;
595         int wait_index = (index + 1) % QI_LENGTH;
596
597         fault = readl(iommu->reg + DMAR_FSTS_REG);
598
599         /*
600          * If IQE happens, the head points to the descriptor associated
601          * with the error. No new descriptors are fetched until the IQE
602          * is cleared.
603          */
604         if (fault & DMA_FSTS_IQE) {
605                 head = readl(iommu->reg + DMAR_IQH_REG);
606                 if ((head >> 4) == index) {
607                         memcpy(&qi->desc[index], &qi->desc[wait_index],
608                                         sizeof(struct qi_desc));
609                         __iommu_flush_cache(iommu, &qi->desc[index],
610                                         sizeof(struct qi_desc));
611                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
612                         return -EINVAL;
613                 }
614         }
615
616         return 0;
617 }
618
619 /*
620  * Submit the queued invalidation descriptor to the remapping
621  * hardware unit and wait for its completion.
622  */
623 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
624 {
625         int rc = 0;
626         struct q_inval *qi = iommu->qi;
627         struct qi_desc *hw, wait_desc;
628         int wait_index, index;
629         unsigned long flags;
630
631         if (!qi)
632                 return 0;
633
634         hw = qi->desc;
635
636         spin_lock_irqsave(&qi->q_lock, flags);
637         while (qi->free_cnt < 3) {
638                 spin_unlock_irqrestore(&qi->q_lock, flags);
639                 cpu_relax();
640                 spin_lock_irqsave(&qi->q_lock, flags);
641         }
642
643         index = qi->free_head;
644         wait_index = (index + 1) % QI_LENGTH;
645
646         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
647
648         hw[index] = *desc;
649
650         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
651                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
652         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
653
654         hw[wait_index] = wait_desc;
655
656         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
657         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
658
659         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
660         qi->free_cnt -= 2;
661
662         /*
663          * update the HW tail register indicating the presence of
664          * new descriptors.
665          */
666         writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
667
668         while (qi->desc_status[wait_index] != QI_DONE) {
669                 /*
670                  * We will leave the interrupts disabled, to prevent the
671                  * interrupt context from queueing another cmd while a cmd is
672                  * already submitted and waiting for completion on this cpu.
673                  * This avoids a deadlock where the interrupt context could
674                  * wait indefinitely for free slots in the queue.
675                  */
676                 rc = qi_check_fault(iommu, index);
677                 if (rc)
678                         goto out;
679
680                 spin_unlock(&qi->q_lock);
681                 cpu_relax();
682                 spin_lock(&qi->q_lock);
683         }
684 out:
685         qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
686
687         reclaim_free_desc(qi);
688         spin_unlock_irqrestore(&qi->q_lock, flags);
689
690         return rc;
691 }
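
/*
 * Note on the submission scheme above: each call consumes two consecutive
 * slots, the caller's descriptor plus an invalidation-wait descriptor whose
 * Status-Write completion stamps QI_DONE into qi->desc_status[wait_index].
 * qi_submit_sync() then spins on that status (and on qi_check_fault())
 * rather than taking a completion interrupt.  The "free_cnt < 3" check
 * keeps at least one extra slot unused, presumably so that the queue never
 * becomes completely full.
 */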
692
693 /*
694  * Flush the global interrupt entry cache.
695  */
696 void qi_global_iec(struct intel_iommu *iommu)
697 {
698         struct qi_desc desc;
699
700         desc.low = QI_IEC_TYPE;
701         desc.high = 0;
702
703         /* should never fail */
704         qi_submit_sync(&desc, iommu);
705 }
706
707 int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
708                      u64 type, int non_present_entry_flush)
709 {
710         struct qi_desc desc;
711
712         if (non_present_entry_flush) {
713                 if (!cap_caching_mode(iommu->cap))
714                         return 1;
715                 else
716                         did = 0;
717         }
718
719         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
720                         | QI_CC_GRAN(type) | QI_CC_TYPE;
721         desc.high = 0;
722
723         return qi_submit_sync(&desc, iommu);
724 }
725
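/*
 * addr/size_order describe the region to invalidate: size_order goes into
 * the AM (address mask) field and, per the VT-d spec, encodes log2 of the
 * number of VT-d pages covered starting at addr.  Read/write draining is
 * requested whenever the capability register advertises it.
 */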
726 int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
727                    unsigned int size_order, u64 type,
728                    int non_present_entry_flush)
729 {
730         u8 dw = 0, dr = 0;
731
732         struct qi_desc desc;
733         int ih = 0;
734
735         if (non_present_entry_flush) {
736                 if (!cap_caching_mode(iommu->cap))
737                         return 1;
738                 else
739                         did = 0;
740         }
741
742         if (cap_write_drain(iommu->cap))
743                 dw = 1;
744
745         if (cap_read_drain(iommu->cap))
746                 dr = 1;
747
748         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
749                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
750         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
751                 | QI_IOTLB_AM(size_order);
752
753         return qi_submit_sync(&desc, iommu);
754 }
755
756 /*
757  * Disable Queued Invalidation interface.
758  */
759 void dmar_disable_qi(struct intel_iommu *iommu)
760 {
761         unsigned long flags;
762         u32 sts;
763         cycles_t start_time = get_cycles();
764
765         if (!ecap_qis(iommu->ecap))
766                 return;
767
768         spin_lock_irqsave(&iommu->register_lock, flags);
769
770         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
771         if (!(sts & DMA_GSTS_QIES))
772                 goto end;
773
774         /*
775          * Give the HW a chance to complete the pending invalidation requests.
776          */
777         while ((readl(iommu->reg + DMAR_IQT_REG) !=
778                 readl(iommu->reg + DMAR_IQH_REG)) &&
779                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
780                 cpu_relax();
781
782         iommu->gcmd &= ~DMA_GCMD_QIE;
783
784         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
785
786         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
787                       !(sts & DMA_GSTS_QIES), sts);
788 end:
789         spin_unlock_irqrestore(&iommu->register_lock, flags);
790 }
791
792 /*
793  * Enable queued invalidation.
794  */
795 static void __dmar_enable_qi(struct intel_iommu *iommu)
796 {
797         u32 cmd, sts;
798         unsigned long flags;
799         struct q_inval *qi = iommu->qi;
800
801         qi->free_head = qi->free_tail = 0;
802         qi->free_cnt = QI_LENGTH;
803
804         spin_lock_irqsave(&iommu->register_lock, flags);
805
806         /* write zero to the tail reg */
807         writel(0, iommu->reg + DMAR_IQT_REG);
808
809         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
810
811         cmd = iommu->gcmd | DMA_GCMD_QIE;
812         iommu->gcmd |= DMA_GCMD_QIE;
813         writel(cmd, iommu->reg + DMAR_GCMD_REG);
814
815         /* Make sure hardware complete it */
816         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
817
818         spin_unlock_irqrestore(&iommu->register_lock, flags);
819 }
820
821 /*
822  * Enable the Queued Invalidation interface. This is required to support
823  * interrupt-remapping. It is also used by DMA-remapping, where it replaces
824  * register-based IOTLB invalidation.
825  */
826 int dmar_enable_qi(struct intel_iommu *iommu)
827 {
828         struct q_inval *qi;
829
830         if (!ecap_qis(iommu->ecap))
831                 return -ENOENT;
832
833         /*
834          * queued invalidation is already set up and enabled.
835          */
836         if (iommu->qi)
837                 return 0;
838
839         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
840         if (!iommu->qi)
841                 return -ENOMEM;
842
843         qi = iommu->qi;
844
845         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
846         if (!qi->desc) {
847                 kfree(qi);
848                 iommu->qi = NULL;
849                 return -ENOMEM;
850         }
851
852         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
853         if (!qi->desc_status) {
854                 free_page((unsigned long) qi->desc);
855                 kfree(qi);
856                 iommu->qi = NULL;
857                 return -ENOMEM;
858         }
859
860         qi->free_head = qi->free_tail = 0;
861         qi->free_cnt = QI_LENGTH;
862
863         spin_lock_init(&qi->q_lock);
864
865         __dmar_enable_qi(iommu);
866
867         return 0;
868 }
869
870 /* iommu interrupt handling. Most of it is MSI-like. */
871
872 enum faulttype {
873         DMA_REMAP,
874         INTR_REMAP,
875         UNKNOWN,
876 };
877
878 static const char *dma_remap_fault_reasons[] =
879 {
880         "Software",
881         "Present bit in root entry is clear",
882         "Present bit in context entry is clear",
883         "Invalid context entry",
884         "Access beyond MGAW",
885         "PTE Write access is not set",
886         "PTE Read access is not set",
887         "Next page table ptr is invalid",
888         "Root table address invalid",
889         "Context table ptr is invalid",
890         "non-zero reserved fields in RTP",
891         "non-zero reserved fields in CTP",
892         "non-zero reserved fields in PTE",
893 };
894
895 static const char *intr_remap_fault_reasons[] =
896 {
897         "Detected reserved fields in the decoded interrupt-remapped request",
898         "Interrupt index exceeded the interrupt-remapping table size",
899         "Present field in the IRTE entry is clear",
900         "Error accessing interrupt-remapping table pointed by IRTA_REG",
901         "Detected reserved fields in the IRTE entry",
902         "Blocked a compatibility format interrupt request",
903         "Blocked an interrupt request due to source-id verification failure",
904 };
905
906 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
907
908 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
909 {
910         if (fault_reason >= 0x20 && (fault_reason - 0x20 <
911                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
912                 *fault_type = INTR_REMAP;
913                 return intr_remap_fault_reasons[fault_reason - 0x20];
914         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
915                 *fault_type = DMA_REMAP;
916                 return dma_remap_fault_reasons[fault_reason];
917         } else {
918                 *fault_type = UNKNOWN;
919                 return "Unknown";
920         }
921 }
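
/*
 * Fault reasons below ARRAY_SIZE(dma_remap_fault_reasons) index the DMA
 * remapping table directly, while reasons starting at 0x20 index the
 * interrupt remapping table.  For example, fault reason 0x25 resolves to
 * intr_remap_fault_reasons[5], "Blocked a compatibility format interrupt
 * request".
 */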
922
923 void dmar_msi_unmask(unsigned int irq)
924 {
925         struct intel_iommu *iommu = get_irq_data(irq);
926         unsigned long flag;
927
928         /* unmask it */
929         spin_lock_irqsave(&iommu->register_lock, flag);
930         writel(0, iommu->reg + DMAR_FECTL_REG);
931         /* Read a reg to force-flush the posted write */
932         readl(iommu->reg + DMAR_FECTL_REG);
933         spin_unlock_irqrestore(&iommu->register_lock, flag);
934 }
935
936 void dmar_msi_mask(unsigned int irq)
937 {
938         unsigned long flag;
939         struct intel_iommu *iommu = get_irq_data(irq);
940
941         /* mask it */
942         spin_lock_irqsave(&iommu->register_lock, flag);
943         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
944         /* Read a reg to force-flush the posted write */
945         readl(iommu->reg + DMAR_FECTL_REG);
946         spin_unlock_irqrestore(&iommu->register_lock, flag);
947 }
948
949 void dmar_msi_write(int irq, struct msi_msg *msg)
950 {
951         struct intel_iommu *iommu = get_irq_data(irq);
952         unsigned long flag;
953
954         spin_lock_irqsave(&iommu->register_lock, flag);
955         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
956         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
957         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
958         spin_unlock_irqrestore(&iommu->register_lock, flag);
959 }
960
961 void dmar_msi_read(int irq, struct msi_msg *msg)
962 {
963         struct intel_iommu *iommu = get_irq_data(irq);
964         unsigned long flag;
965
966         spin_lock_irqsave(&iommu->register_lock, flag);
967         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
968         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
969         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
970         spin_unlock_irqrestore(&iommu->register_lock, flag);
971 }
972
973 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
974                 u8 fault_reason, u16 source_id, unsigned long long addr)
975 {
976         const char *reason;
977         int fault_type;
978
979         reason = dmar_get_fault_reason(fault_reason, &fault_type);
980
981         if (fault_type == INTR_REMAP)
982                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
983                        "fault index %llx\n"
984                         "INTR-REMAP:[fault reason %02d] %s\n",
985                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
986                         PCI_FUNC(source_id & 0xFF), addr >> 48,
987                         fault_reason, reason);
988         else
989                 printk(KERN_ERR
990                        "DMAR:[%s] Request device [%02x:%02x.%d] "
991                        "fault addr %llx\n"
992                        "DMAR:[fault reason %02d] %s\n",
993                        (type ? "DMA Read" : "DMA Write"),
994                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
995                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
996         return 0;
997 }
998
999 #define PRIMARY_FAULT_REG_LEN (16)
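/*
 * Each primary fault recording register is a 16-byte record: the low
 * quadword holds the faulting page address, the dword at offset 8 the
 * source-id of the offending request, and the dword at offset 12 the
 * F(ault) bit together with the fault reason and the request type.
 * Writing DMA_FRCD_F back to offset 12 clears the record, as done in the
 * loop below.
 */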
1000 irqreturn_t dmar_fault(int irq, void *dev_id)
1001 {
1002         struct intel_iommu *iommu = dev_id;
1003         int reg, fault_index;
1004         u32 fault_status;
1005         unsigned long flag;
1006
1007         spin_lock_irqsave(&iommu->register_lock, flag);
1008         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1009         if (fault_status)
1010                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1011                        fault_status);
1012
1013         /* TBD: ignore advanced fault log currently */
1014         if (!(fault_status & DMA_FSTS_PPF))
1015                 goto clear_rest;
1016
1017         fault_index = dma_fsts_fault_record_index(fault_status);
1018         reg = cap_fault_reg_offset(iommu->cap);
1019         while (1) {
1020                 u8 fault_reason;
1021                 u16 source_id;
1022                 u64 guest_addr;
1023                 int type;
1024                 u32 data;
1025
1026                 /* highest 32 bits */
1027                 data = readl(iommu->reg + reg +
1028                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1029                 if (!(data & DMA_FRCD_F))
1030                         break;
1031
1032                 fault_reason = dma_frcd_fault_reason(data);
1033                 type = dma_frcd_type(data);
1034
1035                 data = readl(iommu->reg + reg +
1036                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1037                 source_id = dma_frcd_source_id(data);
1038
1039                 guest_addr = dmar_readq(iommu->reg + reg +
1040                                 fault_index * PRIMARY_FAULT_REG_LEN);
1041                 guest_addr = dma_frcd_page_addr(guest_addr);
1042                 /* clear the fault */
1043                 writel(DMA_FRCD_F, iommu->reg + reg +
1044                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1045
1046                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1047
1048                 dmar_fault_do_one(iommu, type, fault_reason,
1049                                 source_id, guest_addr);
1050
1051                 fault_index++;
1052                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1053                         fault_index = 0;
1054                 spin_lock_irqsave(&iommu->register_lock, flag);
1055         }
1056 clear_rest:
1057         /* clear all the other faults */
1058         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1059         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1060
1061         spin_unlock_irqrestore(&iommu->register_lock, flag);
1062         return IRQ_HANDLED;
1063 }
1064
1065 int dmar_set_interrupt(struct intel_iommu *iommu)
1066 {
1067         int irq, ret;
1068
1069         /*
1070          * Check if the fault interrupt is already initialized.
1071          */
1072         if (iommu->irq)
1073                 return 0;
1074
1075         irq = create_irq();
1076         if (!irq) {
1077                 printk(KERN_ERR "IOMMU: no free vectors\n");
1078                 return -EINVAL;
1079         }
1080
1081         set_irq_data(irq, iommu);
1082         iommu->irq = irq;
1083
1084         ret = arch_setup_dmar_msi(irq);
1085         if (ret) {
1086                 set_irq_data(irq, NULL);
1087                 iommu->irq = 0;
1088                 destroy_irq(irq);
1089                 return ret;
1090         }
1091
1092         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1093         if (ret)
1094                 printk(KERN_ERR "IOMMU: can't request irq\n");
1095         return ret;
1096 }
1097
1098 int __init enable_drhd_fault_handling(void)
1099 {
1100         struct dmar_drhd_unit *drhd;
1101
1102         /*
1103          * Enable fault control interrupt.
1104          */
1105         for_each_drhd_unit(drhd) {
1106                 int ret;
1107                 struct intel_iommu *iommu = drhd->iommu;
1108                 ret = dmar_set_interrupt(iommu);
1109
1110                 if (ret) {
1111                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1112                                "interrupt, ret %d\n",
1113                                (unsigned long long)drhd->reg_base_addr, ret);
1114                         return -1;
1115                 }
1116         }
1117
1118         return 0;
1119 }
1120
1121 /*
1122  * Re-enable Queued Invalidation interface.
1123  */
1124 int dmar_reenable_qi(struct intel_iommu *iommu)
1125 {
1126         if (!ecap_qis(iommu->ecap))
1127                 return -ENOENT;
1128
1129         if (!iommu->qi)
1130                 return -ENOENT;
1131
1132         /*
1133          * First disable queued invalidation.
1134          */
1135         dmar_disable_qi(iommu);
1136         /*
1137          * Then enable queued invalidation again. Since there are no pending
1138          * invalidation requests now, it's safe to re-enable queued
1139          * invalidation.
1140          */
1141         __dmar_enable_qi(iommu);
1142
1143         return 0;
1144 }