ixgbe: When in DCB mode with PFC enabled, show LFC is disabled
[linux-2.6] / drivers / pci / dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
/*
 * List of the DMA remapping hardware units registered through
 * dmar_register_drhd_unit().
 *
 * No locks are needed as the DMA remapping hardware unit list is
 * constructed at boot time and hotplug of these units is not supported
 * by the architecture.
 */
LIST_HEAD(dmar_drhd_units);

/* DMAR ACPI table pointer and size, filled in by dmar_table_detect(). */
static struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so scan the list will find it at
53          * the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-exist devices in DMAR table, just
79                  * ignore it
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path ++;
95                 count --;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
106                         pdev->subordinate) || (scope->entry_type == \
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 pci_dev_put(pdev);
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                          pci_name(pdev));
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index ++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure which uniquely represent one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         drhd = (struct acpi_dmar_hardware_unit *)header;
177         if (!drhd->address) {
178                 /* Promote an attitude of violence to a BIOS engineer today */
179                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
180                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
181                      dmi_get_system_info(DMI_BIOS_VENDOR),
182                      dmi_get_system_info(DMI_BIOS_VERSION),
183                      dmi_get_system_info(DMI_PRODUCT_VERSION));
184                 return -ENODEV;
185         }
186         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
187         if (!dmaru)
188                 return -ENOMEM;
189
190         dmaru->hdr = header;
191         dmaru->reg_base_addr = drhd->address;
192         dmaru->segment = drhd->segment;
193         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
194
195         ret = alloc_iommu(dmaru);
196         if (ret) {
197                 kfree(dmaru);
198                 return ret;
199         }
200         dmar_register_drhd_unit(dmaru);
201         return 0;
202 }
203
204 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
205 {
206         struct acpi_dmar_hardware_unit *drhd;
207         int ret = 0;
208
209         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
210
211         if (dmaru->include_all)
212                 return 0;
213
214         ret = dmar_parse_dev_scope((void *)(drhd + 1),
215                                 ((void *)drhd) + drhd->header.length,
216                                 &dmaru->devices_cnt, &dmaru->devices,
217                                 drhd->segment);
218         if (ret) {
219                 list_del(&dmaru->list);
220                 kfree(dmaru);
221         }
222         return ret;
223 }
224
225 #ifdef CONFIG_DMAR
226 LIST_HEAD(dmar_rmrr_units);
227
228 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
229 {
230         list_add(&rmrr->list, &dmar_rmrr_units);
231 }
232
233
234 static int __init
235 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
236 {
237         struct acpi_dmar_reserved_memory *rmrr;
238         struct dmar_rmrr_unit *rmrru;
239
240         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
241         if (!rmrru)
242                 return -ENOMEM;
243
244         rmrru->hdr = header;
245         rmrr = (struct acpi_dmar_reserved_memory *)header;
246         rmrru->base_address = rmrr->base_address;
247         rmrru->end_address = rmrr->end_address;
248
249         dmar_register_rmrr_unit(rmrru);
250         return 0;
251 }
252
253 static int __init
254 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
255 {
256         struct acpi_dmar_reserved_memory *rmrr;
257         int ret;
258
259         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
260         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
261                 ((void *)rmrr) + rmrr->header.length,
262                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
263
264         if (ret || (rmrru->devices_cnt == 0)) {
265                 list_del(&rmrru->list);
266                 kfree(rmrru);
267         }
268         return ret;
269 }
270 #endif
271
272 static void __init
273 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
274 {
275         struct acpi_dmar_hardware_unit *drhd;
276         struct acpi_dmar_reserved_memory *rmrr;
277
278         switch (header->type) {
279         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
280                 drhd = (struct acpi_dmar_hardware_unit *)header;
281                 printk (KERN_INFO PREFIX
282                         "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
283                         drhd->flags, (unsigned long long)drhd->address);
284                 break;
285         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
286                 rmrr = (struct acpi_dmar_reserved_memory *)header;
287
288                 printk (KERN_INFO PREFIX
289                         "RMRR base: 0x%016Lx end: 0x%016Lx\n",
290                         (unsigned long long)rmrr->base_address,
291                         (unsigned long long)rmrr->end_address);
292                 break;
293         }
294 }
295
296 /**
297  * dmar_table_detect - checks to see if the platform supports DMAR devices
298  */
299 static int __init dmar_table_detect(void)
300 {
301         acpi_status status = AE_OK;
302
303         /* if we could find DMAR table, then there are DMAR devices */
304         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
305                                 (struct acpi_table_header **)&dmar_tbl,
306                                 &dmar_tbl_size);
307
308         if (ACPI_SUCCESS(status) && !dmar_tbl) {
309                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
310                 status = AE_NOT_FOUND;
311         }
312
313         return (ACPI_SUCCESS(status) ? 1 : 0);
314 }
315
316 /**
317  * parse_dmar_table - parses the DMA reporting table
318  */
319 static int __init
320 parse_dmar_table(void)
321 {
322         struct acpi_table_dmar *dmar;
323         struct acpi_dmar_header *entry_header;
324         int ret = 0;
325
326         /*
327          * Do it again, earlier dmar_tbl mapping could be mapped with
328          * fixed map.
329          */
330         dmar_table_detect();
331
332         dmar = (struct acpi_table_dmar *)dmar_tbl;
333         if (!dmar)
334                 return -ENODEV;
335
336         if (dmar->width < PAGE_SHIFT - 1) {
337                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
338                 return -EINVAL;
339         }
340
341         printk (KERN_INFO PREFIX "Host address width %d\n",
342                 dmar->width + 1);
343
344         entry_header = (struct acpi_dmar_header *)(dmar + 1);
345         while (((unsigned long)entry_header) <
346                         (((unsigned long)dmar) + dmar_tbl->length)) {
347                 /* Avoid looping forever on bad ACPI tables */
348                 if (entry_header->length == 0) {
349                         printk(KERN_WARNING PREFIX
350                                 "Invalid 0-length structure\n");
351                         ret = -EINVAL;
352                         break;
353                 }
354
355                 dmar_table_print_dmar_entry(entry_header);
356
357                 switch (entry_header->type) {
358                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
359                         ret = dmar_parse_one_drhd(entry_header);
360                         break;
361                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
362 #ifdef CONFIG_DMAR
363                         ret = dmar_parse_one_rmrr(entry_header);
364 #endif
365                         break;
366                 default:
367                         printk(KERN_WARNING PREFIX
368                                 "Unknown DMAR structure type\n");
369                         ret = 0; /* for forward compatibility */
370                         break;
371                 }
372                 if (ret)
373                         break;
374
375                 entry_header = ((void *)entry_header + entry_header->length);
376         }
377         return ret;
378 }
379
380 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
381                           struct pci_dev *dev)
382 {
383         int index;
384
385         while (dev) {
386                 for (index = 0; index < cnt; index++)
387                         if (dev == devices[index])
388                                 return 1;
389
390                 /* Check our parent */
391                 dev = dev->bus->self;
392         }
393
394         return 0;
395 }
396
397 struct dmar_drhd_unit *
398 dmar_find_matched_drhd_unit(struct pci_dev *dev)
399 {
400         struct dmar_drhd_unit *dmaru = NULL;
401         struct acpi_dmar_hardware_unit *drhd;
402
403         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
404                 drhd = container_of(dmaru->hdr,
405                                     struct acpi_dmar_hardware_unit,
406                                     header);
407
408                 if (dmaru->include_all &&
409                     drhd->segment == pci_domain_nr(dev->bus))
410                         return dmaru;
411
412                 if (dmar_pci_device_match(dmaru->devices,
413                                           dmaru->devices_cnt, dev))
414                         return dmaru;
415         }
416
417         return NULL;
418 }
419
420 int __init dmar_dev_scope_init(void)
421 {
422         struct dmar_drhd_unit *drhd, *drhd_n;
423         int ret = -ENODEV;
424
425         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
426                 ret = dmar_parse_dev(drhd);
427                 if (ret)
428                         return ret;
429         }
430
431 #ifdef CONFIG_DMAR
432         {
433                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
434                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
435                         ret = rmrr_parse_dev(rmrr);
436                         if (ret)
437                                 return ret;
438                 }
439         }
440 #endif
441
442         return ret;
443 }
444
445
446 int __init dmar_table_init(void)
447 {
448         static int dmar_table_initialized;
449         int ret;
450
451         if (dmar_table_initialized)
452                 return 0;
453
454         dmar_table_initialized = 1;
455
456         ret = parse_dmar_table();
457         if (ret) {
458                 if (ret != -ENODEV)
459                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
460                 return ret;
461         }
462
463         if (list_empty(&dmar_drhd_units)) {
464                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
465                 return -ENODEV;
466         }
467
468 #ifdef CONFIG_DMAR
469         if (list_empty(&dmar_rmrr_units))
470                 printk(KERN_INFO PREFIX "No RMRR found\n");
471 #endif
472
473 #ifdef CONFIG_INTR_REMAP
474         parse_ioapics_under_ir();
475 #endif
476         return 0;
477 }
478
479 void __init detect_intel_iommu(void)
480 {
481         int ret;
482
483         ret = dmar_table_detect();
484
485         {
486 #ifdef CONFIG_INTR_REMAP
487                 struct acpi_table_dmar *dmar;
488                 /*
489                  * for now we will disable dma-remapping when interrupt
490                  * remapping is enabled.
491                  * When support for queued invalidation for IOTLB invalidation
492                  * is added, we will not need this any more.
493                  */
494                 dmar = (struct acpi_table_dmar *) dmar_tbl;
495                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
496                         printk(KERN_INFO
497                                "Queued invalidation will be enabled to support "
498                                "x2apic and Intr-remapping.\n");
499 #endif
500 #ifdef CONFIG_DMAR
501                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
502                     !dmar_disabled)
503                         iommu_detected = 1;
504 #endif
505         }
506         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
507         dmar_tbl = NULL;
508 }
509
510
511 int alloc_iommu(struct dmar_drhd_unit *drhd)
512 {
513         struct intel_iommu *iommu;
514         int map_size;
515         u32 ver;
516         static int iommu_allocated = 0;
517         int agaw = 0;
518
519         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
520         if (!iommu)
521                 return -ENOMEM;
522
523         iommu->seq_id = iommu_allocated++;
524         sprintf (iommu->name, "dmar%d", iommu->seq_id);
525
526         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
527         if (!iommu->reg) {
528                 printk(KERN_ERR "IOMMU: can't map the region\n");
529                 goto error;
530         }
531         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
532         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
533
534 #ifdef CONFIG_DMAR
535         agaw = iommu_calculate_agaw(iommu);
536         if (agaw < 0) {
537                 printk(KERN_ERR
538                         "Cannot get a valid agaw for iommu (seq_id = %d)\n",
539                         iommu->seq_id);
540                 goto error;
541         }
542 #endif
543         iommu->agaw = agaw;
544
545         /* the registers might be more than one page */
546         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
547                 cap_max_fault_reg_offset(iommu->cap));
548         map_size = VTD_PAGE_ALIGN(map_size);
549         if (map_size > VTD_PAGE_SIZE) {
550                 iounmap(iommu->reg);
551                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
552                 if (!iommu->reg) {
553                         printk(KERN_ERR "IOMMU: can't map the region\n");
554                         goto error;
555                 }
556         }
557
558         ver = readl(iommu->reg + DMAR_VER_REG);
559         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
560                 (unsigned long long)drhd->reg_base_addr,
561                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
562                 (unsigned long long)iommu->cap,
563                 (unsigned long long)iommu->ecap);
564
565         spin_lock_init(&iommu->register_lock);
566
567         drhd->iommu = iommu;
568         return 0;
569 error:
570         kfree(iommu);
571         return -1;
572 }
573
574 void free_iommu(struct intel_iommu *iommu)
575 {
576         if (!iommu)
577                 return;
578
579 #ifdef CONFIG_DMAR
580         free_dmar_iommu(iommu);
581 #endif
582
583         if (iommu->reg)
584                 iounmap(iommu->reg);
585         kfree(iommu);
586 }
587
588 /*
589  * Reclaim all the submitted descriptors which have completed its work.
590  */
591 static inline void reclaim_free_desc(struct q_inval *qi)
592 {
593         while (qi->desc_status[qi->free_tail] == QI_DONE) {
594                 qi->desc_status[qi->free_tail] = QI_FREE;
595                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
596                 qi->free_cnt++;
597         }
598 }
599
600 static int qi_check_fault(struct intel_iommu *iommu, int index)
601 {
602         u32 fault;
603         int head;
604         struct q_inval *qi = iommu->qi;
605         int wait_index = (index + 1) % QI_LENGTH;
606
607         fault = readl(iommu->reg + DMAR_FSTS_REG);
608
609         /*
610          * If IQE happens, the head points to the descriptor associated
611          * with the error. No new descriptors are fetched until the IQE
612          * is cleared.
613          */
614         if (fault & DMA_FSTS_IQE) {
615                 head = readl(iommu->reg + DMAR_IQH_REG);
616                 if ((head >> 4) == index) {
617                         memcpy(&qi->desc[index], &qi->desc[wait_index],
618                                         sizeof(struct qi_desc));
619                         __iommu_flush_cache(iommu, &qi->desc[index],
620                                         sizeof(struct qi_desc));
621                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
622                         return -EINVAL;
623                 }
624         }
625
626         return 0;
627 }
628
629 /*
630  * Submit the queued invalidation descriptor to the remapping
631  * hardware unit and wait for its completion.
632  */
633 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
634 {
635         int rc = 0;
636         struct q_inval *qi = iommu->qi;
637         struct qi_desc *hw, wait_desc;
638         int wait_index, index;
639         unsigned long flags;
640
641         if (!qi)
642                 return 0;
643
644         hw = qi->desc;
645
646         spin_lock_irqsave(&qi->q_lock, flags);
647         while (qi->free_cnt < 3) {
648                 spin_unlock_irqrestore(&qi->q_lock, flags);
649                 cpu_relax();
650                 spin_lock_irqsave(&qi->q_lock, flags);
651         }
652
653         index = qi->free_head;
654         wait_index = (index + 1) % QI_LENGTH;
655
656         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
657
658         hw[index] = *desc;
659
660         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
661                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
662         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
663
664         hw[wait_index] = wait_desc;
665
666         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
667         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
668
669         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
670         qi->free_cnt -= 2;
671
672         /*
673          * update the HW tail register indicating the presence of
674          * new descriptors.
675          */
676         writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
677
678         while (qi->desc_status[wait_index] != QI_DONE) {
679                 /*
680                  * We will leave the interrupts disabled, to prevent interrupt
681                  * context to queue another cmd while a cmd is already submitted
682                  * and waiting for completion on this cpu. This is to avoid
683                  * a deadlock where the interrupt context can wait indefinitely
684                  * for free slots in the queue.
685                  */
686                 rc = qi_check_fault(iommu, index);
687                 if (rc)
688                         goto out;
689
690                 spin_unlock(&qi->q_lock);
691                 cpu_relax();
692                 spin_lock(&qi->q_lock);
693         }
694 out:
695         qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
696
697         reclaim_free_desc(qi);
698         spin_unlock_irqrestore(&qi->q_lock, flags);
699
700         return rc;
701 }
702
703 /*
704  * Flush the global interrupt entry cache.
705  */
706 void qi_global_iec(struct intel_iommu *iommu)
707 {
708         struct qi_desc desc;
709
710         desc.low = QI_IEC_TYPE;
711         desc.high = 0;
712
713         /* should never fail */
714         qi_submit_sync(&desc, iommu);
715 }
716
717 int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
718                      u64 type, int non_present_entry_flush)
719 {
720         struct qi_desc desc;
721
722         if (non_present_entry_flush) {
723                 if (!cap_caching_mode(iommu->cap))
724                         return 1;
725                 else
726                         did = 0;
727         }
728
729         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
730                         | QI_CC_GRAN(type) | QI_CC_TYPE;
731         desc.high = 0;
732
733         return qi_submit_sync(&desc, iommu);
734 }
735
736 int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
737                    unsigned int size_order, u64 type,
738                    int non_present_entry_flush)
739 {
740         u8 dw = 0, dr = 0;
741
742         struct qi_desc desc;
743         int ih = 0;
744
745         if (non_present_entry_flush) {
746                 if (!cap_caching_mode(iommu->cap))
747                         return 1;
748                 else
749                         did = 0;
750         }
751
752         if (cap_write_drain(iommu->cap))
753                 dw = 1;
754
755         if (cap_read_drain(iommu->cap))
756                 dr = 1;
757
758         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
759                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
760         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
761                 | QI_IOTLB_AM(size_order);
762
763         return qi_submit_sync(&desc, iommu);
764 }
765
766 /*
767  * Disable Queued Invalidation interface.
768  */
769 void dmar_disable_qi(struct intel_iommu *iommu)
770 {
771         unsigned long flags;
772         u32 sts;
773         cycles_t start_time = get_cycles();
774
775         if (!ecap_qis(iommu->ecap))
776                 return;
777
778         spin_lock_irqsave(&iommu->register_lock, flags);
779
780         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
781         if (!(sts & DMA_GSTS_QIES))
782                 goto end;
783
784         /*
785          * Give a chance to HW to complete the pending invalidation requests.
786          */
787         while ((readl(iommu->reg + DMAR_IQT_REG) !=
788                 readl(iommu->reg + DMAR_IQH_REG)) &&
789                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
790                 cpu_relax();
791
792         iommu->gcmd &= ~DMA_GCMD_QIE;
793
794         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
795
796         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
797                       !(sts & DMA_GSTS_QIES), sts);
798 end:
799         spin_unlock_irqrestore(&iommu->register_lock, flags);
800 }
801
802 /*
803  * Enable queued invalidation.
804  */
805 static void __dmar_enable_qi(struct intel_iommu *iommu)
806 {
807         u32 cmd, sts;
808         unsigned long flags;
809         struct q_inval *qi = iommu->qi;
810
811         qi->free_head = qi->free_tail = 0;
812         qi->free_cnt = QI_LENGTH;
813
814         spin_lock_irqsave(&iommu->register_lock, flags);
815
816         /* write zero to the tail reg */
817         writel(0, iommu->reg + DMAR_IQT_REG);
818
819         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
820
821         cmd = iommu->gcmd | DMA_GCMD_QIE;
822         iommu->gcmd |= DMA_GCMD_QIE;
823         writel(cmd, iommu->reg + DMAR_GCMD_REG);
824
825         /* Make sure hardware complete it */
826         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
827
828         spin_unlock_irqrestore(&iommu->register_lock, flags);
829 }
830
831 /*
832  * Enable Queued Invalidation interface. This is a must to support
833  * interrupt-remapping. Also used by DMA-remapping, which replaces
834  * register based IOTLB invalidation.
835  */
836 int dmar_enable_qi(struct intel_iommu *iommu)
837 {
838         struct q_inval *qi;
839
840         if (!ecap_qis(iommu->ecap))
841                 return -ENOENT;
842
843         /*
844          * queued invalidation is already setup and enabled.
845          */
846         if (iommu->qi)
847                 return 0;
848
849         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
850         if (!iommu->qi)
851                 return -ENOMEM;
852
853         qi = iommu->qi;
854
855         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
856         if (!qi->desc) {
857                 kfree(qi);
858                 iommu->qi = 0;
859                 return -ENOMEM;
860         }
861
862         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
863         if (!qi->desc_status) {
864                 free_page((unsigned long) qi->desc);
865                 kfree(qi);
866                 iommu->qi = 0;
867                 return -ENOMEM;
868         }
869
870         qi->free_head = qi->free_tail = 0;
871         qi->free_cnt = QI_LENGTH;
872
873         spin_lock_init(&qi->q_lock);
874
875         __dmar_enable_qi(iommu);
876
877         return 0;
878 }
879
880 /* iommu interrupt handling. Most stuff are MSI-like. */
881
882 enum faulttype {
883         DMA_REMAP,
884         INTR_REMAP,
885         UNKNOWN,
886 };
887
888 static const char *dma_remap_fault_reasons[] =
889 {
890         "Software",
891         "Present bit in root entry is clear",
892         "Present bit in context entry is clear",
893         "Invalid context entry",
894         "Access beyond MGAW",
895         "PTE Write access is not set",
896         "PTE Read access is not set",
897         "Next page table ptr is invalid",
898         "Root table address invalid",
899         "Context table ptr is invalid",
900         "non-zero reserved fields in RTP",
901         "non-zero reserved fields in CTP",
902         "non-zero reserved fields in PTE",
903 };
904
905 static const char *intr_remap_fault_reasons[] =
906 {
907         "Detected reserved fields in the decoded interrupt-remapped request",
908         "Interrupt index exceeded the interrupt-remapping table size",
909         "Present field in the IRTE entry is clear",
910         "Error accessing interrupt-remapping table pointed by IRTA_REG",
911         "Detected reserved fields in the IRTE entry",
912         "Blocked a compatibility format interrupt request",
913         "Blocked an interrupt request due to source-id verification failure",
914 };
915
916 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
917
918 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
919 {
920         if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
921                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
922                 *fault_type = INTR_REMAP;
923                 return intr_remap_fault_reasons[fault_reason - 0x20];
924         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
925                 *fault_type = DMA_REMAP;
926                 return dma_remap_fault_reasons[fault_reason];
927         } else {
928                 *fault_type = UNKNOWN;
929                 return "Unknown";
930         }
931 }
932
933 void dmar_msi_unmask(unsigned int irq)
934 {
935         struct intel_iommu *iommu = get_irq_data(irq);
936         unsigned long flag;
937
938         /* unmask it */
939         spin_lock_irqsave(&iommu->register_lock, flag);
940         writel(0, iommu->reg + DMAR_FECTL_REG);
941         /* Read a reg to force flush the post write */
942         readl(iommu->reg + DMAR_FECTL_REG);
943         spin_unlock_irqrestore(&iommu->register_lock, flag);
944 }
945
946 void dmar_msi_mask(unsigned int irq)
947 {
948         unsigned long flag;
949         struct intel_iommu *iommu = get_irq_data(irq);
950
951         /* mask it */
952         spin_lock_irqsave(&iommu->register_lock, flag);
953         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
954         /* Read a reg to force flush the post write */
955         readl(iommu->reg + DMAR_FECTL_REG);
956         spin_unlock_irqrestore(&iommu->register_lock, flag);
957 }
958
959 void dmar_msi_write(int irq, struct msi_msg *msg)
960 {
961         struct intel_iommu *iommu = get_irq_data(irq);
962         unsigned long flag;
963
964         spin_lock_irqsave(&iommu->register_lock, flag);
965         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
966         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
967         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
968         spin_unlock_irqrestore(&iommu->register_lock, flag);
969 }
970
971 void dmar_msi_read(int irq, struct msi_msg *msg)
972 {
973         struct intel_iommu *iommu = get_irq_data(irq);
974         unsigned long flag;
975
976         spin_lock_irqsave(&iommu->register_lock, flag);
977         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
978         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
979         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
980         spin_unlock_irqrestore(&iommu->register_lock, flag);
981 }
982
983 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
984                 u8 fault_reason, u16 source_id, unsigned long long addr)
985 {
986         const char *reason;
987         int fault_type;
988
989         reason = dmar_get_fault_reason(fault_reason, &fault_type);
990
991         if (fault_type == INTR_REMAP)
992                 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
993                        "fault index %llx\n"
994                         "INTR-REMAP:[fault reason %02d] %s\n",
995                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
996                         PCI_FUNC(source_id & 0xFF), addr >> 48,
997                         fault_reason, reason);
998         else
999                 printk(KERN_ERR
1000                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1001                        "fault addr %llx \n"
1002                        "DMAR:[fault reason %02d] %s\n",
1003                        (type ? "DMA Read" : "DMA Write"),
1004                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1005                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1006         return 0;
1007 }
1008
1009 #define PRIMARY_FAULT_REG_LEN (16)
1010 irqreturn_t dmar_fault(int irq, void *dev_id)
1011 {
1012         struct intel_iommu *iommu = dev_id;
1013         int reg, fault_index;
1014         u32 fault_status;
1015         unsigned long flag;
1016
1017         spin_lock_irqsave(&iommu->register_lock, flag);
1018         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1019         if (fault_status)
1020                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1021                        fault_status);
1022
1023         /* TBD: ignore advanced fault log currently */
1024         if (!(fault_status & DMA_FSTS_PPF))
1025                 goto clear_rest;
1026
1027         fault_index = dma_fsts_fault_record_index(fault_status);
1028         reg = cap_fault_reg_offset(iommu->cap);
1029         while (1) {
1030                 u8 fault_reason;
1031                 u16 source_id;
1032                 u64 guest_addr;
1033                 int type;
1034                 u32 data;
1035
1036                 /* highest 32 bits */
1037                 data = readl(iommu->reg + reg +
1038                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1039                 if (!(data & DMA_FRCD_F))
1040                         break;
1041
1042                 fault_reason = dma_frcd_fault_reason(data);
1043                 type = dma_frcd_type(data);
1044
1045                 data = readl(iommu->reg + reg +
1046                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1047                 source_id = dma_frcd_source_id(data);
1048
1049                 guest_addr = dmar_readq(iommu->reg + reg +
1050                                 fault_index * PRIMARY_FAULT_REG_LEN);
1051                 guest_addr = dma_frcd_page_addr(guest_addr);
1052                 /* clear the fault */
1053                 writel(DMA_FRCD_F, iommu->reg + reg +
1054                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1055
1056                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1057
1058                 dmar_fault_do_one(iommu, type, fault_reason,
1059                                 source_id, guest_addr);
1060
1061                 fault_index++;
1062                 if (fault_index > cap_num_fault_regs(iommu->cap))
1063                         fault_index = 0;
1064                 spin_lock_irqsave(&iommu->register_lock, flag);
1065         }
1066 clear_rest:
1067         /* clear all the other faults */
1068         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1069         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1070
1071         spin_unlock_irqrestore(&iommu->register_lock, flag);
1072         return IRQ_HANDLED;
1073 }
1074
1075 int dmar_set_interrupt(struct intel_iommu *iommu)
1076 {
1077         int irq, ret;
1078
1079         /*
1080          * Check if the fault interrupt is already initialized.
1081          */
1082         if (iommu->irq)
1083                 return 0;
1084
1085         irq = create_irq();
1086         if (!irq) {
1087                 printk(KERN_ERR "IOMMU: no free vectors\n");
1088                 return -EINVAL;
1089         }
1090
1091         set_irq_data(irq, iommu);
1092         iommu->irq = irq;
1093
1094         ret = arch_setup_dmar_msi(irq);
1095         if (ret) {
1096                 set_irq_data(irq, NULL);
1097                 iommu->irq = 0;
1098                 destroy_irq(irq);
1099                 return 0;
1100         }
1101
1102         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1103         if (ret)
1104                 printk(KERN_ERR "IOMMU: can't request irq\n");
1105         return ret;
1106 }
1107
1108 int __init enable_drhd_fault_handling(void)
1109 {
1110         struct dmar_drhd_unit *drhd;
1111
1112         /*
1113          * Enable fault control interrupt.
1114          */
1115         for_each_drhd_unit(drhd) {
1116                 int ret;
1117                 struct intel_iommu *iommu = drhd->iommu;
1118                 ret = dmar_set_interrupt(iommu);
1119
1120                 if (ret) {
1121                         printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1122                                " interrupt, ret %d\n",
1123                                (unsigned long long)drhd->reg_base_addr, ret);
1124                         return -1;
1125                 }
1126         }
1127
1128         return 0;
1129 }
1130
1131 /*
1132  * Re-enable Queued Invalidation interface.
1133  */
1134 int dmar_reenable_qi(struct intel_iommu *iommu)
1135 {
1136         if (!ecap_qis(iommu->ecap))
1137                 return -ENOENT;
1138
1139         if (!iommu->qi)
1140                 return -ENOENT;
1141
1142         /*
1143          * First disable queued invalidation.
1144          */
1145         dmar_disable_qi(iommu);
1146         /*
1147          * Then enable queued invalidation again. Since there is no pending
1148          * invalidation requests now, it's safe to re-enable queued
1149          * invalidation.
1150          */
1151         __dmar_enable_qi(iommu);
1152
1153         return 0;
1154 }