Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[linux-2.6] / arch / ia64 / kernel / iosapic.c
1 /*
2  * I/O SAPIC support.
3  *
4  * Copyright (C) 1999 Intel Corp.
5  * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
6  * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
7  * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
8  *      David Mosberger-Tang <davidm@hpl.hp.com>
9  * Copyright (C) 1999 VA Linux Systems
10  * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
11  *
12  * 00/04/19     D. Mosberger    Rewritten to mirror more closely the x86 I/O
13  *                              APIC code.  In particular, we now have separate
14  *                              handlers for edge and level triggered
15  *                              interrupts.
16  * 00/10/27     Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector
17  *                              allocation PCI to vector mapping, shared PCI
18  *                              interrupts.
19  * 00/10/27     D. Mosberger    Document things a bit more to make them more
20  *                              understandable.  Clean up much of the old
21  *                              IOSAPIC cruft.
22  * 01/07/27     J.I. Lee        PCI irq routing, Platform/Legacy interrupts
23  *                              and fixes for ACPI S5(SoftOff) support.
24  * 02/01/23     J.I. Lee        iosapic pgm fixes for PCI irq routing from _PRT
25  * 02/01/07     E. Focht        <efocht@ess.nec.de> Redirectable interrupt
26  *                              vectors in iosapic_set_affinity(),
27  *                              initializations for /proc/irq/#/smp_affinity
28  * 02/04/02     P. Diefenbaugh  Cleaned up ACPI PCI IRQ routing.
29  * 02/04/18     J.I. Lee        bug fix in iosapic_init_pci_irq
30  * 02/04/30     J.I. Lee        bug fix in find_iosapic to fix ACPI PCI IRQ to
31  *                              IOSAPIC mapping error
32  * 02/07/29     T. Kochi        Allocate interrupt vectors dynamically
33  * 02/08/04     T. Kochi        Cleaned up terminology (irq, global system
34  *                              interrupt, vector, etc.)
35  * 02/09/20     D. Mosberger    Simplified by taking advantage of ACPI's
36  *                              pci_irq code.
37  * 03/02/19     B. Helgaas      Make pcat_compat system-wide, not per-IOSAPIC.
38  *                              Remove iosapic_address & gsi_base from
39  *                              external interfaces.  Rationalize
40  *                              __init/__devinit attributes.
41  * 04/12/04 Ashok Raj   <ashok.raj@intel.com> Intel Corporation 2004
42  *                              Updated to work with irq migration necessary
43  *                              for CPU Hotplug
44  */
45 /*
46  * Here is what the interrupt logic between a PCI device and the kernel looks
47  * like:
48  *
49  * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC,
50  *     INTD).  The device is uniquely identified by its bus-, and slot-number
51  *     (the function number does not matter here because all functions share
52  *     the same interrupt lines).
53  *
54  * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC
55  *     controller.  Multiple interrupt lines may have to share the same
56  *     IOSAPIC pin (if they're level triggered and use the same polarity).
57  *     Each interrupt line has a unique Global System Interrupt (GSI) number
58  *     which can be calculated as the sum of the controller's base GSI number
59  *     and the IOSAPIC pin number to which the line connects.
60  *
61  * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the
62  * IOSAPIC pin into the IA-64 interrupt vector.  This interrupt vector is then
63  * sent to the CPU.
64  *
65  * (4) The kernel recognizes an interrupt as an IRQ.  The IRQ interface is
66  *     used as architecture-independent interrupt handling mechanism in Linux.
67  *     As an IRQ is a number, we have to have
68  *     IA-64 interrupt vector number <-> IRQ number mapping.  On smaller
69  *     systems, we use one-to-one mapping between IA-64 vector and IRQ.  A
70  *     platform can implement platform_irq_to_vector(irq) and
71  *     platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
72  *     Please see also include/asm-ia64/hw_irq.h for those APIs.
73  *
74  * To sum up, there are three levels of mappings involved:
75  *
76  *      PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
77  *
78  * Note: The term "IRQ" is loosely used everywhere in Linux kernel to
79  * describe interrupts.  Now we use "IRQ" only for Linux IRQ's.  ISA IRQ
80  * (isa_irq) is the only exception in this source code.
81  */
82 #include <linux/config.h>
83
84 #include <linux/acpi.h>
85 #include <linux/init.h>
86 #include <linux/irq.h>
87 #include <linux/kernel.h>
88 #include <linux/list.h>
89 #include <linux/pci.h>
90 #include <linux/smp.h>
91 #include <linux/smp_lock.h>
92 #include <linux/string.h>
93 #include <linux/bootmem.h>
94
95 #include <asm/delay.h>
96 #include <asm/hw_irq.h>
97 #include <asm/io.h>
98 #include <asm/iosapic.h>
99 #include <asm/machvec.h>
100 #include <asm/processor.h>
101 #include <asm/ptrace.h>
102 #include <asm/system.h>
103
/*
 * Interrupt-routing debug output.  DBG() expands to printk() only when
 * DEBUG_INTERRUPT_ROUTING is defined; it is explicitly undefined here, so
 * DBG() compiles to nothing.
 */
#undef DEBUG_INTERRUPT_ROUTING

#ifndef DEBUG_INTERRUPT_ROUTING
# define DBG(fmt...)
#else
# define DBG(fmt...)	printk(fmt)
#endif

/* How many RTE descriptors fit into a single page. */
#define NR_PREALLOCATE_RTE_ENTRIES \
	(PAGE_SIZE / sizeof(struct iosapic_rte_info))

/* iosapic_rte_info.flags bit: descriptor came from the free_rte_list pool. */
#define RTE_PREALLOCATED	(1)

/* Taken in this file around RTE list lookups and RTE register writes. */
static DEFINE_SPINLOCK(iosapic_lock);
117
118 /*
119  * These tables map IA-64 vectors to the IOSAPIC pin that generates this
120  * vector.
121  */
122
123 struct iosapic_rte_info {
124         struct list_head rte_list;      /* node in list of RTEs sharing the
125                                          * same vector */
126         char __iomem    *addr;          /* base address of IOSAPIC */
127         unsigned int    gsi_base;       /* first GSI assigned to this
128                                          * IOSAPIC */
129         char            rte_index;      /* IOSAPIC RTE index */
130         int             refcnt;         /* reference counter */
131         unsigned int    flags;          /* flags */
132 } ____cacheline_aligned;
133
134 static struct iosapic_intr_info {
135         struct list_head rtes;          /* RTEs using this vector (empty =>
136                                          * not an IOSAPIC interrupt) */
137         int             count;          /* # of RTEs that shares this vector */
138         u32             low32;          /* current value of low word of
139                                          * Redirection table entry */
140         unsigned int    dest;           /* destination CPU physical ID */
141         unsigned char   dmode   : 3;    /* delivery mode (see iosapic.h) */
142         unsigned char   polarity: 1;    /* interrupt polarity
143                                          * (see iosapic.h) */
144         unsigned char   trigger : 1;    /* trigger mode (see iosapic.h) */
145 } iosapic_intr_info[IA64_NUM_VECTORS];
146
147 static struct iosapic {
148         char __iomem    *addr;          /* base address of IOSAPIC */
149         unsigned int    gsi_base;       /* first GSI assigned to this
150                                          * IOSAPIC */
151         unsigned short  num_rte;        /* # of RTEs on this IOSAPIC */
152         int             rtes_inuse;     /* # of RTEs in use on this IOSAPIC */
153 #ifdef CONFIG_NUMA
154         unsigned short  node;           /* numa node association via pxm */
155 #endif
156 } iosapic_lists[NR_IOSAPICS];
157
158 static unsigned char pcat_compat __devinitdata; /* 8259 compatibility flag */
159
160 static int iosapic_kmalloc_ok;
161 static LIST_HEAD(free_rte_list);
162
163 /*
164  * Find an IOSAPIC associated with a GSI
165  */
166 static inline int
167 find_iosapic (unsigned int gsi)
168 {
169         int i;
170
171         for (i = 0; i < NR_IOSAPICS; i++) {
172                 if ((unsigned) (gsi - iosapic_lists[i].gsi_base) <
173                     iosapic_lists[i].num_rte)
174                         return i;
175         }
176
177         return -1;
178 }
179
180 static inline int
181 _gsi_to_vector (unsigned int gsi)
182 {
183         struct iosapic_intr_info *info;
184         struct iosapic_rte_info *rte;
185
186         for (info = iosapic_intr_info; info <
187                      iosapic_intr_info + IA64_NUM_VECTORS; ++info)
188                 list_for_each_entry(rte, &info->rtes, rte_list)
189                         if (rte->gsi_base + rte->rte_index == gsi)
190                                 return info - iosapic_intr_info;
191         return -1;
192 }
193
/*
 * Map a GSI number to its IA-64 interrupt vector.  Yields -1 when the GSI
 * has no entry.  Thin wrapper around _gsi_to_vector(); takes no lock.
 */
inline int
gsi_to_vector (unsigned int gsi)
{
	int vector = _gsi_to_vector(gsi);

	return vector;
}
203
204 int
205 gsi_to_irq (unsigned int gsi)
206 {
207         unsigned long flags;
208         int irq;
209         /*
210          * XXX fix me: this assumes an identity mapping between IA-64 vector
211          * and Linux irq numbers...
212          */
213         spin_lock_irqsave(&iosapic_lock, flags);
214         {
215                 irq = _gsi_to_vector(gsi);
216         }
217         spin_unlock_irqrestore(&iosapic_lock, flags);
218
219         return irq;
220 }
221
222 static struct iosapic_rte_info *gsi_vector_to_rte(unsigned int gsi,
223                                                   unsigned int vec)
224 {
225         struct iosapic_rte_info *rte;
226
227         list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
228                 if (rte->gsi_base + rte->rte_index == gsi)
229                         return rte;
230         return NULL;
231 }
232
233 static void
234 set_rte (unsigned int gsi, unsigned int vector, unsigned int dest, int mask)
235 {
236         unsigned long pol, trigger, dmode;
237         u32 low32, high32;
238         char __iomem *addr;
239         int rte_index;
240         char redir;
241         struct iosapic_rte_info *rte;
242
243         DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
244
245         rte = gsi_vector_to_rte(gsi, vector);
246         if (!rte)
247                 return;         /* not an IOSAPIC interrupt */
248
249         rte_index = rte->rte_index;
250         addr    = rte->addr;
251         pol     = iosapic_intr_info[vector].polarity;
252         trigger = iosapic_intr_info[vector].trigger;
253         dmode   = iosapic_intr_info[vector].dmode;
254
255         redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
256
257 #ifdef CONFIG_SMP
258         {
259                 unsigned int irq;
260
261                 for (irq = 0; irq < NR_IRQS; ++irq)
262                         if (irq_to_vector(irq) == vector) {
263                                 set_irq_affinity_info(irq,
264                                                       (int)(dest & 0xffff),
265                                                       redir);
266                                 break;
267                         }
268         }
269 #endif
270
271         low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
272                  (trigger << IOSAPIC_TRIGGER_SHIFT) |
273                  (dmode << IOSAPIC_DELIVERY_SHIFT) |
274                  ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
275                  vector);
276
277         /* dest contains both id and eid */
278         high32 = (dest << IOSAPIC_DEST_SHIFT);
279
280         iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
281         iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
282         iosapic_intr_info[vector].low32 = low32;
283         iosapic_intr_info[vector].dest = dest;
284 }
285
/* Placeholder irq-type callback: deliberately ignores @irq. */
static void
nop (unsigned int irq)
{
	(void) irq;
}
291
292 static void
293 mask_irq (unsigned int irq)
294 {
295         unsigned long flags;
296         char __iomem *addr;
297         u32 low32;
298         int rte_index;
299         ia64_vector vec = irq_to_vector(irq);
300         struct iosapic_rte_info *rte;
301
302         if (list_empty(&iosapic_intr_info[vec].rtes))
303                 return;                 /* not an IOSAPIC interrupt! */
304
305         spin_lock_irqsave(&iosapic_lock, flags);
306         {
307                 /* set only the mask bit */
308                 low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
309                 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
310                                     rte_list) {
311                         addr = rte->addr;
312                         rte_index = rte->rte_index;
313                         iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
314                 }
315         }
316         spin_unlock_irqrestore(&iosapic_lock, flags);
317 }
318
319 static void
320 unmask_irq (unsigned int irq)
321 {
322         unsigned long flags;
323         char __iomem *addr;
324         u32 low32;
325         int rte_index;
326         ia64_vector vec = irq_to_vector(irq);
327         struct iosapic_rte_info *rte;
328
329         if (list_empty(&iosapic_intr_info[vec].rtes))
330                 return;                 /* not an IOSAPIC interrupt! */
331
332         spin_lock_irqsave(&iosapic_lock, flags);
333         {
334                 low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
335                 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
336                                     rte_list) {
337                         addr = rte->addr;
338                         rte_index = rte->rte_index;
339                         iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
340                 }
341         }
342         spin_unlock_irqrestore(&iosapic_lock, flags);
343 }
344
345
346 static void
347 iosapic_set_affinity (unsigned int irq, cpumask_t mask)
348 {
349 #ifdef CONFIG_SMP
350         unsigned long flags;
351         u32 high32, low32;
352         int dest, rte_index;
353         char __iomem *addr;
354         int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
355         ia64_vector vec;
356         struct iosapic_rte_info *rte;
357
358         irq &= (~IA64_IRQ_REDIRECTED);
359         vec = irq_to_vector(irq);
360
361         if (cpus_empty(mask))
362                 return;
363
364         dest = cpu_physical_id(first_cpu(mask));
365
366         if (list_empty(&iosapic_intr_info[vec].rtes))
367                 return;                 /* not an IOSAPIC interrupt */
368
369         set_irq_affinity_info(irq, dest, redir);
370
371         /* dest contains both id and eid */
372         high32 = dest << IOSAPIC_DEST_SHIFT;
373
374         spin_lock_irqsave(&iosapic_lock, flags);
375         {
376                 low32 = iosapic_intr_info[vec].low32 &
377                         ~(7 << IOSAPIC_DELIVERY_SHIFT);
378
379                 if (redir)
380                         /* change delivery mode to lowest priority */
381                         low32 |= (IOSAPIC_LOWEST_PRIORITY <<
382                                   IOSAPIC_DELIVERY_SHIFT);
383                 else
384                         /* change delivery mode to fixed */
385                         low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
386
387                 iosapic_intr_info[vec].low32 = low32;
388                 iosapic_intr_info[vec].dest = dest;
389                 list_for_each_entry(rte, &iosapic_intr_info[vec].rtes,
390                                     rte_list) {
391                         addr = rte->addr;
392                         rte_index = rte->rte_index;
393                         iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index),
394                                       high32);
395                         iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
396                 }
397         }
398         spin_unlock_irqrestore(&iosapic_lock, flags);
399 #endif
400 }
401
/*
 * Handlers for level-triggered interrupts.
 */

/* Start a level-triggered irq by unmasking it; always reports 0. */
static unsigned int
iosapic_startup_level_irq (unsigned int irq)
{
	unmask_irq(irq);

	return 0;
}
412
413 static void
414 iosapic_end_level_irq (unsigned int irq)
415 {
416         ia64_vector vec = irq_to_vector(irq);
417         struct iosapic_rte_info *rte;
418
419         move_native_irq(irq);
420         list_for_each_entry(rte, &iosapic_intr_info[vec].rtes, rte_list)
421                 iosapic_eoi(rte->addr, vec);
422 }
423
424 #define iosapic_shutdown_level_irq      mask_irq
425 #define iosapic_enable_level_irq        unmask_irq
426 #define iosapic_disable_level_irq       mask_irq
427 #define iosapic_ack_level_irq           nop
428
429 struct hw_interrupt_type irq_type_iosapic_level = {
430         .typename =     "IO-SAPIC-level",
431         .startup =      iosapic_startup_level_irq,
432         .shutdown =     iosapic_shutdown_level_irq,
433         .enable =       iosapic_enable_level_irq,
434         .disable =      iosapic_disable_level_irq,
435         .ack =          iosapic_ack_level_irq,
436         .end =          iosapic_end_level_irq,
437         .set_affinity = iosapic_set_affinity
438 };
439
/*
 * Handlers for edge-triggered interrupts.
 */

/* Start an edge-triggered irq by unmasking it; always reports 0. */
static unsigned int
iosapic_startup_edge_irq (unsigned int irq)
{
	unmask_irq(irq);

	/*
	 * The IOSAPIC discards interrupts that arrived while the pin was
	 * masked, so there is no way to tell whether one is already
	 * pending.  Let's hope not...
	 */
	return 0;
}
455
456 static void
457 iosapic_ack_edge_irq (unsigned int irq)
458 {
459         irq_desc_t *idesc = irq_descp(irq);
460
461         move_native_irq(irq);
462         /*
463          * Once we have recorded IRQ_PENDING already, we can mask the
464          * interrupt for real. This prevents IRQ storms from unhandled
465          * devices.
466          */
467         if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) ==
468             (IRQ_PENDING|IRQ_DISABLED))
469                 mask_irq(irq);
470 }
471
472 #define iosapic_enable_edge_irq         unmask_irq
473 #define iosapic_disable_edge_irq        nop
474 #define iosapic_end_edge_irq            nop
475
476 struct hw_interrupt_type irq_type_iosapic_edge = {
477         .typename =     "IO-SAPIC-edge",
478         .startup =      iosapic_startup_edge_irq,
479         .shutdown =     iosapic_disable_edge_irq,
480         .enable =       iosapic_enable_edge_irq,
481         .disable =      iosapic_disable_edge_irq,
482         .ack =          iosapic_ack_edge_irq,
483         .end =          iosapic_end_edge_irq,
484         .set_affinity = iosapic_set_affinity
485 };
486
487 unsigned int
488 iosapic_version (char __iomem *addr)
489 {
490         /*
491          * IOSAPIC Version Register return 32 bit structure like:
492          * {
493          *      unsigned int version   : 8;
494          *      unsigned int reserved1 : 8;
495          *      unsigned int max_redir : 8;
496          *      unsigned int reserved2 : 8;
497          * }
498          */
499         return iosapic_read(addr, IOSAPIC_VERSION);
500 }
501
502 static int iosapic_find_sharable_vector (unsigned long trigger,
503                                          unsigned long pol)
504 {
505         int i, vector = -1, min_count = -1;
506         struct iosapic_intr_info *info;
507
508         /*
509          * shared vectors for edge-triggered interrupts are not
510          * supported yet
511          */
512         if (trigger == IOSAPIC_EDGE)
513                 return -1;
514
515         for (i = IA64_FIRST_DEVICE_VECTOR; i <= IA64_LAST_DEVICE_VECTOR; i++) {
516                 info = &iosapic_intr_info[i];
517                 if (info->trigger == trigger && info->polarity == pol &&
518                     (info->dmode == IOSAPIC_FIXED || info->dmode ==
519                      IOSAPIC_LOWEST_PRIORITY)) {
520                         if (min_count == -1 || info->count < min_count) {
521                                 vector = i;
522                                 min_count = info->count;
523                         }
524                 }
525         }
526
527         return vector;
528 }
529
530 /*
531  * if the given vector is already owned by other,
532  *  assign a new vector for the other and make the vector available
533  */
534 static void __init
535 iosapic_reassign_vector (int vector)
536 {
537         int new_vector;
538
539         if (!list_empty(&iosapic_intr_info[vector].rtes)) {
540                 new_vector = assign_irq_vector(AUTO_ASSIGN);
541                 if (new_vector < 0)
542                         panic("%s: out of interrupt vectors!\n", __FUNCTION__);
543                 printk(KERN_INFO "Reassigning vector %d to %d\n",
544                        vector, new_vector);
545                 memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
546                        sizeof(struct iosapic_intr_info));
547                 INIT_LIST_HEAD(&iosapic_intr_info[new_vector].rtes);
548                 list_move(iosapic_intr_info[vector].rtes.next,
549                           &iosapic_intr_info[new_vector].rtes);
550                 memset(&iosapic_intr_info[vector], 0,
551                        sizeof(struct iosapic_intr_info));
552                 iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
553                 INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
554         }
555 }
556
557 static struct iosapic_rte_info *iosapic_alloc_rte (void)
558 {
559         int i;
560         struct iosapic_rte_info *rte;
561         int preallocated = 0;
562
563         if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
564                 rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
565                                     NR_PREALLOCATE_RTE_ENTRIES);
566                 if (!rte)
567                         return NULL;
568                 for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
569                         list_add(&rte->rte_list, &free_rte_list);
570         }
571
572         if (!list_empty(&free_rte_list)) {
573                 rte = list_entry(free_rte_list.next, struct iosapic_rte_info,
574                                  rte_list);
575                 list_del(&rte->rte_list);
576                 preallocated++;
577         } else {
578                 rte = kmalloc(sizeof(struct iosapic_rte_info), GFP_ATOMIC);
579                 if (!rte)
580                         return NULL;
581         }
582
583         memset(rte, 0, sizeof(struct iosapic_rte_info));
584         if (preallocated)
585                 rte->flags |= RTE_PREALLOCATED;
586
587         return rte;
588 }
589
590 static void iosapic_free_rte (struct iosapic_rte_info *rte)
591 {
592         if (rte->flags & RTE_PREALLOCATED)
593                 list_add_tail(&rte->rte_list, &free_rte_list);
594         else
595                 kfree(rte);
596 }
597
598 static inline int vector_is_shared (int vector)
599 {
600         return (iosapic_intr_info[vector].count > 1);
601 }
602
603 static int
604 register_intr (unsigned int gsi, int vector, unsigned char delivery,
605                unsigned long polarity, unsigned long trigger)
606 {
607         irq_desc_t *idesc;
608         struct hw_interrupt_type *irq_type;
609         int rte_index;
610         int index;
611         unsigned long gsi_base;
612         void __iomem *iosapic_address;
613         struct iosapic_rte_info *rte;
614
615         index = find_iosapic(gsi);
616         if (index < 0) {
617                 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
618                        __FUNCTION__, gsi);
619                 return -ENODEV;
620         }
621
622         iosapic_address = iosapic_lists[index].addr;
623         gsi_base = iosapic_lists[index].gsi_base;
624
625         rte = gsi_vector_to_rte(gsi, vector);
626         if (!rte) {
627                 rte = iosapic_alloc_rte();
628                 if (!rte) {
629                         printk(KERN_WARNING "%s: cannot allocate memory\n",
630                                __FUNCTION__);
631                         return -ENOMEM;
632                 }
633
634                 rte_index = gsi - gsi_base;
635                 rte->rte_index  = rte_index;
636                 rte->addr       = iosapic_address;
637                 rte->gsi_base   = gsi_base;
638                 rte->refcnt++;
639                 list_add_tail(&rte->rte_list, &iosapic_intr_info[vector].rtes);
640                 iosapic_intr_info[vector].count++;
641                 iosapic_lists[index].rtes_inuse++;
642         }
643         else if (vector_is_shared(vector)) {
644                 struct iosapic_intr_info *info = &iosapic_intr_info[vector];
645                 if (info->trigger != trigger || info->polarity != polarity) {
646                         printk (KERN_WARNING
647                                 "%s: cannot override the interrupt\n",
648                                 __FUNCTION__);
649                         return -EINVAL;
650                 }
651         }
652
653         iosapic_intr_info[vector].polarity = polarity;
654         iosapic_intr_info[vector].dmode    = delivery;
655         iosapic_intr_info[vector].trigger  = trigger;
656
657         if (trigger == IOSAPIC_EDGE)
658                 irq_type = &irq_type_iosapic_edge;
659         else
660                 irq_type = &irq_type_iosapic_level;
661
662         idesc = irq_descp(vector);
663         if (idesc->handler != irq_type) {
664                 if (idesc->handler != &no_irq_type)
665                         printk(KERN_WARNING
666                                "%s: changing vector %d from %s to %s\n",
667                                __FUNCTION__, vector,
668                                idesc->handler->typename, irq_type->typename);
669                 idesc->handler = irq_type;
670         }
671         return 0;
672 }
673
/*
 * Choose the physical ID of the CPU that should receive @vector for
 * @gsi.  On SMP the checks run in this order:
 *   - a vector that already has RTEs keeps its current destination;
 *   - with SMP_IRQ_REDIRECTION set, or while the current CPU is not yet
 *     online, the current CPU is used;
 *   - (CONFIG_ACPI) the CPEP vector goes to get_cpei_target_cpu() when
 *     cpe_vector is positive;
 *   - (CONFIG_NUMA) an online CPU on the IOSAPIC's node, selected by
 *     vector number;
 *   - otherwise online CPUs are used round-robin.
 * Without CONFIG_SMP the current CPU is always returned.
 */
static unsigned int
get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
	static int cpu = -1;	/* last CPU handed out by the round-robin */
	extern int cpe_vector;

	/*
	 * All RTEs sharing one vector must target the same CPU, so reuse
	 * the destination that is already recorded for the vector.
	 */
	if (!list_empty(&iosapic_intr_info[vector].rtes))
		return iosapic_intr_info[vector].dest;

	/*
	 * If the platform supports redirection via XTP, let it
	 * distribute interrupts.
	 */
	if (smp_int_redirect & SMP_IRQ_REDIRECTION)
		return cpu_physical_id(smp_processor_id());

	/*
	 * Some interrupts (ACPI SCI, for instance) are registered
	 * before the BSP is marked as online.
	 */
	if (!cpu_online(smp_processor_id()))
		return cpu_physical_id(smp_processor_id());

#ifdef CONFIG_ACPI
	if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
		return get_cpei_target_cpu();
#endif

#ifdef CONFIG_NUMA
	{
		int nr_online, target, ios, numa_cpu, step;
		cpumask_t node_cpus;

		ios = find_iosapic(gsi);
		if (ios < 0 || iosapic_lists[ios].node == MAX_NUMNODES)
			goto skip_numa_setup;

		node_cpus = node_to_cpumask(iosapic_lists[ios].node);

		/* keep only the CPUs of the node that are online */
		for_each_cpu_mask(numa_cpu, node_cpus) {
			if (!cpu_online(numa_cpu))
				cpu_clear(numa_cpu, node_cpus);
		}

		nr_online = cpus_weight(node_cpus);
		if (!nr_online)
			goto skip_numa_setup;

		/* Use vector assignment to distribute across cpus in node */
		target = vector % nr_online;

		numa_cpu = first_cpu(node_cpus);
		for (step = 0; step < target; step++)
			numa_cpu = next_cpu(numa_cpu, node_cpus);

		if (numa_cpu != NR_CPUS)
			return cpu_physical_id(numa_cpu);
	}
skip_numa_setup:
#endif
	/*
	 * Otherwise, round-robin interrupt vectors across all the
	 * processors.  (It'd be nice if we could be smarter in the
	 * case of NUMA.)
	 */
	do {
		if (++cpu >= NR_CPUS)
			cpu = 0;
	} while (!cpu_online(cpu));

	return cpu_physical_id(cpu);
#else  /* CONFIG_SMP */
	return cpu_physical_id(smp_processor_id());
#endif
}
755
756 /*
757  * ACPI can describe IOSAPIC interrupts via static tables and namespace
758  * methods.  This provides an interface to register those interrupts and
759  * program the IOSAPIC RTE.
760  */
761 int
762 iosapic_register_intr (unsigned int gsi,
763                        unsigned long polarity, unsigned long trigger)
764 {
765         int vector, mask = 1, err;
766         unsigned int dest;
767         unsigned long flags;
768         struct iosapic_rte_info *rte;
769         u32 low32;
770 again:
771         /*
772          * If this GSI has already been registered (i.e., it's a
773          * shared interrupt, or we lost a race to register it),
774          * don't touch the RTE.
775          */
776         spin_lock_irqsave(&iosapic_lock, flags);
777         {
778                 vector = gsi_to_vector(gsi);
779                 if (vector > 0) {
780                         rte = gsi_vector_to_rte(gsi, vector);
781                         rte->refcnt++;
782                         spin_unlock_irqrestore(&iosapic_lock, flags);
783                         return vector;
784                 }
785         }
786         spin_unlock_irqrestore(&iosapic_lock, flags);
787
788         /* If vector is running out, we try to find a sharable vector */
789         vector = assign_irq_vector(AUTO_ASSIGN);
790         if (vector < 0) {
791                 vector = iosapic_find_sharable_vector(trigger, polarity);
792                 if (vector < 0)
793                         return -ENOSPC;
794         }
795
796         spin_lock_irqsave(&irq_descp(vector)->lock, flags);
797         spin_lock(&iosapic_lock);
798         {
799                 if (gsi_to_vector(gsi) > 0) {
800                         if (list_empty(&iosapic_intr_info[vector].rtes))
801                                 free_irq_vector(vector);
802                         spin_unlock(&iosapic_lock);
803                         spin_unlock_irqrestore(&irq_descp(vector)->lock,
804                                                flags);
805                         goto again;
806                 }
807
808                 dest = get_target_cpu(gsi, vector);
809                 err = register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
810                               polarity, trigger);
811                 if (err < 0) {
812                         spin_unlock(&iosapic_lock);
813                         spin_unlock_irqrestore(&irq_descp(vector)->lock,
814                                                flags);
815                         return err;
816                 }
817
818                 /*
819                  * If the vector is shared and already unmasked for
820                  * other interrupt sources, don't mask it.
821                  */
822                 low32 = iosapic_intr_info[vector].low32;
823                 if (vector_is_shared(vector) && !(low32 & IOSAPIC_MASK))
824                         mask = 0;
825                 set_rte(gsi, vector, dest, mask);
826         }
827         spin_unlock(&iosapic_lock);
828         spin_unlock_irqrestore(&irq_descp(vector)->lock, flags);
829
830         printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
831                gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
832                (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
833                cpu_logical_id(dest), dest, vector);
834
835         return vector;
836 }
837
838 void
839 iosapic_unregister_intr (unsigned int gsi)
840 {
841         unsigned long flags;
842         int irq, vector, index;
843         irq_desc_t *idesc;
844         u32 low32;
845         unsigned long trigger, polarity;
846         unsigned int dest;
847         struct iosapic_rte_info *rte;
848
849         /*
850          * If the irq associated with the gsi is not found,
851          * iosapic_unregister_intr() is unbalanced. We need to check
852          * this again after getting locks.
853          */
854         irq = gsi_to_irq(gsi);
855         if (irq < 0) {
856                 printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n",
857                        gsi);
858                 WARN_ON(1);
859                 return;
860         }
861         vector = irq_to_vector(irq);
862
863         idesc = irq_descp(irq);
864         spin_lock_irqsave(&idesc->lock, flags);
865         spin_lock(&iosapic_lock);
866         {
867                 if ((rte = gsi_vector_to_rte(gsi, vector)) == NULL) {
868                         printk(KERN_ERR
869                                "iosapic_unregister_intr(%u) unbalanced\n",
870                                gsi);
871                         WARN_ON(1);
872                         goto out;
873                 }
874
875                 if (--rte->refcnt > 0)
876                         goto out;
877
878                 /* Mask the interrupt */
879                 low32 = iosapic_intr_info[vector].low32 | IOSAPIC_MASK;
880                 iosapic_write(rte->addr, IOSAPIC_RTE_LOW(rte->rte_index),
881                               low32);
882
883                 /* Remove the rte entry from the list */
884                 list_del(&rte->rte_list);
885                 iosapic_intr_info[vector].count--;
886                 iosapic_free_rte(rte);
887                 index = find_iosapic(gsi);
888                 iosapic_lists[index].rtes_inuse--;
889                 WARN_ON(iosapic_lists[index].rtes_inuse < 0);
890
891                 trigger  = iosapic_intr_info[vector].trigger;
892                 polarity = iosapic_intr_info[vector].polarity;
893                 dest     = iosapic_intr_info[vector].dest;
894                 printk(KERN_INFO
895                        "GSI %u (%s, %s) -> CPU %d (0x%04x)"
896                        " vector %d unregistered\n",
897                        gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
898                        (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
899                        cpu_logical_id(dest), dest, vector);
900
901                 if (list_empty(&iosapic_intr_info[vector].rtes)) {
902                         /* Sanity check */
903                         BUG_ON(iosapic_intr_info[vector].count);
904
905                         /* Clear the interrupt controller descriptor */
906                         idesc->handler = &no_irq_type;
907
908                         /* Clear the interrupt information */
909                         memset(&iosapic_intr_info[vector], 0,
910                                sizeof(struct iosapic_intr_info));
911                         iosapic_intr_info[vector].low32 |= IOSAPIC_MASK;
912                         INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
913
914                         if (idesc->action) {
915                                 printk(KERN_ERR
916                                        "interrupt handlers still exist on"
917                                        "IRQ %u\n", irq);
918                                 WARN_ON(1);
919                         }
920
921                         /* Free the interrupt vector */
922                         free_irq_vector(vector);
923                 }
924         }
925  out:
926         spin_unlock(&iosapic_lock);
927         spin_unlock_irqrestore(&idesc->lock, flags);
928 }
929
930 /*
931  * ACPI calls this when it finds an entry for a platform interrupt.
932  */
933 int __init
934 iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
935                                 int iosapic_vector, u16 eid, u16 id,
936                                 unsigned long polarity, unsigned long trigger)
937 {
938         static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
939         unsigned char delivery;
940         int vector, mask = 0;
941         unsigned int dest = ((id << 8) | eid) & 0xffff;
942
943         switch (int_type) {
944               case ACPI_INTERRUPT_PMI:
945                 vector = iosapic_vector;
946                 /*
947                  * since PMI vector is alloc'd by FW(ACPI) not by kernel,
948                  * we need to make sure the vector is available
949                  */
950                 iosapic_reassign_vector(vector);
951                 delivery = IOSAPIC_PMI;
952                 break;
953               case ACPI_INTERRUPT_INIT:
954                 vector = assign_irq_vector(AUTO_ASSIGN);
955                 if (vector < 0)
956                         panic("%s: out of interrupt vectors!\n", __FUNCTION__);
957                 delivery = IOSAPIC_INIT;
958                 break;
959               case ACPI_INTERRUPT_CPEI:
960                 vector = IA64_CPE_VECTOR;
961                 delivery = IOSAPIC_LOWEST_PRIORITY;
962                 mask = 1;
963                 break;
964               default:
965                 printk(KERN_ERR "%s: invalid int type 0x%x\n", __FUNCTION__,
966                        int_type);
967                 return -1;
968         }
969
970         register_intr(gsi, vector, delivery, polarity, trigger);
971
972         printk(KERN_INFO
973                "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)"
974                " vector %d\n",
975                int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
976                int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
977                (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
978                cpu_logical_id(dest), dest, vector);
979
980         set_rte(gsi, vector, dest, mask);
981         return vector;
982 }
983
984 /*
985  * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
986  */
987 void __init
988 iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
989                           unsigned long polarity,
990                           unsigned long trigger)
991 {
992         int vector;
993         unsigned int dest = cpu_physical_id(smp_processor_id());
994
995         vector = isa_irq_to_vector(isa_irq);
996
997         register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
998
999         DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
1000             isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
1001             polarity == IOSAPIC_POL_HIGH ? "high" : "low",
1002             cpu_logical_id(dest), dest, vector);
1003
1004         set_rte(gsi, vector, dest, 1);
1005 }
1006
1007 void __init
1008 iosapic_system_init (int system_pcat_compat)
1009 {
1010         int vector;
1011
1012         for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) {
1013                 iosapic_intr_info[vector].low32 = IOSAPIC_MASK;
1014                 /* mark as unused */
1015                 INIT_LIST_HEAD(&iosapic_intr_info[vector].rtes);
1016         }
1017
1018         pcat_compat = system_pcat_compat;
1019         if (pcat_compat) {
1020                 /*
1021                  * Disable the compatibility mode interrupts (8259 style),
1022                  * needs IN/OUT support enabled.
1023                  */
1024                 printk(KERN_INFO
1025                        "%s: Disabling PC-AT compatible 8259 interrupts\n",
1026                        __FUNCTION__);
1027                 outb(0xff, 0xA1);
1028                 outb(0xff, 0x21);
1029         }
1030 }
1031
1032 static inline int
1033 iosapic_alloc (void)
1034 {
1035         int index;
1036
1037         for (index = 0; index < NR_IOSAPICS; index++)
1038                 if (!iosapic_lists[index].addr)
1039                         return index;
1040
1041         printk(KERN_WARNING "%s: failed to allocate iosapic\n", __FUNCTION__);
1042         return -1;
1043 }
1044
1045 static inline void
1046 iosapic_free (int index)
1047 {
1048         memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0]));
1049 }
1050
1051 static inline int
1052 iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver)
1053 {
1054         int index;
1055         unsigned int gsi_end, base, end;
1056
1057         /* check gsi range */
1058         gsi_end = gsi_base + ((ver >> 16) & 0xff);
1059         for (index = 0; index < NR_IOSAPICS; index++) {
1060                 if (!iosapic_lists[index].addr)
1061                         continue;
1062
1063                 base = iosapic_lists[index].gsi_base;
1064                 end  = base + iosapic_lists[index].num_rte - 1;
1065
1066                 if (gsi_end < base || end < gsi_base)
1067                         continue; /* OK */
1068
1069                 return -EBUSY;
1070         }
1071         return 0;
1072 }
1073
1074 int __devinit
1075 iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
1076 {
1077         int num_rte, err, index;
1078         unsigned int isa_irq, ver;
1079         char __iomem *addr;
1080         unsigned long flags;
1081
1082         spin_lock_irqsave(&iosapic_lock, flags);
1083         {
1084                 addr = ioremap(phys_addr, 0);
1085                 ver = iosapic_version(addr);
1086
1087                 if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
1088                         iounmap(addr);
1089                         spin_unlock_irqrestore(&iosapic_lock, flags);
1090                         return err;
1091                 }
1092
1093                 /*
1094                  * The MAX_REDIR register holds the highest input pin
1095                  * number (starting from 0).
1096                  * We add 1 so that we can use it for number of pins (= RTEs)
1097                  */
1098                 num_rte = ((ver >> 16) & 0xff) + 1;
1099
1100                 index = iosapic_alloc();
1101                 iosapic_lists[index].addr = addr;
1102                 iosapic_lists[index].gsi_base = gsi_base;
1103                 iosapic_lists[index].num_rte = num_rte;
1104 #ifdef CONFIG_NUMA
1105                 iosapic_lists[index].node = MAX_NUMNODES;
1106 #endif
1107         }
1108         spin_unlock_irqrestore(&iosapic_lock, flags);
1109
1110         if ((gsi_base == 0) && pcat_compat) {
1111                 /*
1112                  * Map the legacy ISA devices into the IOSAPIC data.  Some of
1113                  * these may get reprogrammed later on with data from the ACPI
1114                  * Interrupt Source Override table.
1115                  */
1116                 for (isa_irq = 0; isa_irq < 16; ++isa_irq)
1117                         iosapic_override_isa_irq(isa_irq, isa_irq,
1118                                                  IOSAPIC_POL_HIGH,
1119                                                  IOSAPIC_EDGE);
1120         }
1121         return 0;
1122 }
1123
1124 #ifdef CONFIG_HOTPLUG
1125 int
1126 iosapic_remove (unsigned int gsi_base)
1127 {
1128         int index, err = 0;
1129         unsigned long flags;
1130
1131         spin_lock_irqsave(&iosapic_lock, flags);
1132         {
1133                 index = find_iosapic(gsi_base);
1134                 if (index < 0) {
1135                         printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n",
1136                                __FUNCTION__, gsi_base);
1137                         goto out;
1138                 }
1139
1140                 if (iosapic_lists[index].rtes_inuse) {
1141                         err = -EBUSY;
1142                         printk(KERN_WARNING
1143                                "%s: IOSAPIC for GSI base %u is busy\n",
1144                                __FUNCTION__, gsi_base);
1145                         goto out;
1146                 }
1147
1148                 iounmap(iosapic_lists[index].addr);
1149                 iosapic_free(index);
1150         }
1151  out:
1152         spin_unlock_irqrestore(&iosapic_lock, flags);
1153         return err;
1154 }
1155 #endif /* CONFIG_HOTPLUG */
1156
1157 #ifdef CONFIG_NUMA
1158 void __devinit
1159 map_iosapic_to_node(unsigned int gsi_base, int node)
1160 {
1161         int index;
1162
1163         index = find_iosapic(gsi_base);
1164         if (index < 0) {
1165                 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
1166                        __FUNCTION__, gsi_base);
1167                 return;
1168         }
1169         iosapic_lists[index].node = node;
1170         return;
1171 }
1172 #endif
1173
1174 static int __init iosapic_enable_kmalloc (void)
1175 {
1176         iosapic_kmalloc_ok = 1;
1177         return 0;
1178 }
1179 core_initcall (iosapic_enable_kmalloc);