KVM: Fix cpuid feature misreporting
[linux-2.6] / kernel / irq / handle.c
1 /*
2  * linux/kernel/irq/handle.c
3  *
4  * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
5  * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
6  *
7  * This file contains the core interrupt handling code.
8  *
9  * Detailed information is available in Documentation/DocBook/genericirq
10  *
11  */
12
13 #include <linux/irq.h>
14 #include <linux/module.h>
15 #include <linux/random.h>
16 #include <linux/interrupt.h>
17 #include <linux/kernel_stat.h>
18 #include <linux/rculist.h>
19 #include <linux/hash.h>
20 #include <trace/irq.h>
21 #include <linux/bootmem.h>
22
23 #include "internals.h"
24
/*
 * lockdep: we want to handle all irq_desc locks as a single lock-class.
 * The descriptor locks set up in this file are tagged with this key
 * through lockdep_set_class().
 */
struct lock_class_key irq_desc_lock_class;
29
/**
 * handle_bad_irq - handle spurious and unhandled irqs
 * @irq:       the interrupt number
 * @desc:      description of the interrupt
 *
 * Handles spurious and unhandled IRQs: the descriptor is dumped with
 * print_irq_desc(), the event is counted for the current CPU and the
 * irq is handed to ack_bad_irq().
 */
void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
{
        print_irq_desc(irq, desc);
        kstat_incr_irqs_this_cpu(irq, desc);
        ack_bad_irq(irq);
}
43
44 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
45 static void __init init_irq_default_affinity(void)
46 {
47         alloc_bootmem_cpumask_var(&irq_default_affinity);
48         cpumask_setall(irq_default_affinity);
49 }
50 #else
51 static void __init init_irq_default_affinity(void)
52 {
53 }
54 #endif
55
/*
 * Linux has a controller-independent interrupt architecture.
 * Every controller has a 'controller-template', that is used
 * by the main code to do the right thing. Each driver-visible
 * interrupt source is transparently wired to the appropriate
 * controller. Thus drivers need not be aware of the
 * interrupt-controller.
 *
 * The code is designed to be easily extended with new/different
 * interrupt controllers, without having to do assembly magic or
 * having to touch the generic code.
 *
 * Controller mappings for all interrupt sources:
 *
 * nr_irqs starts out as the compile-time NR_IRQS. With
 * CONFIG_SPARSE_IRQ, irq_to_desc() and irq_to_desc_alloc_cpu() treat
 * it as the exclusive upper bound for valid irq numbers.
 */
int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);
72
73 #ifdef CONFIG_SPARSE_IRQ
74
75 static struct irq_desc irq_desc_init = {
76         .irq        = -1,
77         .status     = IRQ_DISABLED,
78         .chip       = &no_irq_chip,
79         .handle_irq = handle_bad_irq,
80         .depth      = 1,
81         .lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
82 };
83
84 void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
85 {
86         int node;
87         void *ptr;
88
89         node = cpu_to_node(cpu);
90         ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
91
92         /*
93          * don't overwite if can not get new one
94          * init_copy_kstat_irqs() could still use old one
95          */
96         if (ptr) {
97                 printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n",
98                          cpu, node);
99                 desc->kstat_irqs = ptr;
100         }
101 }
102
103 static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
104 {
105         memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
106
107         spin_lock_init(&desc->lock);
108         desc->irq = irq;
109 #ifdef CONFIG_SMP
110         desc->cpu = cpu;
111 #endif
112         lockdep_set_class(&desc->lock, &irq_desc_lock_class);
113         init_kstat_irqs(desc, cpu, nr_cpu_ids);
114         if (!desc->kstat_irqs) {
115                 printk(KERN_ERR "can not alloc kstat_irqs\n");
116                 BUG_ON(1);
117         }
118         if (!init_alloc_desc_masks(desc, cpu, false)) {
119                 printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
120                 BUG_ON(1);
121         }
122         arch_init_chip_data(desc, cpu);
123 }
124
/*
 * Protect the sparse_irqs: taken in irq_to_desc_alloc_cpu() around the
 * re-check and installation of a new descriptor in irq_desc_ptrs[].
 */
DEFINE_SPINLOCK(sparse_irq_lock);

/*
 * Lookup table from irq number to descriptor. early_irq_init()
 * allocates it with nr_irqs slots; a NULL slot means no descriptor has
 * been set up for that irq yet.
 */
struct irq_desc **irq_desc_ptrs __read_mostly;
131
132 static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
133         [0 ... NR_IRQS_LEGACY-1] = {
134                 .irq        = -1,
135                 .status     = IRQ_DISABLED,
136                 .chip       = &no_irq_chip,
137                 .handle_irq = handle_bad_irq,
138                 .depth      = 1,
139                 .lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
140         }
141 };
142
143 static unsigned int *kstat_irqs_legacy;
144
145 int __init early_irq_init(void)
146 {
147         struct irq_desc *desc;
148         int legacy_count;
149         int i;
150
151         init_irq_default_affinity();
152
153          /* initialize nr_irqs based on nr_cpu_ids */
154         arch_probe_nr_irqs();
155         printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs);
156
157         desc = irq_desc_legacy;
158         legacy_count = ARRAY_SIZE(irq_desc_legacy);
159
160         /* allocate irq_desc_ptrs array based on nr_irqs */
161         irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
162
163         /* allocate based on nr_cpu_ids */
164         /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */
165         kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids *
166                                           sizeof(int));
167
168         for (i = 0; i < legacy_count; i++) {
169                 desc[i].irq = i;
170                 desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
171                 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
172                 init_alloc_desc_masks(&desc[i], 0, true);
173                 irq_desc_ptrs[i] = desc + i;
174         }
175
176         for (i = legacy_count; i < nr_irqs; i++)
177                 irq_desc_ptrs[i] = NULL;
178
179         return arch_early_irq_init();
180 }
181
182 struct irq_desc *irq_to_desc(unsigned int irq)
183 {
184         if (irq_desc_ptrs && irq < nr_irqs)
185                 return irq_desc_ptrs[irq];
186
187         return NULL;
188 }
189
190 struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
191 {
192         struct irq_desc *desc;
193         unsigned long flags;
194         int node;
195
196         if (irq >= nr_irqs) {
197                 WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
198                         irq, nr_irqs);
199                 return NULL;
200         }
201
202         desc = irq_desc_ptrs[irq];
203         if (desc)
204                 return desc;
205
206         spin_lock_irqsave(&sparse_irq_lock, flags);
207
208         /* We have to check it to avoid races with another CPU */
209         desc = irq_desc_ptrs[irq];
210         if (desc)
211                 goto out_unlock;
212
213         node = cpu_to_node(cpu);
214         desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
215         printk(KERN_DEBUG "  alloc irq_desc for %d on cpu %d node %d\n",
216                  irq, cpu, node);
217         if (!desc) {
218                 printk(KERN_ERR "can not alloc irq_desc\n");
219                 BUG_ON(1);
220         }
221         init_one_irq_desc(irq, desc, cpu);
222
223         irq_desc_ptrs[irq] = desc;
224
225 out_unlock:
226         spin_unlock_irqrestore(&sparse_irq_lock, flags);
227
228         return desc;
229 }
230
231 #else /* !CONFIG_SPARSE_IRQ */
232
233 struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
234         [0 ... NR_IRQS-1] = {
235                 .status = IRQ_DISABLED,
236                 .chip = &no_irq_chip,
237                 .handle_irq = handle_bad_irq,
238                 .depth = 1,
239                 .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
240         }
241 };
242
243 static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
244 int __init early_irq_init(void)
245 {
246         struct irq_desc *desc;
247         int count;
248         int i;
249
250         init_irq_default_affinity();
251
252         printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
253
254         desc = irq_desc;
255         count = ARRAY_SIZE(irq_desc);
256
257         for (i = 0; i < count; i++) {
258                 desc[i].irq = i;
259                 init_alloc_desc_masks(&desc[i], 0, true);
260                 desc[i].kstat_irqs = kstat_irqs_all[i];
261         }
262         return arch_early_irq_init();
263 }
264
265 struct irq_desc *irq_to_desc(unsigned int irq)
266 {
267         return (irq < NR_IRQS) ? irq_desc + irq : NULL;
268 }
269
/*
 * Static-table variant: nothing is ever allocated, so this is a plain
 * lookup and @cpu is not used.
 */
struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
{
        struct irq_desc *desc = irq_to_desc(irq);

        return desc;
}
274 #endif /* !CONFIG_SPARSE_IRQ */
275
276 void clear_kstat_irqs(struct irq_desc *desc)
277 {
278         memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
279 }
280
/*
 * What should we do if we get a hw irq event on an illegal vector?
 * Each architecture has to answer this itself: after dumping the
 * descriptor we defer to the architecture's ack_bad_irq().
 */
static void ack_bad(unsigned int irq)
{
        struct irq_desc *bad = irq_to_desc(irq);

        print_irq_desc(irq, bad);
        ack_bad_irq(irq);
}
292
/*
 * NOP functions: placeholders for irq_chip callbacks that have nothing
 * to do.
 */
static void noop(unsigned int irq)
{
        /* intentionally empty */
}

/* NOP for callbacks that return a value: always returns 0. */
static unsigned int noop_ret(unsigned int irq)
{
        return 0U;
}
304
305 /*
306  * Generic no controller implementation
307  */
308 struct irq_chip no_irq_chip = {
309         .name           = "none",
310         .startup        = noop_ret,
311         .shutdown       = noop,
312         .enable         = noop,
313         .disable        = noop,
314         .ack            = ack_bad,
315         .end            = noop,
316 };
317
318 /*
319  * Generic dummy implementation which can be used for
320  * real dumb interrupt sources
321  */
322 struct irq_chip dummy_irq_chip = {
323         .name           = "dummy",
324         .startup        = noop_ret,
325         .shutdown       = noop,
326         .enable         = noop,
327         .disable        = noop,
328         .ack            = noop,
329         .mask           = noop,
330         .unmask         = noop,
331         .end            = noop,
332 };
333
/*
 * Special, empty irq handler: ignores both arguments and always
 * returns IRQ_NONE.
 */
irqreturn_t no_action(int cpl, void *dev_id)
{
        return IRQ_NONE;
}
341
342 static void warn_no_thread(unsigned int irq, struct irqaction *action)
343 {
344         if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
345                 return;
346
347         printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
348                "but no thread function available.", irq, action->name);
349 }
350
351 DEFINE_TRACE(irq_handler_entry);
352 DEFINE_TRACE(irq_handler_exit);
353
354 /**
355  * handle_IRQ_event - irq action chain handler
356  * @irq:        the interrupt number
357  * @action:     the interrupt action chain for this irq
358  *
359  * Handles the action chain of an irq event
360  */
361 irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
362 {
363         irqreturn_t ret, retval = IRQ_NONE;
364         unsigned int status = 0;
365
366         if (!(action->flags & IRQF_DISABLED))
367                 local_irq_enable_in_hardirq();
368
369         do {
370                 trace_irq_handler_entry(irq, action);
371                 ret = action->handler(irq, action->dev_id);
372                 trace_irq_handler_exit(irq, action, ret);
373
374                 switch (ret) {
375                 case IRQ_WAKE_THREAD:
376                         /*
377                          * Set result to handled so the spurious check
378                          * does not trigger.
379                          */
380                         ret = IRQ_HANDLED;
381
382                         /*
383                          * Catch drivers which return WAKE_THREAD but
384                          * did not set up a thread function
385                          */
386                         if (unlikely(!action->thread_fn)) {
387                                 warn_no_thread(irq, action);
388                                 break;
389                         }
390
391                         /*
392                          * Wake up the handler thread for this
393                          * action. In case the thread crashed and was
394                          * killed we just pretend that we handled the
395                          * interrupt. The hardirq handler above has
396                          * disabled the device interrupt, so no irq
397                          * storm is lurking.
398                          */
399                         if (likely(!test_bit(IRQTF_DIED,
400                                              &action->thread_flags))) {
401                                 set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
402                                 wake_up_process(action->thread);
403                         }
404
405                         /* Fall through to add to randomness */
406                 case IRQ_HANDLED:
407                         status |= action->flags;
408                         break;
409
410                 default:
411                         break;
412                 }
413
414                 retval |= ret;
415                 action = action->next;
416         } while (action);
417
418         if (status & IRQF_SAMPLE_RANDOM)
419                 add_interrupt_randomness(irq);
420         local_irq_disable();
421
422         return retval;
423 }
424
425 #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
426
427 #ifdef CONFIG_ENABLE_WARN_DEPRECATED
428 # warning __do_IRQ is deprecated. Please convert to proper flow handlers
429 #endif
430
431 /**
432  * __do_IRQ - original all in one highlevel IRQ handler
433  * @irq:        the interrupt number
434  *
435  * __do_IRQ handles all normal device IRQ's (the special
436  * SMP cross-CPU interrupts have their own specific
437  * handlers).
438  *
439  * This is the original x86 implementation which is used for every
440  * interrupt type.
441  */
442 unsigned int __do_IRQ(unsigned int irq)
443 {
444         struct irq_desc *desc = irq_to_desc(irq);
445         struct irqaction *action;
446         unsigned int status;
447
448         kstat_incr_irqs_this_cpu(irq, desc);
449
450         if (CHECK_IRQ_PER_CPU(desc->status)) {
451                 irqreturn_t action_ret;
452
453                 /*
454                  * No locking required for CPU-local interrupts:
455                  */
456                 if (desc->chip->ack) {
457                         desc->chip->ack(irq);
458                         /* get new one */
459                         desc = irq_remap_to_desc(irq, desc);
460                 }
461                 if (likely(!(desc->status & IRQ_DISABLED))) {
462                         action_ret = handle_IRQ_event(irq, desc->action);
463                         if (!noirqdebug)
464                                 note_interrupt(irq, desc, action_ret);
465                 }
466                 desc->chip->end(irq);
467                 return 1;
468         }
469
470         spin_lock(&desc->lock);
471         if (desc->chip->ack) {
472                 desc->chip->ack(irq);
473                 desc = irq_remap_to_desc(irq, desc);
474         }
475         /*
476          * REPLAY is when Linux resends an IRQ that was dropped earlier
477          * WAITING is used by probe to mark irqs that are being tested
478          */
479         status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
480         status |= IRQ_PENDING; /* we _want_ to handle it */
481
482         /*
483          * If the IRQ is disabled for whatever reason, we cannot
484          * use the action we have.
485          */
486         action = NULL;
487         if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
488                 action = desc->action;
489                 status &= ~IRQ_PENDING; /* we commit to handling */
490                 status |= IRQ_INPROGRESS; /* we are handling it */
491         }
492         desc->status = status;
493
494         /*
495          * If there is no IRQ handler or it was disabled, exit early.
496          * Since we set PENDING, if another processor is handling
497          * a different instance of this same irq, the other processor
498          * will take care of it.
499          */
500         if (unlikely(!action))
501                 goto out;
502
503         /*
504          * Edge triggered interrupts need to remember
505          * pending events.
506          * This applies to any hw interrupts that allow a second
507          * instance of the same irq to arrive while we are in do_IRQ
508          * or in the handler. But the code here only handles the _second_
509          * instance of the irq, not the third or fourth. So it is mostly
510          * useful for irq hardware that does not mask cleanly in an
511          * SMP environment.
512          */
513         for (;;) {
514                 irqreturn_t action_ret;
515
516                 spin_unlock(&desc->lock);
517
518                 action_ret = handle_IRQ_event(irq, action);
519                 if (!noirqdebug)
520                         note_interrupt(irq, desc, action_ret);
521
522                 spin_lock(&desc->lock);
523                 if (likely(!(desc->status & IRQ_PENDING)))
524                         break;
525                 desc->status &= ~IRQ_PENDING;
526         }
527         desc->status &= ~IRQ_INPROGRESS;
528
529 out:
530         /*
531          * The ->end() handler has to deal with interrupts which got
532          * disabled while the handler was running.
533          */
534         desc->chip->end(irq);
535         spin_unlock(&desc->lock);
536
537         return 1;
538 }
539 #endif
540
541 void early_init_irq_lock_class(void)
542 {
543         struct irq_desc *desc;
544         int i;
545
546         for_each_irq_desc(i, desc) {
547                 lockdep_set_class(&desc->lock, &irq_desc_lock_class);
548         }
549 }
550
551 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
552 {
553         struct irq_desc *desc = irq_to_desc(irq);
554         return desc ? desc->kstat_irqs[cpu] : 0;
555 }
556 EXPORT_SYMBOL(kstat_irqs_cpu);
557