2 * linux/kernel/softirq.c
4 * Copyright (C) 1992 Linus Torvalds
6 * Distribute under GPLv2.
8 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
10 * Remote softirq infrastructure is by Jens Axboe.
13 #include <linux/module.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/smp.h>
25 #include <linux/tick.h>
/*
   - No shared variables; all the data are CPU-local.
   - If a softirq needs serialization, let it serialize itself.
   - Even if a softirq is serialized, only the local CPU is marked for
     execution. Hence we get a sort of weak CPU binding, though it is
     still not clear whether this results in better locality.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: each tasklet is serialized with respect to itself.
 */
46 #ifndef __ARCH_IRQ_STAT
47 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
48 EXPORT_SYMBOL(irq_stat);
51 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
53 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
56 * we cannot loop indefinitely here to avoid userspace starvation,
57 * but we also don't want to introduce a worst case 1/HZ latency
58 * to the pending events, so lets the scheduler to balance
59 * the softirq load for us.
61 static inline void wakeup_softirqd(void)
63 /* Interrupts are disabled: no need to stop preemption */
64 struct task_struct *tsk = __get_cpu_var(ksoftirqd);
66 if (tsk && tsk->state != TASK_RUNNING)
71 * This one is for softirq.c-internal use,
72 * where hardirqs are disabled legitimately:
74 #ifdef CONFIG_TRACE_IRQFLAGS
75 static void __local_bh_disable(unsigned long ip)
79 WARN_ON_ONCE(in_irq());
81 raw_local_irq_save(flags);
82 add_preempt_count(SOFTIRQ_OFFSET);
84 * Were softirqs turned off above:
86 if (softirq_count() == SOFTIRQ_OFFSET)
87 trace_softirqs_off(ip);
88 raw_local_irq_restore(flags);
90 #else /* !CONFIG_TRACE_IRQFLAGS */
91 static inline void __local_bh_disable(unsigned long ip)
93 add_preempt_count(SOFTIRQ_OFFSET);
96 #endif /* CONFIG_TRACE_IRQFLAGS */
/*
 * Disable softirq processing on this CPU, recording the caller's
 * return address for tracing.
 */
void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);
106 * Special-case - softirqs can safely be enabled in
107 * cond_resched_softirq(), or by __do_softirq(),
108 * without processing still-pending softirqs:
110 void _local_bh_enable(void)
112 WARN_ON_ONCE(in_irq());
113 WARN_ON_ONCE(!irqs_disabled());
115 if (softirq_count() == SOFTIRQ_OFFSET)
116 trace_softirqs_on((unsigned long)__builtin_return_address(0));
117 sub_preempt_count(SOFTIRQ_OFFSET);
120 EXPORT_SYMBOL(_local_bh_enable);
122 static inline void _local_bh_enable_ip(unsigned long ip)
124 WARN_ON_ONCE(in_irq() || irqs_disabled());
125 #ifdef CONFIG_TRACE_IRQFLAGS
129 * Are softirqs going to be turned on now:
131 if (softirq_count() == SOFTIRQ_OFFSET)
132 trace_softirqs_on(ip);
134 * Keep preemption disabled until we are done with
135 * softirq processing:
137 sub_preempt_count(SOFTIRQ_OFFSET - 1);
139 if (unlikely(!in_interrupt() && local_softirq_pending()))
143 #ifdef CONFIG_TRACE_IRQFLAGS
146 preempt_check_resched();
/* Re-enable softirqs, attributing the event to this function's caller. */
void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);
/* Re-enable softirqs, attributing the event to the caller-supplied @ip. */
void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
162 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
163 * and we fall back to softirqd after that.
165 * This number has been established via experimentation.
166 * The two things to balance is latency against fairness -
167 * we want to handle softirqs as soon as possible, but they
168 * should not be able to lock up the box.
170 #define MAX_SOFTIRQ_RESTART 10
172 asmlinkage void __do_softirq(void)
174 struct softirq_action *h;
176 int max_restart = MAX_SOFTIRQ_RESTART;
179 pending = local_softirq_pending();
180 account_system_vtime(current);
182 __local_bh_disable((unsigned long)__builtin_return_address(0));
183 trace_softirq_enter();
185 cpu = smp_processor_id();
187 /* Reset the pending bitmask before enabling irqs */
188 set_softirq_pending(0);
196 int prev_count = preempt_count();
200 if (unlikely(prev_count != preempt_count())) {
201 printk(KERN_ERR "huh, entered softirq %td %p"
202 "with preempt_count %08x,"
203 " exited with %08x?\n", h - softirq_vec,
204 h->action, prev_count, preempt_count());
205 preempt_count() = prev_count;
208 rcu_bh_qsctr_inc(cpu);
216 pending = local_softirq_pending();
217 if (pending && --max_restart)
223 trace_softirq_exit();
225 account_system_vtime(current);
229 #ifndef __ARCH_HAS_DO_SOFTIRQ
231 asmlinkage void do_softirq(void)
239 local_irq_save(flags);
241 pending = local_softirq_pending();
246 local_irq_restore(flags);
252 * Enter an interrupt context.
256 int cpu = smp_processor_id();
259 if (idle_cpu(cpu) && !in_interrupt()) {
261 tick_check_idle(cpu);
/*
 * Pick the softirq entry point used by invoke_softirq(): __do_softirq()
 * directly when __ARCH_IRQ_EXIT_IRQS_DISABLED is defined, do_softirq()
 * otherwise.
 */
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif
273 * Exit an interrupt context. Process softirqs if needed and possible:
277 account_system_vtime(current);
278 trace_hardirq_exit();
279 sub_preempt_count(IRQ_EXIT_OFFSET);
280 if (!in_interrupt() && local_softirq_pending())
284 /* Make sure that timer wheel updates are propagated */
286 if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
287 tick_nohz_stop_sched_tick(0);
289 preempt_enable_no_resched();
/*
 * Mark softirq @nr pending on this CPU.
 *
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}
/*
 * Mark softirq @nr pending on this CPU; callable with irqs enabled,
 * since irqs are disabled around the call to raise_softirq_irqoff().
 */
void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}
321 void open_softirq(int nr, void (*action)(struct softirq_action *))
323 softirq_vec[nr].action = action;
329 struct tasklet_struct *head;
330 struct tasklet_struct **tail;
333 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
334 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
336 void __tasklet_schedule(struct tasklet_struct *t)
340 local_irq_save(flags);
342 *__get_cpu_var(tasklet_vec).tail = t;
343 __get_cpu_var(tasklet_vec).tail = &(t->next);
344 raise_softirq_irqoff(TASKLET_SOFTIRQ);
345 local_irq_restore(flags);
348 EXPORT_SYMBOL(__tasklet_schedule);
350 void __tasklet_hi_schedule(struct tasklet_struct *t)
354 local_irq_save(flags);
356 *__get_cpu_var(tasklet_hi_vec).tail = t;
357 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
358 raise_softirq_irqoff(HI_SOFTIRQ);
359 local_irq_restore(flags);
362 EXPORT_SYMBOL(__tasklet_hi_schedule);
364 static void tasklet_action(struct softirq_action *a)
366 struct tasklet_struct *list;
369 list = __get_cpu_var(tasklet_vec).head;
370 __get_cpu_var(tasklet_vec).head = NULL;
371 __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
375 struct tasklet_struct *t = list;
379 if (tasklet_trylock(t)) {
380 if (!atomic_read(&t->count)) {
381 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
392 *__get_cpu_var(tasklet_vec).tail = t;
393 __get_cpu_var(tasklet_vec).tail = &(t->next);
394 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
399 static void tasklet_hi_action(struct softirq_action *a)
401 struct tasklet_struct *list;
404 list = __get_cpu_var(tasklet_hi_vec).head;
405 __get_cpu_var(tasklet_hi_vec).head = NULL;
406 __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
410 struct tasklet_struct *t = list;
414 if (tasklet_trylock(t)) {
415 if (!atomic_read(&t->count)) {
416 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
427 *__get_cpu_var(tasklet_hi_vec).tail = t;
428 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
429 __raise_softirq_irqoff(HI_SOFTIRQ);
435 void tasklet_init(struct tasklet_struct *t,
436 void (*func)(unsigned long), unsigned long data)
440 atomic_set(&t->count, 0);
445 EXPORT_SYMBOL(tasklet_init);
447 void tasklet_kill(struct tasklet_struct *t)
450 printk("Attempt to kill tasklet from interrupt\n");
452 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
455 while (test_bit(TASKLET_STATE_SCHED, &t->state));
457 tasklet_unlock_wait(t);
458 clear_bit(TASKLET_STATE_SCHED, &t->state);
461 EXPORT_SYMBOL(tasklet_kill);
463 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
464 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
466 static void __local_trigger(struct call_single_data *cp, int softirq)
468 struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
470 list_add_tail(&cp->list, head);
472 /* Trigger the softirq only if the list was previously empty. */
473 if (head->next == &cp->list)
474 raise_softirq_irqoff(softirq);
#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
/*
 * SMP-call callback run on the target CPU: queue @data (a
 * struct call_single_data) on the local work list of the softirq
 * stored in its ->priv field.
 */
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long flags;
	int softirq;

	softirq = cp->priv;

	local_irq_save(flags);
	__local_trigger(cp, softirq);
	local_irq_restore(flags);
}

/*
 * Try to hand @cp to @cpu via an SMP function call.
 * Returns 0 if the call was issued, 1 if @cpu is not online.
 *
 * NOTE(review): the info/flags/priv assignments and the return values
 * were restored from the upstream file.
 */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (cpu_online(cpu)) {
		cp->func = remote_softirq_receive;
		cp->info = cp;
		cp->flags = 0;
		cp->priv = softirq;

		__smp_call_function_single(cpu, cp);
		return 0;
	}
	return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
/* No generic SMP helpers: always report failure so the caller queues locally. */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif
/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, or if @cpu is the current cpu, the work is instead queued up
 * on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
		__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);
/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long flags;
	int this_cpu;

	local_irq_save(flags);
	this_cpu = smp_processor_id();
	__send_remote_softirq(cp, cpu, this_cpu, softirq);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);
551 static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
552 unsigned long action, void *hcpu)
555 * If a CPU goes away, splice its entries to the current CPU
556 * and trigger a run of the softirq
558 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
559 int cpu = (unsigned long) hcpu;
563 for (i = 0; i < NR_SOFTIRQS; i++) {
564 struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
565 struct list_head *local_head;
567 if (list_empty(head))
570 local_head = &__get_cpu_var(softirq_work_list[i]);
571 list_splice_init(head, local_head);
572 raise_softirq_irqoff(i);
580 static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
581 .notifier_call = remote_softirq_cpu_notify,
584 void __init softirq_init(void)
588 for_each_possible_cpu(cpu) {
591 per_cpu(tasklet_vec, cpu).tail =
592 &per_cpu(tasklet_vec, cpu).head;
593 per_cpu(tasklet_hi_vec, cpu).tail =
594 &per_cpu(tasklet_hi_vec, cpu).head;
595 for (i = 0; i < NR_SOFTIRQS; i++)
596 INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
599 register_hotcpu_notifier(&remote_softirq_cpu_notifier);
601 open_softirq(TASKLET_SOFTIRQ, tasklet_action);
602 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
605 static int ksoftirqd(void * __bind_cpu)
607 set_current_state(TASK_INTERRUPTIBLE);
609 while (!kthread_should_stop()) {
611 if (!local_softirq_pending()) {
612 preempt_enable_no_resched();
617 __set_current_state(TASK_RUNNING);
619 while (local_softirq_pending()) {
620 /* Preempt disable stops cpu going offline.
621 If already offline, we'll be on wrong CPU:
623 if (cpu_is_offline((long)__bind_cpu))
626 preempt_enable_no_resched();
629 rcu_qsctr_inc((long)__bind_cpu);
632 set_current_state(TASK_INTERRUPTIBLE);
634 __set_current_state(TASK_RUNNING);
639 /* Wait for kthread_stop */
640 set_current_state(TASK_INTERRUPTIBLE);
641 while (!kthread_should_stop()) {
643 set_current_state(TASK_INTERRUPTIBLE);
645 __set_current_state(TASK_RUNNING);
649 #ifdef CONFIG_HOTPLUG_CPU
651 * tasklet_kill_immediate is called to remove a tasklet which can already be
652 * scheduled for execution on @cpu.
654 * Unlike tasklet_kill, this function removes the tasklet
655 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
657 * When this function is called, @cpu must be in the CPU_DEAD state.
659 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
661 struct tasklet_struct **i;
663 BUG_ON(cpu_online(cpu));
664 BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
666 if (!test_bit(TASKLET_STATE_SCHED, &t->state))
669 /* CPU is dead, so no lock needed. */
670 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
673 /* If this was the tail element, move the tail ptr */
675 per_cpu(tasklet_vec, cpu).tail = i;
682 static void takeover_tasklets(unsigned int cpu)
684 /* CPU is dead, so no lock needed. */
687 /* Find end, append list for that CPU. */
688 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
689 *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
690 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
691 per_cpu(tasklet_vec, cpu).head = NULL;
692 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
694 raise_softirq_irqoff(TASKLET_SOFTIRQ);
696 if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
697 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
698 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
699 per_cpu(tasklet_hi_vec, cpu).head = NULL;
700 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
702 raise_softirq_irqoff(HI_SOFTIRQ);
706 #endif /* CONFIG_HOTPLUG_CPU */
708 static int __cpuinit cpu_callback(struct notifier_block *nfb,
709 unsigned long action,
712 int hotcpu = (unsigned long)hcpu;
713 struct task_struct *p;
717 case CPU_UP_PREPARE_FROZEN:
718 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
720 printk("ksoftirqd for %i failed\n", hotcpu);
723 kthread_bind(p, hotcpu);
724 per_cpu(ksoftirqd, hotcpu) = p;
727 case CPU_ONLINE_FROZEN:
728 wake_up_process(per_cpu(ksoftirqd, hotcpu));
730 #ifdef CONFIG_HOTPLUG_CPU
731 case CPU_UP_CANCELED:
732 case CPU_UP_CANCELED_FROZEN:
733 if (!per_cpu(ksoftirqd, hotcpu))
735 /* Unbind so it can run. Fall thru. */
736 kthread_bind(per_cpu(ksoftirqd, hotcpu),
737 cpumask_any(cpu_online_mask));
739 case CPU_DEAD_FROZEN: {
740 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
742 p = per_cpu(ksoftirqd, hotcpu);
743 per_cpu(ksoftirqd, hotcpu) = NULL;
744 sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m);
746 takeover_tasklets(hotcpu);
749 #endif /* CONFIG_HOTPLUG_CPU */
754 static struct notifier_block __cpuinitdata cpu_nfb = {
755 .notifier_call = cpu_callback
758 static __init int spawn_ksoftirqd(void)
760 void *cpu = (void *)(long)smp_processor_id();
761 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
763 BUG_ON(err == NOTIFY_BAD);
764 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
765 register_cpu_notifier(&cpu_nfb);
768 early_initcall(spawn_ksoftirqd);
/* NOTE(review): the CONFIG_SMP guard was restored from the upstream file. */
#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 *
 * @func is invoked with @info on the other CPUs through
 * smp_call_function() (which receives @wait) and then on the local CPU
 * with irqs disabled. Returns the smp_call_function() result.
 *
 * NOTE(review): the local func(info) call and the preempt/irq bracketing
 * were restored from the upstream file.
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
790 * [ These __weak aliases are kept in a separate compilation unit, so that
791 * GCC does not inline them incorrectly. ]
794 int __init __weak early_irq_init(void)
799 int __init __weak arch_probe_nr_irqs(void)
804 int __init __weak arch_early_irq_init(void)
809 int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)