/*
 *	linux/kernel/softirq.c
 *
 *	Copyright (C) 1992 Linus Torvalds
 *
 *	Distribute under GPLv2.
 *
 *	Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 *
 *	Remote softirq infrastructure is by Jens Axboe.
 */
#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/tick.h>

#include <trace/events/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only the local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether this will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.

   (A CPU-local handler following these rules is sketched just below.)
 */
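
/*
 * Illustrative sketch (not part of this file): a softirq handler that
 * follows the rules above by touching only CPU-local state.  The names
 * my_item, my_work_list and my_softirq_action are hypothetical; the
 * per-cpu accessors mirror the ones used elsewhere in this file.
 *
 *	struct my_item {
 *		struct list_head list;
 *	};
 *
 *	static DEFINE_PER_CPU(struct list_head, my_work_list);
 *
 *	static void my_softirq_action(struct softirq_action *a)
 *	{
 *		struct list_head *head = &__get_cpu_var(my_work_list);
 *
 *		while (!list_empty(head)) {
 *			struct my_item *item =
 *				list_first_entry(head, struct my_item, list);
 *
 *			list_del(&item->list);
 *			kfree(item);
 *		}
 *	}
 *
 * No shared variables and no global locks: each CPU only ever walks its
 * own list, so the handler needs no serialization beyond whatever the
 * producers of my_work_list provide.
 */
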
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

char *softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};
/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __get_cpu_var(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}
/*
 * This one is for softirq.c-internal use, where hardirqs are
 * disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into add_preempt_count and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	preempt_count() += SOFTIRQ_OFFSET;
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == SOFTIRQ_OFFSET)
		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
	add_preempt_count(SOFTIRQ_OFFSET);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */
void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

static inline void _local_bh_enable_ip(unsigned long ip)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}

void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
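
/*
 * Illustrative sketch (not part of this file): the usual pattern for
 * code that shares data between process context and a softirq or
 * tasklet on the same CPU.  The names my_lock and my_count are
 * hypothetical.
 *
 *	static DEFINE_SPINLOCK(my_lock);
 *	static unsigned long my_count;
 *
 *	void my_update_from_process_context(void)
 *	{
 *		local_bh_disable();		// keep softirqs off this CPU
 *		spin_lock(&my_lock);		// keep other CPUs out
 *		my_count++;
 *		spin_unlock(&my_lock);
 *		local_bh_enable();		// runs any now-pending softirqs
 *	}
 *
 * spin_lock_bh()/spin_unlock_bh() combine the two steps and are what
 * most callers use in practice.
 */
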
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to the ksoftirqd thread after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0));
	lockdep_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			int prev_count = preempt_count();

			trace_softirq_entry(h, softirq_vec);
			h->action(h);
			trace_softirq_exit(h, softirq_vec);
			if (unlikely(prev_count != preempt_count())) {
				printk(KERN_ERR "huh, entered softirq %td %s %p "
				       "with preempt_count %08x, "
				       "exited with %08x?\n", h - softirq_vec,
				       softirq_to_name[h - softirq_vec],
				       h->action, prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qsctr_inc(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	lockdep_softirq_exit();

	account_system_vtime(current);
	_local_bh_enable();
}
#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

#endif
/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();

	rcu_irq_enter();
	if (idle_cpu(cpu) && !in_interrupt()) {
		__irq_enter();
		tick_check_idle(cpu);
	} else
		__irq_enter();
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()	__do_softirq()
#else
# define invoke_softirq()	do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	rcu_irq_exit();
	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
		tick_nohz_stop_sched_tick(0);
#endif
	preempt_enable_no_resched();
}
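
/*
 * Illustrative sketch (not part of this file): how architecture code
 * typically brackets a hardware interrupt with the two helpers above.
 * The function name my_arch_do_IRQ and the dispatch call are
 * hypothetical stand-ins for an arch's real entry path.
 *
 *	void my_arch_do_IRQ(unsigned int irq, struct pt_regs *regs)
 *	{
 *		struct pt_regs *old_regs = set_irq_regs(regs);
 *
 *		irq_enter();
 *		generic_handle_irq(irq);	// run the handler(s)
 *		irq_exit();			// may run pending softirqs
 *
 *		set_irq_regs(old_regs);
 *	}
 *
 * Any softirq raised by the handler is either run on the way out of
 * irq_exit() or handed off to ksoftirqd.
 */
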
/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}
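
/*
 * Illustrative sketch (not part of this file): how a subsystem wires a
 * handler into one of the fixed softirq slots and raises it later.
 * The handler name my_tx_action is hypothetical; real users pick an
 * existing entry of the NR_SOFTIRQS enum (NET_TX_SOFTIRQ here), since
 * softirq slots are compiled in rather than allocated at run time.
 *
 *	static void my_tx_action(struct softirq_action *a)
 *	{
 *		// drain this CPU's private queue here
 *	}
 *
 *	// boot-time setup, typically from an __init function:
 *	open_softirq(NET_TX_SOFTIRQ, my_tx_action);
 *
 *	// later, from any context with interrupts possibly enabled:
 *	raise_softirq(NET_TX_SOFTIRQ);
 *
 *	// or, when interrupts are already known to be disabled:
 *	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 */
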
/*
 * Tasklets
 */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_vec).tail = t;
	__get_cpu_var(tasklet_vec).tail = &(t->next);
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__get_cpu_var(tasklet_hi_vec).tail = t;
	__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);
static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_vec).head;
	__get_cpu_var(tasklet_vec).head = NULL;
	__get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_vec).tail = t;
		__get_cpu_var(tasklet_vec).tail = &(t->next);
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __get_cpu_var(tasklet_hi_vec).head;
	__get_cpu_var(tasklet_hi_vec).head = NULL;
	__get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__get_cpu_var(tasklet_hi_vec).tail = t;
		__get_cpu_var(tasklet_hi_vec).tail = &(t->next);
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}
void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
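
/*
 * Illustrative sketch (not part of this file): the usual life cycle of
 * a tasklet in a driver.  The names my_dev, my_tasklet_fn and the
 * probe/interrupt/remove split are hypothetical.
 *
 *	struct my_dev {
 *		struct tasklet_struct tasklet;
 *	};
 *
 *	static void my_tasklet_fn(unsigned long data)
 *	{
 *		struct my_dev *dev = (struct my_dev *)data;
 *		// bottom-half work; a tasklet never runs concurrently
 *		// with itself, so no self-serialization is needed
 *	}
 *
 *	// probe:
 *	tasklet_init(&dev->tasklet, my_tasklet_fn, (unsigned long)dev);
 *
 *	// interrupt handler (top half):
 *	tasklet_schedule(&dev->tasklet);
 *
 *	// remove: wait until it is neither scheduled nor running
 *	tasklet_kill(&dev->tasklet);
 */
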
DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);

static void __local_trigger(struct call_single_data *cp, int softirq)
{
	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);

	list_add_tail(&cp->list, head);

	/* Trigger the softirq only if the list was previously empty. */
	if (head->next == &cp->list)
		raise_softirq_irqoff(softirq);
}

#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long flags;
	int softirq;

	softirq = cp->priv;

	local_irq_save(flags);
	__local_trigger(cp, softirq);
	local_irq_restore(flags);
}

static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (cpu_online(cpu)) {
		cp->func = remote_softirq_receive;
		cp->info = cp;
		cp->flags = 0;
		cp->priv = softirq;

		__smp_call_function_single(cpu, cp, 0);
		return 0;
	}

	return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif

/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu.  If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
		__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);

/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long flags;
	int this_cpu;

	local_irq_save(flags);
	this_cpu = smp_processor_id();
	__send_remote_softirq(cp, cpu, this_cpu, softirq);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);
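
/*
 * Illustrative sketch (not part of this file): queueing one unit of
 * work so that a softirq runs on a chosen remote CPU.  The struct
 * my_request, the function my_queue_to_cpu and the use of
 * BLOCK_SOFTIRQ are hypothetical; the only requirements visible from
 * this API are an embedded call_single_data (its ->list member is
 * linked into the per-cpu softirq_work_list) and a softirq handler
 * that drains the matching softirq_work_list entry.
 *
 *	struct my_request {
 *		struct call_single_data csd;
 *		int target_cpu;
 *	};
 *
 *	static void my_queue_to_cpu(struct my_request *rq)
 *	{
 *		// May be called with interrupts enabled; the work is
 *		// queued locally instead if target_cpu is offline.
 *		send_remote_softirq(&rq->csd, rq->target_cpu, BLOCK_SOFTIRQ);
 *	}
 */
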
static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
					       unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;
		int i;

		local_irq_disable();
		for (i = 0; i < NR_SOFTIRQS; i++) {
			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
			struct list_head *local_head;

			if (list_empty(head))
				continue;

			local_head = &__get_cpu_var(softirq_work_list[i]);
			list_splice_init(head, local_head);
			raise_softirq_irqoff(i);
		}
		local_irq_enable();
	}

	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
	.notifier_call	= remote_softirq_cpu_notify,
};

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int i;

		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
		for (i = 0; i < NR_SOFTIRQS; i++)
			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
	}

	register_hotcpu_notifier(&remote_softirq_cpu_notifier);

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
static int ksoftirqd(void * __bind_cpu)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			preempt_enable_no_resched();
			schedule();
			preempt_disable();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			do_softirq();
			preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
			rcu_qsctr_inc((long)__bind_cpu);
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
		__get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
		__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return NOTIFY_BAD;
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run.  Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     cpumask_any(cpu_online_mask));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN: {
		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
	}
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

static __init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	BUG_ON(err == NOTIFY_BAD);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
early_initcall(spawn_ksoftirqd);
#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_disable();
	func(info);
	local_irq_enable();
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
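
/*
 * Illustrative sketch (not part of this file): running a short callback
 * on every online CPU, including the calling one.  The name
 * my_flush_local_state is hypothetical; the callback runs with
 * interrupts disabled on each CPU, so it must be fast and must not
 * sleep.
 *
 *	static void my_flush_local_state(void *unused)
 *	{
 *		// per-cpu cleanup work goes here
 *	}
 *
 *	// wait == 1: return only after every CPU has run the callback
 *	on_each_cpu(my_flush_local_state, NULL, 1);
 */
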
/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
	return 0;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}

int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
{
	return 0;
}