2 * linux/kernel/softirq.c
4 * Copyright (C) 1992 Linus Torvalds
6 * Distribute under GPLv2.
8 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
10 * Remote softirq infrastructure is by Jens Axboe.
13 #include <linux/module.h>
14 #include <linux/kernel_stat.h>
15 #include <linux/interrupt.h>
16 #include <linux/init.h>
18 #include <linux/notifier.h>
19 #include <linux/percpu.h>
20 #include <linux/cpu.h>
21 #include <linux/freezer.h>
22 #include <linux/kthread.h>
23 #include <linux/rcupdate.h>
24 #include <linux/ftrace.h>
25 #include <linux/smp.h>
26 #include <linux/tick.h>
28 #define CREATE_TRACE_POINTS
29 #include <trace/events/irq.h>
33 - No shared variables, all the data are CPU local.
34 - If a softirq needs serialization, let it serialize itself
36 - Even if a softirq is serialized, only the local cpu is marked for
37 execution. Hence, we get a sort of weak cpu binding.
38 Though it is still not clear whether this will result in better locality
42 - NET RX softirq. It is multithreaded and does not require
43 any global serialization.
44 - NET TX softirq. It kicks software netdevice queues, hence
45 it is logically serialized per device, but this serialization
46 is invisible to common code.
47 - Tasklets: serialized wrt itself.
50 #ifndef __ARCH_IRQ_STAT
51 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
52 EXPORT_SYMBOL(irq_stat);
55 static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
57 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
59 char *softirq_to_name[NR_SOFTIRQS] = {
60 "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
61 "TASKLET", "SCHED", "HRTIMER", "RCU"
65 * we cannot loop indefinitely here to avoid userspace starvation,
66 * but we also don't want to introduce a worst case 1/HZ latency
67 * to the pending events, so let the scheduler balance
68 * the softirq load for us.
70 void wakeup_softirqd(void)
72 /* Interrupts are disabled: no need to stop preemption */
73 struct task_struct *tsk = __get_cpu_var(ksoftirqd);
75 if (tsk && tsk->state != TASK_RUNNING)
80 * This one is for softirq.c-internal use,
81 * where hardirqs are disabled legitimately:
83 #ifdef CONFIG_TRACE_IRQFLAGS
84 static void __local_bh_disable(unsigned long ip)
88 WARN_ON_ONCE(in_irq());
90 raw_local_irq_save(flags);
92 * The preempt tracer hooks into add_preempt_count and will break
93 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
94 * is set and before current->softirq_enabled is cleared.
95 * We must manually increment preempt_count here and manually
96 * call the trace_preempt_off later.
98 preempt_count() += SOFTIRQ_OFFSET;
100 * Were softirqs turned off above:
102 if (softirq_count() == SOFTIRQ_OFFSET)
103 trace_softirqs_off(ip);
104 raw_local_irq_restore(flags);
106 if (preempt_count() == SOFTIRQ_OFFSET)
107 trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
109 #else /* !CONFIG_TRACE_IRQFLAGS */
110 static inline void __local_bh_disable(unsigned long ip)
112 add_preempt_count(SOFTIRQ_OFFSET);
115 #endif /* CONFIG_TRACE_IRQFLAGS */
117 void local_bh_disable(void)
119 __local_bh_disable((unsigned long)__builtin_return_address(0));
122 EXPORT_SYMBOL(local_bh_disable);
125 * Special-case - softirqs can safely be enabled in
126 * cond_resched_softirq(), or by __do_softirq(),
127 * without processing still-pending softirqs:
129 void _local_bh_enable(void)
131 WARN_ON_ONCE(in_irq());
132 WARN_ON_ONCE(!irqs_disabled());
134 if (softirq_count() == SOFTIRQ_OFFSET)
135 trace_softirqs_on((unsigned long)__builtin_return_address(0));
136 sub_preempt_count(SOFTIRQ_OFFSET);
139 EXPORT_SYMBOL(_local_bh_enable);
141 static inline void _local_bh_enable_ip(unsigned long ip)
143 WARN_ON_ONCE(in_irq() || irqs_disabled());
144 #ifdef CONFIG_TRACE_IRQFLAGS
148 * Are softirqs going to be turned on now:
150 if (softirq_count() == SOFTIRQ_OFFSET)
151 trace_softirqs_on(ip);
153 * Keep preemption disabled until we are done with
154 * softirq processing:
156 sub_preempt_count(SOFTIRQ_OFFSET - 1);
158 if (unlikely(!in_interrupt() && local_softirq_pending()))
162 #ifdef CONFIG_TRACE_IRQFLAGS
165 preempt_check_resched();
168 void local_bh_enable(void)
170 _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
172 EXPORT_SYMBOL(local_bh_enable);
174 void local_bh_enable_ip(unsigned long ip)
176 _local_bh_enable_ip(ip);
178 EXPORT_SYMBOL(local_bh_enable_ip);
181 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
182 * and we fall back to softirqd after that.
184 * This number has been established via experimentation.
185 * The two things to balance are latency and fairness -
186 * we want to handle softirqs as soon as possible, but they
187 * should not be able to lock up the box.
189 #define MAX_SOFTIRQ_RESTART 10
191 asmlinkage void __do_softirq(void)
193 struct softirq_action *h;
195 int max_restart = MAX_SOFTIRQ_RESTART;
198 pending = local_softirq_pending();
199 account_system_vtime(current);
201 __local_bh_disable((unsigned long)__builtin_return_address(0));
202 lockdep_softirq_enter();
204 cpu = smp_processor_id();
206 /* Reset the pending bitmask before enabling irqs */
207 set_softirq_pending(0);
215 int prev_count = preempt_count();
217 trace_softirq_entry(h, softirq_vec);
219 trace_softirq_exit(h, softirq_vec);
220 if (unlikely(prev_count != preempt_count())) {
221 printk(KERN_ERR "huh, entered softirq %td %s %p"
222 "with preempt_count %08x,"
223 " exited with %08x?\n", h - softirq_vec,
224 softirq_to_name[h - softirq_vec],
225 h->action, prev_count, preempt_count());
226 preempt_count() = prev_count;
229 rcu_bh_qsctr_inc(cpu);
237 pending = local_softirq_pending();
238 if (pending && --max_restart)
244 lockdep_softirq_exit();
246 account_system_vtime(current);
250 #ifndef __ARCH_HAS_DO_SOFTIRQ
252 asmlinkage void do_softirq(void)
260 local_irq_save(flags);
262 pending = local_softirq_pending();
267 local_irq_restore(flags);
273 * Enter an interrupt context.
277 int cpu = smp_processor_id();
280 if (idle_cpu(cpu) && !in_interrupt()) {
282 tick_check_idle(cpu);
287 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
288 # define invoke_softirq() __do_softirq()
290 # define invoke_softirq() do_softirq()
294 * Exit an interrupt context. Process softirqs if needed and possible:
298 account_system_vtime(current);
299 trace_hardirq_exit();
300 sub_preempt_count(IRQ_EXIT_OFFSET);
301 if (!in_interrupt() && local_softirq_pending())
305 /* Make sure that timer wheel updates are propagated */
307 if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
308 tick_nohz_stop_sched_tick(0);
310 preempt_enable_no_resched();
314 * This function must run with irqs disabled!
316 inline void raise_softirq_irqoff(unsigned int nr)
318 __raise_softirq_irqoff(nr);
321 * If we're in an interrupt or softirq, we're done
322 * (this also catches softirq-disabled code). We will
323 * actually run the softirq once we return from
324 * the irq or softirq.
326 * Otherwise we wake up ksoftirqd to make sure we
327 * schedule the softirq soon.
333 void raise_softirq(unsigned int nr)
337 local_irq_save(flags);
338 raise_softirq_irqoff(nr);
339 local_irq_restore(flags);
342 void open_softirq(int nr, void (*action)(struct softirq_action *))
344 softirq_vec[nr].action = action;
350 struct tasklet_struct *head;
351 struct tasklet_struct **tail;
354 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
355 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
357 void __tasklet_schedule(struct tasklet_struct *t)
361 local_irq_save(flags);
363 *__get_cpu_var(tasklet_vec).tail = t;
364 __get_cpu_var(tasklet_vec).tail = &(t->next);
365 raise_softirq_irqoff(TASKLET_SOFTIRQ);
366 local_irq_restore(flags);
369 EXPORT_SYMBOL(__tasklet_schedule);
371 void __tasklet_hi_schedule(struct tasklet_struct *t)
375 local_irq_save(flags);
377 *__get_cpu_var(tasklet_hi_vec).tail = t;
378 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
379 raise_softirq_irqoff(HI_SOFTIRQ);
380 local_irq_restore(flags);
383 EXPORT_SYMBOL(__tasklet_hi_schedule);
385 void __tasklet_hi_schedule_first(struct tasklet_struct *t)
387 BUG_ON(!irqs_disabled());
389 t->next = __get_cpu_var(tasklet_hi_vec).head;
390 __get_cpu_var(tasklet_hi_vec).head = t;
391 __raise_softirq_irqoff(HI_SOFTIRQ);
394 EXPORT_SYMBOL(__tasklet_hi_schedule_first);
396 static void tasklet_action(struct softirq_action *a)
398 struct tasklet_struct *list;
401 list = __get_cpu_var(tasklet_vec).head;
402 __get_cpu_var(tasklet_vec).head = NULL;
403 __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
407 struct tasklet_struct *t = list;
411 if (tasklet_trylock(t)) {
412 if (!atomic_read(&t->count)) {
413 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
424 *__get_cpu_var(tasklet_vec).tail = t;
425 __get_cpu_var(tasklet_vec).tail = &(t->next);
426 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
431 static void tasklet_hi_action(struct softirq_action *a)
433 struct tasklet_struct *list;
436 list = __get_cpu_var(tasklet_hi_vec).head;
437 __get_cpu_var(tasklet_hi_vec).head = NULL;
438 __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
442 struct tasklet_struct *t = list;
446 if (tasklet_trylock(t)) {
447 if (!atomic_read(&t->count)) {
448 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
459 *__get_cpu_var(tasklet_hi_vec).tail = t;
460 __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
461 __raise_softirq_irqoff(HI_SOFTIRQ);
467 void tasklet_init(struct tasklet_struct *t,
468 void (*func)(unsigned long), unsigned long data)
472 atomic_set(&t->count, 0);
477 EXPORT_SYMBOL(tasklet_init);
479 void tasklet_kill(struct tasklet_struct *t)
482 printk("Attempt to kill tasklet from interrupt\n");
484 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
487 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
489 tasklet_unlock_wait(t);
490 clear_bit(TASKLET_STATE_SCHED, &t->state);
493 EXPORT_SYMBOL(tasklet_kill);
495 DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
496 EXPORT_PER_CPU_SYMBOL(softirq_work_list);
498 static void __local_trigger(struct call_single_data *cp, int softirq)
500 struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);
502 list_add_tail(&cp->list, head);
504 /* Trigger the softirq only if the list was previously empty. */
505 if (head->next == &cp->list)
506 raise_softirq_irqoff(softirq);
509 #ifdef CONFIG_USE_GENERIC_SMP_HELPERS
510 static void remote_softirq_receive(void *data)
512 struct call_single_data *cp = data;
518 local_irq_save(flags);
519 __local_trigger(cp, softirq);
520 local_irq_restore(flags);
523 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
525 if (cpu_online(cpu)) {
526 cp->func = remote_softirq_receive;
531 __smp_call_function_single(cpu, cp, 0);
536 #else /* CONFIG_USE_GENERIC_SMP_HELPERS */
537 static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
544 * __send_remote_softirq - try to schedule softirq work on a remote cpu
545 * @cp: private SMP call function data area
546 * @cpu: the remote cpu
547 * @this_cpu: the currently executing cpu
548 * @softirq: the softirq for the work
550 * Attempt to schedule softirq work on a remote cpu. If this cannot be
551 * done, the work is instead queued up on the local cpu.
553 * Interrupts must be disabled.
555 void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
557 if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
558 __local_trigger(cp, softirq);
560 EXPORT_SYMBOL(__send_remote_softirq);
563 * send_remote_softirq - try to schedule softirq work on a remote cpu
564 * @cp: private SMP call function data area
565 * @cpu: the remote cpu
566 * @softirq: the softirq for the work
568 * Like __send_remote_softirq except that disabling interrupts and
569 * computing the current cpu are done for the caller.
571 void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
576 local_irq_save(flags);
577 this_cpu = smp_processor_id();
578 __send_remote_softirq(cp, cpu, this_cpu, softirq);
579 local_irq_restore(flags);
581 EXPORT_SYMBOL(send_remote_softirq);
583 static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
584 unsigned long action, void *hcpu)
587 * If a CPU goes away, splice its entries to the current CPU
588 * and trigger a run of the softirq
590 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
591 int cpu = (unsigned long) hcpu;
595 for (i = 0; i < NR_SOFTIRQS; i++) {
596 struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
597 struct list_head *local_head;
599 if (list_empty(head))
602 local_head = &__get_cpu_var(softirq_work_list[i]);
603 list_splice_init(head, local_head);
604 raise_softirq_irqoff(i);
612 static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
613 .notifier_call = remote_softirq_cpu_notify,
616 void __init softirq_init(void)
620 for_each_possible_cpu(cpu) {
623 per_cpu(tasklet_vec, cpu).tail =
624 &per_cpu(tasklet_vec, cpu).head;
625 per_cpu(tasklet_hi_vec, cpu).tail =
626 &per_cpu(tasklet_hi_vec, cpu).head;
627 for (i = 0; i < NR_SOFTIRQS; i++)
628 INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
631 register_hotcpu_notifier(&remote_softirq_cpu_notifier);
633 open_softirq(TASKLET_SOFTIRQ, tasklet_action);
634 open_softirq(HI_SOFTIRQ, tasklet_hi_action);
637 static int ksoftirqd(void * __bind_cpu)
639 set_current_state(TASK_INTERRUPTIBLE);
641 while (!kthread_should_stop()) {
643 if (!local_softirq_pending()) {
644 preempt_enable_no_resched();
649 __set_current_state(TASK_RUNNING);
651 while (local_softirq_pending()) {
652 /* Disabling preemption stops the cpu from going offline.
653 If it is already offline, we'll be on the wrong CPU:
655 if (cpu_is_offline((long)__bind_cpu))
658 preempt_enable_no_resched();
661 rcu_qsctr_inc((long)__bind_cpu);
664 set_current_state(TASK_INTERRUPTIBLE);
666 __set_current_state(TASK_RUNNING);
671 /* Wait for kthread_stop */
672 set_current_state(TASK_INTERRUPTIBLE);
673 while (!kthread_should_stop()) {
675 set_current_state(TASK_INTERRUPTIBLE);
677 __set_current_state(TASK_RUNNING);
681 #ifdef CONFIG_HOTPLUG_CPU
683 * tasklet_kill_immediate is called to remove a tasklet which can already be
684 * scheduled for execution on @cpu.
686 * Unlike tasklet_kill, this function removes the tasklet
687 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
689 * When this function is called, @cpu must be in the CPU_DEAD state.
691 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
693 struct tasklet_struct **i;
695 BUG_ON(cpu_online(cpu));
696 BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
698 if (!test_bit(TASKLET_STATE_SCHED, &t->state))
701 /* CPU is dead, so no lock needed. */
702 for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
705 /* If this was the tail element, move the tail ptr */
707 per_cpu(tasklet_vec, cpu).tail = i;
714 static void takeover_tasklets(unsigned int cpu)
716 /* CPU is dead, so no lock needed. */
719 /* Find end, append list for that CPU. */
720 if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
721 *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
722 __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
723 per_cpu(tasklet_vec, cpu).head = NULL;
724 per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
726 raise_softirq_irqoff(TASKLET_SOFTIRQ);
728 if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
729 *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
730 __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
731 per_cpu(tasklet_hi_vec, cpu).head = NULL;
732 per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
734 raise_softirq_irqoff(HI_SOFTIRQ);
738 #endif /* CONFIG_HOTPLUG_CPU */
740 static int __cpuinit cpu_callback(struct notifier_block *nfb,
741 unsigned long action,
744 int hotcpu = (unsigned long)hcpu;
745 struct task_struct *p;
749 case CPU_UP_PREPARE_FROZEN:
750 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
752 printk("ksoftirqd for %i failed\n", hotcpu);
755 kthread_bind(p, hotcpu);
756 per_cpu(ksoftirqd, hotcpu) = p;
759 case CPU_ONLINE_FROZEN:
760 wake_up_process(per_cpu(ksoftirqd, hotcpu));
762 #ifdef CONFIG_HOTPLUG_CPU
763 case CPU_UP_CANCELED:
764 case CPU_UP_CANCELED_FROZEN:
765 if (!per_cpu(ksoftirqd, hotcpu))
767 /* Unbind so it can run. Fall thru. */
768 kthread_bind(per_cpu(ksoftirqd, hotcpu),
769 cpumask_any(cpu_online_mask));
771 case CPU_DEAD_FROZEN: {
772 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
774 p = per_cpu(ksoftirqd, hotcpu);
775 per_cpu(ksoftirqd, hotcpu) = NULL;
776 sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
778 takeover_tasklets(hotcpu);
781 #endif /* CONFIG_HOTPLUG_CPU */
786 static struct notifier_block __cpuinitdata cpu_nfb = {
787 .notifier_call = cpu_callback
790 static __init int spawn_ksoftirqd(void)
792 void *cpu = (void *)(long)smp_processor_id();
793 int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
795 BUG_ON(err == NOTIFY_BAD);
796 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
797 register_cpu_notifier(&cpu_nfb);
800 early_initcall(spawn_ksoftirqd);
804 * Call a function on all processors
806 int on_each_cpu(void (*func) (void *info), void *info, int wait)
811 ret = smp_call_function(func, info, wait);
818 EXPORT_SYMBOL(on_each_cpu);
822 * [ These __weak aliases are kept in a separate compilation unit, so that
823 * GCC does not inline them incorrectly. ]
826 int __init __weak early_irq_init(void)
831 int __init __weak arch_probe_nr_irqs(void)
836 int __init __weak arch_early_irq_init(void)
841 int __weak arch_init_chip_data(struct irq_desc *desc, int node)