Merge git://git.infradead.org/mtd-2.6
[linux-2.6] / kernel / softirq.c
1 /*
2  *      linux/kernel/softirq.c
3  *
4  *      Copyright (C) 1992 Linus Torvalds
5  *
6  * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
7  */
8
9 #include <linux/module.h>
10 #include <linux/kernel_stat.h>
11 #include <linux/interrupt.h>
12 #include <linux/init.h>
13 #include <linux/mm.h>
14 #include <linux/notifier.h>
15 #include <linux/percpu.h>
16 #include <linux/cpu.h>
17 #include <linux/kthread.h>
18 #include <linux/rcupdate.h>
19 #include <linux/smp.h>
20
21 #include <asm/irq.h>
22 /*
23    - No shared variables, all the data are CPU local.
24    - If a softirq needs serialization, let it serialize itself
25      by its own spinlocks.
26    - Even if softirq is serialized, only local cpu is marked for
27      execution. Hence, we get something sort of weak cpu binding.
28      It is still unclear, though, whether this results in better
29      locality or not.
30
31    Examples:
32    - NET RX softirq. It is multithreaded and does not require
33      any global serialization.
34    - NET TX softirq. It kicks software netdevice queues, hence
35      it is logically serialized per device, but this serialization
36      is invisible to common code.
37    - Tasklets: serialized wrt itself.
38  */
39
/*
 * Per-CPU irq_cpustat_t array with NR_CPUS entries.  It is only defined
 * (and exported) here when the architecture has not defined
 * __ARCH_IRQ_STAT, i.e. does not provide its own.
 */
40 #ifndef __ARCH_IRQ_STAT
41 irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
42 EXPORT_SYMBOL(irq_stat);
43 #endif
44
/*
 * Table of softirq handlers, filled in by open_softirq() and indexed by
 * softirq number.  __do_softirq() walks it in step with the bits of the
 * __u32 pending mask, hence the 32 entries.
 */
45 static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
46
/*
 * Per-CPU pointer to that CPU's ksoftirqd thread.  Stored by
 * cpu_callback() on CPU_UP_PREPARE and reset to NULL on CPU_DEAD;
 * wakeup_softirqd() tolerates a NULL entry.
 */
47 static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
48
49 /*
50  * we cannot loop indefinitely here to avoid userspace starvation,
51  * but we also don't want to introduce a worst case 1/HZ latency
52  * to the pending events, so lets the scheduler to balance
53  * the softirq load for us.
54  */
55 static inline void wakeup_softirqd(void)
56 {
57         /* Interrupts are disabled: no need to stop preemption */
58         struct task_struct *tsk = __get_cpu_var(ksoftirqd);
59
60         if (tsk && tsk->state != TASK_RUNNING)
61                 wake_up_process(tsk);
62 }
63
64 /*
65  * We restart softirq processing MAX_SOFTIRQ_RESTART times,
66  * and we fall back to softirqd after that.
67  *
68  * This number has been established via experimentation.
69  * The two things to balance is latency against fairness -
70  * we want to handle softirqs as soon as possible, but they
71  * should not be able to lock up the box.
72  */
73 #define MAX_SOFTIRQ_RESTART 10
74
75 asmlinkage void __do_softirq(void)
76 {
77         struct softirq_action *h;
78         __u32 pending;
79         int max_restart = MAX_SOFTIRQ_RESTART;
80         int cpu;
81
82         pending = local_softirq_pending();
83
84         local_bh_disable();
85         cpu = smp_processor_id();
86 restart:
87         /* Reset the pending bitmask before enabling irqs */
88         set_softirq_pending(0);
89
90         local_irq_enable();
91
92         h = softirq_vec;
93
94         do {
95                 if (pending & 1) {
96                         h->action(h);
97                         rcu_bh_qsctr_inc(cpu);
98                 }
99                 h++;
100                 pending >>= 1;
101         } while (pending);
102
103         local_irq_disable();
104
105         pending = local_softirq_pending();
106         if (pending && --max_restart)
107                 goto restart;
108
109         if (pending)
110                 wakeup_softirqd();
111
112         __local_bh_enable();
113 }
114
115 #ifndef __ARCH_HAS_DO_SOFTIRQ
116
117 asmlinkage void do_softirq(void)
118 {
119         __u32 pending;
120         unsigned long flags;
121
122         if (in_interrupt())
123                 return;
124
125         local_irq_save(flags);
126
127         pending = local_softirq_pending();
128
129         if (pending)
130                 __do_softirq();
131
132         local_irq_restore(flags);
133 }
134
135 EXPORT_SYMBOL(do_softirq);
136
137 #endif
138
139 void local_bh_enable(void)
140 {
141         WARN_ON(irqs_disabled());
142         /*
143          * Keep preemption disabled until we are done with
144          * softirq processing:
145          */
146         sub_preempt_count(SOFTIRQ_OFFSET - 1);
147
148         if (unlikely(!in_interrupt() && local_softirq_pending()))
149                 do_softirq();
150
151         dec_preempt_count();
152         preempt_check_resched();
153 }
154 EXPORT_SYMBOL(local_bh_enable);
155
156 #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
157 # define invoke_softirq()       __do_softirq()
158 #else
159 # define invoke_softirq()       do_softirq()
160 #endif
161
162 /*
163  * Exit an interrupt context. Process softirqs if needed and possible:
164  */
165 void irq_exit(void)
166 {
167         account_system_vtime(current);
168         sub_preempt_count(IRQ_EXIT_OFFSET);
169         if (!in_interrupt() && local_softirq_pending())
170                 invoke_softirq();
171         preempt_enable_no_resched();
172 }
173
174 /*
175  * This function must run with irqs disabled!
176  */
177 inline fastcall void raise_softirq_irqoff(unsigned int nr)
178 {
179         __raise_softirq_irqoff(nr);
180
181         /*
182          * If we're in an interrupt or softirq, we're done
183          * (this also catches softirq-disabled code). We will
184          * actually run the softirq once we return from
185          * the irq or softirq.
186          *
187          * Otherwise we wake up ksoftirqd to make sure we
188          * schedule the softirq soon.
189          */
190         if (!in_interrupt())
191                 wakeup_softirqd();
192 }
193
194 EXPORT_SYMBOL(raise_softirq_irqoff);
195
196 void fastcall raise_softirq(unsigned int nr)
197 {
198         unsigned long flags;
199
200         local_irq_save(flags);
201         raise_softirq_irqoff(nr);
202         local_irq_restore(flags);
203 }
204
205 void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
206 {
207         softirq_vec[nr].data = data;
208         softirq_vec[nr].action = action;
209 }
210
211 EXPORT_SYMBOL(open_softirq);
212
213 /* Tasklets */
/*
 * Head of a singly linked list of tasklets, chained through
 * tasklet_struct::next.  A NULL list pointer means the list is empty.
 */
214 struct tasklet_head
215 {
216         struct tasklet_struct *list;
217 };
218
219 /* Some compilers disobey section attribute on statics when not
220    initialized -- RR */
/*
 * Per-CPU lists of scheduled tasklets: tasklet_vec is filled by
 * __tasklet_schedule() and drained by tasklet_action() (TASKLET_SOFTIRQ);
 * tasklet_hi_vec is filled by __tasklet_hi_schedule() and drained by
 * tasklet_hi_action() (HI_SOFTIRQ).
 */
221 static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
222 static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };
223
224 void fastcall __tasklet_schedule(struct tasklet_struct *t)
225 {
226         unsigned long flags;
227
228         local_irq_save(flags);
229         t->next = __get_cpu_var(tasklet_vec).list;
230         __get_cpu_var(tasklet_vec).list = t;
231         raise_softirq_irqoff(TASKLET_SOFTIRQ);
232         local_irq_restore(flags);
233 }
234
235 EXPORT_SYMBOL(__tasklet_schedule);
236
237 void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
238 {
239         unsigned long flags;
240
241         local_irq_save(flags);
242         t->next = __get_cpu_var(tasklet_hi_vec).list;
243         __get_cpu_var(tasklet_hi_vec).list = t;
244         raise_softirq_irqoff(HI_SOFTIRQ);
245         local_irq_restore(flags);
246 }
247
248 EXPORT_SYMBOL(__tasklet_hi_schedule);
249
250 static void tasklet_action(struct softirq_action *a)
251 {
252         struct tasklet_struct *list;
253
254         local_irq_disable();
255         list = __get_cpu_var(tasklet_vec).list;
256         __get_cpu_var(tasklet_vec).list = NULL;
257         local_irq_enable();
258
259         while (list) {
260                 struct tasklet_struct *t = list;
261
262                 list = list->next;
263
264                 if (tasklet_trylock(t)) {
265                         if (!atomic_read(&t->count)) {
266                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
267                                         BUG();
268                                 t->func(t->data);
269                                 tasklet_unlock(t);
270                                 continue;
271                         }
272                         tasklet_unlock(t);
273                 }
274
275                 local_irq_disable();
276                 t->next = __get_cpu_var(tasklet_vec).list;
277                 __get_cpu_var(tasklet_vec).list = t;
278                 __raise_softirq_irqoff(TASKLET_SOFTIRQ);
279                 local_irq_enable();
280         }
281 }
282
283 static void tasklet_hi_action(struct softirq_action *a)
284 {
285         struct tasklet_struct *list;
286
287         local_irq_disable();
288         list = __get_cpu_var(tasklet_hi_vec).list;
289         __get_cpu_var(tasklet_hi_vec).list = NULL;
290         local_irq_enable();
291
292         while (list) {
293                 struct tasklet_struct *t = list;
294
295                 list = list->next;
296
297                 if (tasklet_trylock(t)) {
298                         if (!atomic_read(&t->count)) {
299                                 if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
300                                         BUG();
301                                 t->func(t->data);
302                                 tasklet_unlock(t);
303                                 continue;
304                         }
305                         tasklet_unlock(t);
306                 }
307
308                 local_irq_disable();
309                 t->next = __get_cpu_var(tasklet_hi_vec).list;
310                 __get_cpu_var(tasklet_hi_vec).list = t;
311                 __raise_softirq_irqoff(HI_SOFTIRQ);
312                 local_irq_enable();
313         }
314 }
315
316
317 void tasklet_init(struct tasklet_struct *t,
318                   void (*func)(unsigned long), unsigned long data)
319 {
320         t->next = NULL;
321         t->state = 0;
322         atomic_set(&t->count, 0);
323         t->func = func;
324         t->data = data;
325 }
326
327 EXPORT_SYMBOL(tasklet_init);
328
329 void tasklet_kill(struct tasklet_struct *t)
330 {
331         if (in_interrupt())
332                 printk("Attempt to kill tasklet from interrupt\n");
333
334         while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
335                 do
336                         yield();
337                 while (test_bit(TASKLET_STATE_SCHED, &t->state));
338         }
339         tasklet_unlock_wait(t);
340         clear_bit(TASKLET_STATE_SCHED, &t->state);
341 }
342
343 EXPORT_SYMBOL(tasklet_kill);
344
345 void __init softirq_init(void)
346 {
347         open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
348         open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
349 }
350
351 static int ksoftirqd(void * __bind_cpu)
352 {
353         set_user_nice(current, 19);
354         current->flags |= PF_NOFREEZE;
355
356         set_current_state(TASK_INTERRUPTIBLE);
357
358         while (!kthread_should_stop()) {
359                 preempt_disable();
360                 if (!local_softirq_pending()) {
361                         preempt_enable_no_resched();
362                         schedule();
363                         preempt_disable();
364                 }
365
366                 __set_current_state(TASK_RUNNING);
367
368                 while (local_softirq_pending()) {
369                         /* Preempt disable stops cpu going offline.
370                            If already offline, we'll be on wrong CPU:
371                            don't process */
372                         if (cpu_is_offline((long)__bind_cpu))
373                                 goto wait_to_die;
374                         do_softirq();
375                         preempt_enable_no_resched();
376                         cond_resched();
377                         preempt_disable();
378                 }
379                 preempt_enable();
380                 set_current_state(TASK_INTERRUPTIBLE);
381         }
382         __set_current_state(TASK_RUNNING);
383         return 0;
384
385 wait_to_die:
386         preempt_enable();
387         /* Wait for kthread_stop */
388         set_current_state(TASK_INTERRUPTIBLE);
389         while (!kthread_should_stop()) {
390                 schedule();
391                 set_current_state(TASK_INTERRUPTIBLE);
392         }
393         __set_current_state(TASK_RUNNING);
394         return 0;
395 }
396
397 #ifdef CONFIG_HOTPLUG_CPU
398 /*
399  * tasklet_kill_immediate is called to remove a tasklet which can already be
400  * scheduled for execution on @cpu.
401  *
402  * Unlike tasklet_kill, this function removes the tasklet
403  * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
404  *
405  * When this function is called, @cpu must be in the CPU_DEAD state.
406  */
407 void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
408 {
409         struct tasklet_struct **i;
410
411         BUG_ON(cpu_online(cpu));
412         BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));
413
414         if (!test_bit(TASKLET_STATE_SCHED, &t->state))
415                 return;
416
417         /* CPU is dead, so no lock needed. */
418         for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
419                 if (*i == t) {
420                         *i = t->next;
421                         return;
422                 }
423         }
424         BUG();
425 }
426
427 static void takeover_tasklets(unsigned int cpu)
428 {
429         struct tasklet_struct **i;
430
431         /* CPU is dead, so no lock needed. */
432         local_irq_disable();
433
434         /* Find end, append list for that CPU. */
435         for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
436         *i = per_cpu(tasklet_vec, cpu).list;
437         per_cpu(tasklet_vec, cpu).list = NULL;
438         raise_softirq_irqoff(TASKLET_SOFTIRQ);
439
440         for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
441         *i = per_cpu(tasklet_hi_vec, cpu).list;
442         per_cpu(tasklet_hi_vec, cpu).list = NULL;
443         raise_softirq_irqoff(HI_SOFTIRQ);
444
445         local_irq_enable();
446 }
447 #endif /* CONFIG_HOTPLUG_CPU */
448
449 static int cpu_callback(struct notifier_block *nfb,
450                                   unsigned long action,
451                                   void *hcpu)
452 {
453         int hotcpu = (unsigned long)hcpu;
454         struct task_struct *p;
455
456         switch (action) {
457         case CPU_UP_PREPARE:
458                 BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
459                 BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
460                 p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
461                 if (IS_ERR(p)) {
462                         printk("ksoftirqd for %i failed\n", hotcpu);
463                         return NOTIFY_BAD;
464                 }
465                 kthread_bind(p, hotcpu);
466                 per_cpu(ksoftirqd, hotcpu) = p;
467                 break;
468         case CPU_ONLINE:
469                 wake_up_process(per_cpu(ksoftirqd, hotcpu));
470                 break;
471 #ifdef CONFIG_HOTPLUG_CPU
472         case CPU_UP_CANCELED:
473                 /* Unbind so it can run.  Fall thru. */
474                 kthread_bind(per_cpu(ksoftirqd, hotcpu),
475                              any_online_cpu(cpu_online_map));
476         case CPU_DEAD:
477                 p = per_cpu(ksoftirqd, hotcpu);
478                 per_cpu(ksoftirqd, hotcpu) = NULL;
479                 kthread_stop(p);
480                 takeover_tasklets(hotcpu);
481                 break;
482 #endif /* CONFIG_HOTPLUG_CPU */
483         }
484         return NOTIFY_OK;
485 }
486
487 static struct notifier_block cpu_nfb = {
488         .notifier_call = cpu_callback
489 };
490
491 __init int spawn_ksoftirqd(void)
492 {
493         void *cpu = (void *)(long)smp_processor_id();
494         cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
495         cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
496         register_cpu_notifier(&cpu_nfb);
497         return 0;
498 }
499
500 #ifdef CONFIG_SMP
/*
 * Call a function on all processors.
 *
 * @func is first dispatched to the other CPUs through
 * smp_call_function() (@retry and @wait are passed straight through),
 * then run on the local CPU with interrupts disabled.  Preemption is
 * off for the whole sequence.
 *
 * Returns whatever smp_call_function() returned.
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
	int status;

	preempt_disable();

	/* Remote CPUs first ... */
	status = smp_call_function(func, info, retry, wait);

	/* ... then ourselves, with local interrupts off. */
	local_irq_disable();
	func(info);
	local_irq_enable();

	preempt_enable();
	return status;
}
EXPORT_SYMBOL(on_each_cpu);
517 #endif