[PATCH] per-task-delay-accounting: setup
[linux-2.6] / kernel / softirq.c
/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 */

#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if a softirq is serialized, only the local cpu is marked for
     execution. Hence, we get a sort of weak cpu binding, though it is
     still not clear whether this results in better locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: each tasklet is serialized with respect to itself.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

/*
 * To avoid userspace starvation we cannot loop here indefinitely,
 * but we also don't want to introduce a worst-case 1/HZ latency
 * for the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}

/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
static void __local_bh_disable(unsigned long ip)
{
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        raw_local_irq_save(flags);
        add_preempt_count(SOFTIRQ_OFFSET);
        /*
         * Were softirqs turned off above:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_off(ip);
        raw_local_irq_restore(flags);
}

void local_bh_disable(void)
{
        __local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);

void __local_bh_enable(void)
{
        WARN_ON_ONCE(in_irq());

        /*
         * softirqs should never be enabled by __local_bh_enable(),
         * it always nests inside local_bh_enable() sections:
         */
        WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);

        sub_preempt_count(SOFTIRQ_OFFSET);
}
EXPORT_SYMBOL_GPL(__local_bh_enable);

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
        WARN_ON_ONCE(in_irq());
        WARN_ON_ONCE(!irqs_disabled());

        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

void local_bh_enable(void)
{
        unsigned long flags;

        WARN_ON_ONCE(in_irq());
        WARN_ON_ONCE(irqs_disabled());

        local_irq_save(flags);
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
        local_irq_restore(flags);
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable);
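
/*
 * Example (illustrative sketch; my_dev and its counter are hypothetical):
 * code running in process context typically brackets sections that race
 * with softirq/tasklet handlers on this CPU with local_bh_disable() and
 * local_bh_enable():
 *
 *      local_bh_disable();
 *      my_dev->rx_dropped++;           (softirqs cannot run here)
 *      local_bh_enable();              (may run pending softirqs right away)
 *
 * Most users reach this through spin_lock_bh()/spin_unlock_bh() rather
 * than calling these functions directly.
 */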

void local_bh_enable_ip(unsigned long ip)
{
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        local_irq_save(flags);
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
        local_irq_restore(flags);
        preempt_check_resched();
}
EXPORT_SYMBOL(local_bh_enable_ip);

/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to ksoftirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();
        account_system_vtime(current);

        __local_bh_disable((unsigned long)__builtin_return_address(0));
        trace_softirq_enter();

        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        set_softirq_pending(0);

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        h->action(h);
                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        trace_softirq_exit();

        account_system_vtime(current);
        _local_bh_enable();
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

EXPORT_SYMBOL(do_softirq);

#endif

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()       __do_softirq()
#else
# define invoke_softirq()       do_softirq()
#endif

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        trace_hardirq_exit();
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();
        preempt_enable_no_resched();
}
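
/*
 * Example (illustrative sketch of the expected calling convention;
 * my_arch_do_IRQ and dispatch_handlers are hypothetical): an architecture's
 * interrupt entry path brackets handler dispatch with irq_enter() and
 * irq_exit(), so softirqs raised by the handlers get run on the way out:
 *
 *      void my_arch_do_IRQ(int irq)
 *      {
 *              irq_enter();
 *              dispatch_handlers(irq);
 *              irq_exit();             (may call invoke_softirq() here)
 *      }
 */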

/*
 * This function must run with irqs disabled!
 */
inline fastcall void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

EXPORT_SYMBOL(raise_softirq_irqoff);

void fastcall raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}

void open_softirq(int nr, void (*action)(struct softirq_action*), void *data)
{
        softirq_vec[nr].data = data;
        softirq_vec[nr].action = action;
}
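
/*
 * Example (illustrative sketch; MY_SOFTIRQ and my_softirq_action are
 * hypothetical, and adding new softirqs is reserved for core subsystems -
 * most users should use tasklets): a subsystem that owns a slot in
 * softirq_vec registers its handler once and then raises the softirq from
 * interrupt context:
 *
 *      open_softirq(MY_SOFTIRQ, my_softirq_action, NULL);
 *      ...
 *      raise_softirq(MY_SOFTIRQ);              (saves/restores irq flags)
 *      raise_softirq_irqoff(MY_SOFTIRQ);       (when irqs are already off)
 */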

/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *list;
};

/* Some compilers disobey section attribute on statics when not
   initialized -- RR */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec) = { NULL };
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec) = { NULL };

void fastcall __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = t;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void fastcall __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = t;
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).list;
        __get_cpu_var(tasklet_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_vec).list;
                __get_cpu_var(tasklet_vec).list = t;
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}

static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).list;
        __get_cpu_var(tasklet_hi_vec).list = NULL;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = __get_cpu_var(tasklet_hi_vec).list;
                __get_cpu_var(tasklet_hi_vec).list = t;
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}


void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
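
/*
 * Example (illustrative sketch of the usual tasklet lifecycle; my_func,
 * my_tasklet and my_dev are hypothetical, and tasklet_schedule() is the
 * wrapper declared in <linux/interrupt.h>):
 *
 *      static void my_func(unsigned long data);
 *      static struct tasklet_struct my_tasklet;
 *
 *      tasklet_init(&my_tasklet, my_func, (unsigned long)my_dev);
 *      tasklet_schedule(&my_tasklet);          (e.g. from the irq handler)
 *      tasklet_kill(&my_tasklet);              (on teardown, process context)
 *
 * Statically allocated tasklets can use DECLARE_TASKLET(name, func, data)
 * instead of tasklet_init().
 */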

void __init softirq_init(void)
{
        open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL);
}

static int ksoftirqd(void * __bind_cpu)
{
        set_user_nice(current, 19);
        current->flags |= PF_NOFREEZE;

        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops the cpu from going offline.
                           If we're already offline, we're on the wrong CPU:
                           don't process. */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}


#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which may already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).list; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        return;
                }
        }
        BUG();
}

static void takeover_tasklets(unsigned int cpu)
{
        struct tasklet_struct **i;

        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        for (i = &__get_cpu_var(tasklet_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_vec, cpu).list;
        per_cpu(tasklet_vec, cpu).list = NULL;
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        for (i = &__get_cpu_var(tasklet_hi_vec).list; *i; i = &(*i)->next);
        *i = per_cpu(tasklet_hi_vec, cpu).list;
        per_cpu(tasklet_hi_vec, cpu).list = NULL;
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
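
/*
 * Example (illustrative sketch, CONFIG_HOTPLUG_CPU only; my_tasklets is a
 * hypothetical per-cpu tasklet): a subsystem can drop its tasklet queued on
 * a dead CPU from the CPU_DEAD case of its hotplug notifier:
 *
 *      case CPU_DEAD:
 *              tasklet_kill_immediate(&per_cpu(my_tasklets, cpu), cpu);
 *              break;
 */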

static int __devinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
                BUG_ON(per_cpu(tasklet_vec, hotcpu).list);
                BUG_ON(per_cpu(tasklet_hi_vec, hotcpu).list);
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
                if (!per_cpu(ksoftirqd, hotcpu))
                        break;
                /* Unbind so it can run.  Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu),
                             any_online_cpu(cpu_online_map));
        case CPU_DEAD:
                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}

static struct notifier_block __devinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

__init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}

#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
{
        int ret = 0;

        preempt_disable();
        ret = smp_call_function(func, info, retry, wait);
        local_irq_disable();
        func(info);
        local_irq_enable();
        preempt_enable();
        return ret;
}
EXPORT_SYMBOL(on_each_cpu);
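
/*
 * Example (illustrative sketch; flush_local_cache is hypothetical): run a
 * function on every online CPU, including the caller's, and wait for all
 * of them to finish before returning:
 *
 *      static void flush_local_cache(void *unused);
 *
 *      on_each_cpu(flush_local_cache, NULL, 0, 1);
 */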
#endif