/*
 * linux/kernel/time/tick-broadcast.c
 *
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licensed under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/tick.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
/* FIXME: Use cpumask_var_t. */
static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
static DECLARE_BITMAP(tmpmask, NR_CPUS);
static DEFINE_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;

#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
        return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
        return to_cpumask(tick_broadcast_mask);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (bc)
                tick_setup_periodic(bc, 1);
}

/*
 * Check whether the device can be utilized as the broadcast device:
 */
int tick_check_broadcast_device(struct clock_event_device *dev)
{
        if ((tick_broadcast_device.evtdev &&
             tick_broadcast_device.evtdev->rating >= dev->rating) ||
             (dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;

        clockevents_exchange_device(NULL, dev);
        tick_broadcast_device.evtdev = dev;
        if (!cpumask_empty(tick_get_broadcast_mask()))
                tick_broadcast_start_periodic(dev);
        return 1;
}

/*
 * Check whether the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
        return (dev && tick_broadcast_device.evtdev == dev);
}

/*
 * Check whether the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Devices might be registered with both periodic and oneshot
         * mode disabled. This signals that the device needs to be
         * operated from the broadcast device and is a placeholder for
         * the cpu local device.
         */
        if (!tick_device_is_functional(dev)) {
                dev->event_handler = tick_handle_periodic;
                cpumask_set_cpu(cpu, tick_get_broadcast_mask());
                tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
                ret = 1;
        } else {
                /*
                 * When the new device is not affected by the stop
                 * feature and the cpu is marked in the broadcast mask
                 * then clear the broadcast bit.
                 */
                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
                        int cpu = smp_processor_id();

                        cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
                        tick_broadcast_clear_oneshot(cpu);
                }
        }
        spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

/*
 * Broadcast the event to the cpus which are set in the mask (mangled).
 */
static void tick_do_broadcast(struct cpumask *mask)
{
        int cpu = smp_processor_id();
        struct tick_device *td;

        /*
         * Check whether the current cpu is in the mask
         */
        if (cpumask_test_cpu(cpu, mask)) {
                cpumask_clear_cpu(cpu, mask);
                td = &per_cpu(tick_cpu_device, cpu);
                td->evtdev->event_handler(td->evtdev);
        }

        if (!cpumask_empty(mask)) {
                /*
                 * It might be necessary to actually check whether the devices
                 * have different broadcast functions. For now, just use that
                 * of the first device. This works as long as we have this
                 * misfeature only on x86 (lapic).
                 */
                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
                td->evtdev->broadcast(mask);
        }
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static void tick_do_periodic_broadcast(void)
{
        spin_lock(&tick_broadcast_lock);

        cpumask_and(to_cpumask(tmpmask),
                    cpu_online_mask, tick_get_broadcast_mask());
        tick_do_broadcast(to_cpumask(tmpmask));

        spin_unlock(&tick_broadcast_lock);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
        ktime_t next;

        tick_do_periodic_broadcast();

        /*
         * The device is in periodic mode. No reprogramming necessary:
         */
        if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
                return;

        /*
         * Set up the next period for devices which do not have
         * periodic mode. We read dev->next_event first and add to it
         * when the event already expired. clockevents_program_event()
         * sets dev->next_event only when the event is really
         * programmed to the device.
         */
        for (next = dev->next_event; ;) {
                next = ktime_add(next, tick_period);

                if (!clockevents_program_event(dev, next, ktime_get()))
                        return;
                tick_do_periodic_broadcast();
        }
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop
 */
static void tick_do_broadcast_on_off(void *why)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags, *reason = why;
        int cpu, bc_stopped;

        spin_lock_irqsave(&tick_broadcast_lock, flags);

        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        dev = td->evtdev;
        bc = tick_broadcast_device.evtdev;

        /*
         * Is the device not affected by the powerstate?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
                goto out;

        if (!tick_device_is_functional(dev))
                goto out;

        bc_stopped = cpumask_empty(tick_get_broadcast_mask());

        switch (*reason) {
        case CLOCK_EVT_NOTIFY_BROADCAST_ON:
        case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
                if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
                        cpumask_set_cpu(cpu, tick_get_broadcast_mask());
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
                if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
                        tick_broadcast_force = 1;
                break;
        case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
                if (!tick_broadcast_force &&
                    cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
                        cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                tick_setup_periodic(dev, 0);
                }
                break;
        }

        if (cpumask_empty(tick_get_broadcast_mask())) {
                if (!bc_stopped)
                        clockevents_shutdown(bc);
        } else if (bc_stopped) {
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc);
        }
out:
        spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop.
 */
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
        if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
                printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
                       "offline CPU #%d\n", *oncpu);
        else
                smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
                                         &reason, 1);
}

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
        if (!broadcast)
                dev->event_handler = tick_handle_periodic;
        else
                dev->event_handler = tick_handle_periodic_broadcast;
}

/*
 * Remove a CPU from broadcasting
 */
void tick_shutdown_broadcast(unsigned int *cpup)
{
        struct clock_event_device *bc;
        unsigned long flags;
        unsigned int cpu = *cpup;

        spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        cpumask_clear_cpu(cpu, tick_get_broadcast_mask());

        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                if (bc && cpumask_empty(tick_get_broadcast_mask()))
                        clockevents_shutdown(bc);
        }

        spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

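/*
 * Shut the broadcast device down when the system goes into suspend.
 */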
void tick_suspend_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        if (bc)
                clockevents_shutdown(bc);

        spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

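/*
 * Resume the broadcast device on the way out of suspend and restart
 * broadcasting when CPUs still depend on it. The return value tells
 * the caller whether the current cpu gets its events from the
 * broadcast device.
 */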
int tick_resume_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;
        int broadcast = 0;

        spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;

        if (bc) {
                clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_get_broadcast_mask()))
                                tick_broadcast_start_periodic(bc);
                        broadcast = cpumask_test_cpu(smp_processor_id(),
                                                     tick_get_broadcast_mask());
                        break;
                case TICKDEV_MODE_ONESHOT:
                        broadcast = tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        spin_unlock_irqrestore(&tick_broadcast_lock, flags);

        return broadcast;
}

#ifdef CONFIG_TICK_ONESHOT

/* FIXME: use cpumask_var_t. */
static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
        return to_cpumask(tick_broadcast_oneshot_mask);
}

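/*
 * Program the broadcast device to expire at @expires.
 */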
static int tick_broadcast_set_event(ktime_t expires, int force)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        return tick_dev_program_event(bc, expires, force);
}

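/*
 * Switch the broadcast device back to oneshot mode when resuming.
 */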
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
        clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
        return 0;
}

/*
 * Called from irq_enter() when idle was interrupted to re-enable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast(int cpu)
{
        if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
                struct tick_device *td = &per_cpu(tick_cpu_device, cpu);

                clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
        }
}
/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td;
        ktime_t now, next_event;
        int cpu;

        spin_lock(&tick_broadcast_lock);
again:
        dev->next_event.tv64 = KTIME_MAX;
        next_event.tv64 = KTIME_MAX;
        cpumask_clear(to_cpumask(tmpmask));
        now = ktime_get();
        /* Find all expired events */
        for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event.tv64 <= now.tv64)
                        cpumask_set_cpu(cpu, to_cpumask(tmpmask));
                else if (td->evtdev->next_event.tv64 < next_event.tv64)
                        next_event.tv64 = td->evtdev->next_event.tv64;
        }

        /*
         * Wake up the cpus which have an expired event.
         */
        tick_do_broadcast(to_cpumask(tmpmask));

        /*
         * Two reasons to reprogram:
         *
         * - The global event did not expire any CPU local
         * events. This happens in dyntick mode, as the maximum PIT
         * delta is quite small.
         *
         * - There are pending events on sleeping CPUs which were not
         * in the event mask
         */
        if (next_event.tv64 != KTIME_MAX) {
                /*
                 * Rearm the broadcast device. If the event expired,
                 * repeat the above.
                 */
                if (tick_broadcast_set_event(next_event, 0))
                        goto again;
        }
        spin_unlock(&tick_broadcast_lock);
}

/*
 * Powerstate information: The system enters/leaves a state where
 * affected devices might stop
 */
void tick_broadcast_oneshot_control(unsigned long reason)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        unsigned long flags;
        int cpu;

        spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Periodic mode does not care about the enter/exit of power
         * states
         */
        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                goto out;

        bc = tick_broadcast_device.evtdev;
        cpu = smp_processor_id();
        td = &per_cpu(tick_cpu_device, cpu);
        dev = td->evtdev;

        if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                goto out;

        if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
                if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
                        cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
                        clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
                        if (dev->next_event.tv64 < bc->next_event.tv64)
                                tick_broadcast_set_event(dev->next_event, 1);
                }
        } else {
                if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
                        cpumask_clear_cpu(cpu,
                                          tick_get_broadcast_oneshot_mask());
                        clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
                        if (dev->next_event.tv64 != KTIME_MAX)
                                tick_program_event(dev->next_event, 1);
                }
        }

out:
        spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Reset the oneshot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
        cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
}

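/*
 * Set the next pending event of each per-cpu tick device in @mask
 * to @expires.
 */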
static void tick_broadcast_init_next_event(struct cpumask *mask,
                                           ktime_t expires)
{
        struct tick_device *td;
        int cpu;

        for_each_cpu(cpu, mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev)
                        td->evtdev->next_event = expires;
        }
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
        /* Set it up only once! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
                int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
                int cpu = smp_processor_id();

                bc->event_handler = tick_handle_oneshot_broadcast;
                clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);

                /* Take the do_timer update */
                tick_do_timer_cpu = cpu;

                /*
                 * We must be careful here. There might be other CPUs
                 * waiting for periodic broadcast. We need to set the
                 * oneshot_mask bits for those and program the
                 * broadcast device to fire.
                 */
                cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
                cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
                cpumask_or(tick_get_broadcast_oneshot_mask(),
                           tick_get_broadcast_oneshot_mask(),
                           to_cpumask(tmpmask));

                if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
                        tick_broadcast_init_next_event(to_cpumask(tmpmask),
                                                       tick_next_period);
                        tick_broadcast_set_event(tick_next_period, 1);
                } else
                        bc->next_event.tv64 = KTIME_MAX;
        }
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        spin_lock_irqsave(&tick_broadcast_lock, flags);

        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
        bc = tick_broadcast_device.evtdev;
        if (bc)
                tick_broadcast_setup_oneshot(bc);
        spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dead CPU from broadcasting
 */
void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
{
        unsigned long flags;
        unsigned int cpu = *cpup;

        spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Clear the broadcast mask flag for the dead cpu, but do not
         * stop the broadcast device!
         */
        cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());

        spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Check whether the broadcast device is in oneshot mode
 */
int tick_broadcast_oneshot_active(void)
{
        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

#endif