sched: fix fair sleepers
[linux-2.6] / kernel / time / tick-broadcast.c
1 /*
2  * linux/kernel/time/tick-broadcast.c
3  *
4  * This file contains functions which emulate a local clock-event
5  * device via a broadcast event source.
6  *
7  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10  *
11  * This code is licenced under the GPL version 2. For details see
12  * kernel-base/COPYING.
13  */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/irq.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21 #include <linux/tick.h>
22
23 #include "tick-internal.h"
24
25 /*
26  * Broadcast support for broken x86 hardware, where the local apic
27  * timer stops in C3 state.
28  */
29
30 struct tick_device tick_broadcast_device;
31 static cpumask_t tick_broadcast_mask;
32 static DEFINE_SPINLOCK(tick_broadcast_lock);
33
34 #ifdef CONFIG_TICK_ONESHOT
35 static void tick_broadcast_clear_oneshot(int cpu);
36 #else
37 static inline void tick_broadcast_clear_oneshot(int cpu) { }
38 #endif
39
40 /*
41  * Debugging: see timer_list.c
42  */
43 struct tick_device *tick_get_broadcast_device(void)
44 {
45         return &tick_broadcast_device;
46 }
47
48 cpumask_t *tick_get_broadcast_mask(void)
49 {
50         return &tick_broadcast_mask;
51 }
52
/*
 * Put the broadcast device into periodic mode. A NULL device is
 * silently ignored, so callers may pass the current broadcast device
 * without checking that one is registered.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (!bc)
                return;

        tick_setup_periodic(bc, 1);
}
61
62 /*
63  * Check, if the device can be utilized as broadcast device:
64  */
65 int tick_check_broadcast_device(struct clock_event_device *dev)
66 {
67         if ((tick_broadcast_device.evtdev &&
68              tick_broadcast_device.evtdev->rating >= dev->rating) ||
69              (dev->features & CLOCK_EVT_FEAT_C3STOP))
70                 return 0;
71
72         clockevents_exchange_device(NULL, dev);
73         tick_broadcast_device.evtdev = dev;
74         if (!cpus_empty(tick_broadcast_mask))
75                 tick_broadcast_start_periodic(dev);
76         return 1;
77 }
78
79 /*
80  * Check, if the device is the broadcast device
81  */
82 int tick_is_broadcast_device(struct clock_event_device *dev)
83 {
84         return (dev && tick_broadcast_device.evtdev == dev);
85 }
86
87 /*
88  * Check, if the device is disfunctional and a place holder, which
89  * needs to be handled by the broadcast device.
90  */
91 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
92 {
93         unsigned long flags;
94         int ret = 0;
95
96         spin_lock_irqsave(&tick_broadcast_lock, flags);
97
98         /*
99          * Devices might be registered with both periodic and oneshot
100          * mode disabled. This signals, that the device needs to be
101          * operated from the broadcast device and is a placeholder for
102          * the cpu local device.
103          */
104         if (!tick_device_is_functional(dev)) {
105                 dev->event_handler = tick_handle_periodic;
106                 cpu_set(cpu, tick_broadcast_mask);
107                 tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
108                 ret = 1;
109         } else {
110                 /*
111                  * When the new device is not affected by the stop
112                  * feature and the cpu is marked in the broadcast mask
113                  * then clear the broadcast bit.
114                  */
115                 if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
116                         int cpu = smp_processor_id();
117
118                         cpu_clear(cpu, tick_broadcast_mask);
119                         tick_broadcast_clear_oneshot(cpu);
120                 }
121         }
122         spin_unlock_irqrestore(&tick_broadcast_lock, flags);
123         return ret;
124 }
125
126 /*
127  * Broadcast the event to the cpus, which are set in the mask
128  */
129 static void tick_do_broadcast(cpumask_t mask)
130 {
131         int cpu = smp_processor_id();
132         struct tick_device *td;
133
134         /*
135          * Check, if the current cpu is in the mask
136          */
137         if (cpu_isset(cpu, mask)) {
138                 cpu_clear(cpu, mask);
139                 td = &per_cpu(tick_cpu_device, cpu);
140                 td->evtdev->event_handler(td->evtdev);
141         }
142
143         if (!cpus_empty(mask)) {
144                 /*
145                  * It might be necessary to actually check whether the devices
146                  * have different broadcast functions. For now, just use the
147                  * one of the first device. This works as long as we have this
148                  * misfeature only on x86 (lapic)
149                  */
150                 cpu = first_cpu(mask);
151                 td = &per_cpu(tick_cpu_device, cpu);
152                 td->evtdev->broadcast(mask);
153         }
154 }
155
156 /*
157  * Periodic broadcast:
158  * - invoke the broadcast handlers
159  */
160 static void tick_do_periodic_broadcast(void)
161 {
162         cpumask_t mask;
163
164         spin_lock(&tick_broadcast_lock);
165
166         cpus_and(mask, cpu_online_map, tick_broadcast_mask);
167         tick_do_broadcast(mask);
168
169         spin_unlock(&tick_broadcast_lock);
170 }
171
172 /*
173  * Event handler for periodic broadcast ticks
174  */
175 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
176 {
177         tick_do_periodic_broadcast();
178
179         /*
180          * The device is in periodic mode. No reprogramming necessary:
181          */
182         if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
183                 return;
184
185         /*
186          * Setup the next period for devices, which do not have
187          * periodic mode:
188          */
189         for (;;) {
190                 ktime_t next = ktime_add(dev->next_event, tick_period);
191
192                 if (!clockevents_program_event(dev, next, ktime_get()))
193                         return;
194                 tick_do_periodic_broadcast();
195         }
196 }
197
198 /*
199  * Powerstate information: The system enters/leaves a state, where
200  * affected devices might stop
201  */
202 static void tick_do_broadcast_on_off(void *why)
203 {
204         struct clock_event_device *bc, *dev;
205         struct tick_device *td;
206         unsigned long flags, *reason = why;
207         int cpu;
208
209         spin_lock_irqsave(&tick_broadcast_lock, flags);
210
211         cpu = smp_processor_id();
212         td = &per_cpu(tick_cpu_device, cpu);
213         dev = td->evtdev;
214         bc = tick_broadcast_device.evtdev;
215
216         /*
217          * Is the device not affected by the powerstate ?
218          */
219         if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
220                 goto out;
221
222         if (!tick_device_is_functional(dev))
223                 goto out;
224
225         switch (*reason) {
226         case CLOCK_EVT_NOTIFY_BROADCAST_ON:
227         case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
228                 if (!cpu_isset(cpu, tick_broadcast_mask)) {
229                         cpu_set(cpu, tick_broadcast_mask);
230                         if (td->mode == TICKDEV_MODE_PERIODIC)
231                                 clockevents_set_mode(dev,
232                                                      CLOCK_EVT_MODE_SHUTDOWN);
233                 }
234                 if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
235                         dev->features |= CLOCK_EVT_FEAT_DUMMY;
236                 break;
237         case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
238                 if (cpu_isset(cpu, tick_broadcast_mask)) {
239                         cpu_clear(cpu, tick_broadcast_mask);
240                         if (td->mode == TICKDEV_MODE_PERIODIC)
241                                 tick_setup_periodic(dev, 0);
242                 }
243                 break;
244         }
245
246         if (cpus_empty(tick_broadcast_mask))
247                 clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
248         else {
249                 if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
250                         tick_broadcast_start_periodic(bc);
251                 else
252                         tick_broadcast_setup_oneshot(bc);
253         }
254 out:
255         spin_unlock_irqrestore(&tick_broadcast_lock, flags);
256 }
257
258 /*
259  * Powerstate information: The system enters/leaves a state, where
260  * affected devices might stop.
261  */
262 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
263 {
264         if (!cpu_isset(*oncpu, cpu_online_map))
265                 printk(KERN_ERR "tick-braodcast: ignoring broadcast for "
266                        "offline CPU #%d\n", *oncpu);
267         else
268                 smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
269                                          &reason, 1, 1);
270 }
271
272 /*
273  * Set the periodic handler depending on broadcast on/off
274  */
275 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
276 {
277         if (!broadcast)
278                 dev->event_handler = tick_handle_periodic;
279         else
280                 dev->event_handler = tick_handle_periodic_broadcast;
281 }
282
283 /*
284  * Remove a CPU from broadcasting
285  */
286 void tick_shutdown_broadcast(unsigned int *cpup)
287 {
288         struct clock_event_device *bc;
289         unsigned long flags;
290         unsigned int cpu = *cpup;
291
292         spin_lock_irqsave(&tick_broadcast_lock, flags);
293
294         bc = tick_broadcast_device.evtdev;
295         cpu_clear(cpu, tick_broadcast_mask);
296
297         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
298                 if (bc && cpus_empty(tick_broadcast_mask))
299                         clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
300         }
301
302         spin_unlock_irqrestore(&tick_broadcast_lock, flags);
303 }
304
305 void tick_suspend_broadcast(void)
306 {
307         struct clock_event_device *bc;
308         unsigned long flags;
309
310         spin_lock_irqsave(&tick_broadcast_lock, flags);
311
312         bc = tick_broadcast_device.evtdev;
313         if (bc)
314                 clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
315
316         spin_unlock_irqrestore(&tick_broadcast_lock, flags);
317 }
318
319 int tick_resume_broadcast(void)
320 {
321         struct clock_event_device *bc;
322         unsigned long flags;
323         int broadcast = 0;
324
325         spin_lock_irqsave(&tick_broadcast_lock, flags);
326
327         bc = tick_broadcast_device.evtdev;
328
329         if (bc) {
330                 clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
331
332                 switch (tick_broadcast_device.mode) {
333                 case TICKDEV_MODE_PERIODIC:
334                         if(!cpus_empty(tick_broadcast_mask))
335                                 tick_broadcast_start_periodic(bc);
336                         broadcast = cpu_isset(smp_processor_id(),
337                                               tick_broadcast_mask);
338                         break;
339                 case TICKDEV_MODE_ONESHOT:
340                         broadcast = tick_resume_broadcast_oneshot(bc);
341                         break;
342                 }
343         }
344         spin_unlock_irqrestore(&tick_broadcast_lock, flags);
345
346         return broadcast;
347 }
348
349
350 #ifdef CONFIG_TICK_ONESHOT
351
352 static cpumask_t tick_broadcast_oneshot_mask;
353
354 /*
355  * Debugging: see timer_list.c
356  */
357 cpumask_t *tick_get_broadcast_oneshot_mask(void)
358 {
359         return &tick_broadcast_oneshot_mask;
360 }
361
362 static int tick_broadcast_set_event(ktime_t expires, int force)
363 {
364         struct clock_event_device *bc = tick_broadcast_device.evtdev;
365         ktime_t now = ktime_get();
366         int res;
367
368         for(;;) {
369                 res = clockevents_program_event(bc, expires, now);
370                 if (!res || !force)
371                         return res;
372                 now = ktime_get();
373                 expires = ktime_add(now, ktime_set(0, bc->min_delta_ns));
374         }
375 }
376
377 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
378 {
379         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
380         return 0;
381 }
382
383 /*
384  * Handle oneshot mode broadcasting
385  */
386 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
387 {
388         struct tick_device *td;
389         cpumask_t mask;
390         ktime_t now, next_event;
391         int cpu;
392
393         spin_lock(&tick_broadcast_lock);
394 again:
395         dev->next_event.tv64 = KTIME_MAX;
396         next_event.tv64 = KTIME_MAX;
397         mask = CPU_MASK_NONE;
398         now = ktime_get();
399         /* Find all expired events */
400         for (cpu = first_cpu(tick_broadcast_oneshot_mask); cpu != NR_CPUS;
401              cpu = next_cpu(cpu, tick_broadcast_oneshot_mask)) {
402                 td = &per_cpu(tick_cpu_device, cpu);
403                 if (td->evtdev->next_event.tv64 <= now.tv64)
404                         cpu_set(cpu, mask);
405                 else if (td->evtdev->next_event.tv64 < next_event.tv64)
406                         next_event.tv64 = td->evtdev->next_event.tv64;
407         }
408
409         /*
410          * Wakeup the cpus which have an expired event.
411          */
412         tick_do_broadcast(mask);
413
414         /*
415          * Two reasons for reprogram:
416          *
417          * - The global event did not expire any CPU local
418          * events. This happens in dyntick mode, as the maximum PIT
419          * delta is quite small.
420          *
421          * - There are pending events on sleeping CPUs which were not
422          * in the event mask
423          */
424         if (next_event.tv64 != KTIME_MAX) {
425                 /*
426                  * Rearm the broadcast device. If event expired,
427                  * repeat the above
428                  */
429                 if (tick_broadcast_set_event(next_event, 0))
430                         goto again;
431         }
432         spin_unlock(&tick_broadcast_lock);
433 }
434
435 /*
436  * Powerstate information: The system enters/leaves a state, where
437  * affected devices might stop
438  */
439 void tick_broadcast_oneshot_control(unsigned long reason)
440 {
441         struct clock_event_device *bc, *dev;
442         struct tick_device *td;
443         unsigned long flags;
444         int cpu;
445
446         spin_lock_irqsave(&tick_broadcast_lock, flags);
447
448         /*
449          * Periodic mode does not care about the enter/exit of power
450          * states
451          */
452         if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
453                 goto out;
454
455         bc = tick_broadcast_device.evtdev;
456         cpu = smp_processor_id();
457         td = &per_cpu(tick_cpu_device, cpu);
458         dev = td->evtdev;
459
460         if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
461                 goto out;
462
463         if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
464                 if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
465                         cpu_set(cpu, tick_broadcast_oneshot_mask);
466                         clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
467                         if (dev->next_event.tv64 < bc->next_event.tv64)
468                                 tick_broadcast_set_event(dev->next_event, 1);
469                 }
470         } else {
471                 if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
472                         cpu_clear(cpu, tick_broadcast_oneshot_mask);
473                         clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
474                         if (dev->next_event.tv64 != KTIME_MAX)
475                                 tick_program_event(dev->next_event, 1);
476                 }
477         }
478
479 out:
480         spin_unlock_irqrestore(&tick_broadcast_lock, flags);
481 }
482
483 /*
484  * Reset the one shot broadcast for a cpu
485  *
486  * Called with tick_broadcast_lock held
487  */
488 static void tick_broadcast_clear_oneshot(int cpu)
489 {
490         cpu_clear(cpu, tick_broadcast_oneshot_mask);
491 }
492
493 /**
494  * tick_broadcast_setup_oneshot - setup the broadcast device
495  */
496 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
497 {
498         bc->event_handler = tick_handle_oneshot_broadcast;
499         clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
500         bc->next_event.tv64 = KTIME_MAX;
501 }
502
503 /*
504  * Select oneshot operating mode for the broadcast device
505  */
506 void tick_broadcast_switch_to_oneshot(void)
507 {
508         struct clock_event_device *bc;
509         unsigned long flags;
510
511         spin_lock_irqsave(&tick_broadcast_lock, flags);
512
513         tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
514         bc = tick_broadcast_device.evtdev;
515         if (bc)
516                 tick_broadcast_setup_oneshot(bc);
517         spin_unlock_irqrestore(&tick_broadcast_lock, flags);
518 }
519
520
521 /*
522  * Remove a dead CPU from broadcasting
523  */
524 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
525 {
526         unsigned long flags;
527         unsigned int cpu = *cpup;
528
529         spin_lock_irqsave(&tick_broadcast_lock, flags);
530
531         /*
532          * Clear the broadcast mask flag for the dead cpu, but do not
533          * stop the broadcast device!
534          */
535         cpu_clear(cpu, tick_broadcast_oneshot_mask);
536
537         spin_unlock_irqrestore(&tick_broadcast_lock, flags);
538 }
539
540 #endif