Merge ARM fixes
[linux-2.6] / arch / i386 / kernel / vmitime.c
1 /*
2  * VMI paravirtual timer support routines.
3  *
4  * Copyright (C) 2005, VMware, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
14  * NON INFRINGEMENT.  See the GNU General Public License for more
15  * details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  * Send feedback to dhecht@vmware.com
22  *
23  */
24
25 /*
26  * Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
27  * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
28  * See comments there for proper credits.
29  */
30
31 #include <linux/spinlock.h>
32 #include <linux/init.h>
33 #include <linux/errno.h>
34 #include <linux/jiffies.h>
35 #include <linux/interrupt.h>
36 #include <linux/kernel_stat.h>
37 #include <linux/rcupdate.h>
38 #include <linux/clocksource.h>
39
40 #include <asm/timer.h>
41 #include <asm/io.h>
42 #include <asm/apic.h>
43 #include <asm/div64.h>
44 #include <asm/timer.h>
45 #include <asm/desc.h>
46
47 #include <asm/vmi.h>
48 #include <asm/vmi_time.h>
49
50 #include <mach_timer.h>
51 #include <io_ports.h>
52
/*
 * Alarm wiring flag used for alarms programmed after boot: the local APIC
 * timer LVT entry when a local APIC is configured, IRQ0 otherwise.
 */
#if defined(CONFIG_X86_LOCAL_APIC)
# define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
#else
# define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
#endif
58
59 /* Cached VMI operations */
60 struct vmi_timer_ops vmi_timer_ops;
61
62 #ifdef CONFIG_NO_IDLE_HZ
63
64 /* /proc/sys/kernel/hz_timer state. */
65 int sysctl_hz_timer;
66
67 /* Some stats */
68 static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
69 static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
70 static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
71
72 #endif /* CONFIG_NO_IDLE_HZ */
73
74 /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
75 static int alarm_hz = CONFIG_VMI_ALARM_HZ;
76
77 /* Cache of the value get_cycle_frequency / HZ. */
78 static signed long long cycles_per_jiffy;
79
80 /* Cache of the value get_cycle_frequency / alarm_hz. */
81 static signed long long cycles_per_alarm;
82
83 /* The number of cycles accounted for by the 'jiffies'/'xtime' count.
84  * Protected by xtime_lock. */
85 static unsigned long long real_cycles_accounted_system;
86
87 /* The number of cycles accounted for by update_process_times(), per cpu. */
88 static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
89
90 /* The number of stolen cycles accounted, per cpu. */
91 static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
92
93 /* Clock source. */
94 static cycle_t read_real_cycles(void)
95 {
96         return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
97 }
98
99 static cycle_t read_available_cycles(void)
100 {
101         return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
102 }
103
#if 0
/*
 * Current value of the hypervisor's VMI_CYCLES_STOLEN counter.
 * Compiled out: nothing in this file calls it.
 */
static cycle_t read_stolen_cycles(void)
{
	cycle_t now = vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);

	return now;
}
#endif  /*  0  */
110
111 static struct clocksource clocksource_vmi = {
112         .name                   = "vmi-timer",
113         .rating                 = 450,
114         .read                   = read_real_cycles,
115         .mask                   = CLOCKSOURCE_MASK(64),
116         .mult                   = 0, /* to be set */
117         .shift                  = 22,
118         .flags                  = CLOCK_SOURCE_IS_CONTINUOUS,
119 };
120
121
122 /* Timer interrupt handler. */
123 static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
124
125 static struct irqaction vmi_timer_irq  = {
126         vmi_timer_interrupt,
127         SA_INTERRUPT,
128         CPU_MASK_NONE,
129         "VMI-alarm",
130         NULL,
131         NULL
132 };
133
134 /* Alarm rate */
135 static int __init vmi_timer_alarm_rate_setup(char* str)
136 {
137         int alarm_rate;
138         if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
139                 alarm_hz = alarm_rate;
140                 printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
141         }
142         return 1;
143 }
144 __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
145
146
147 /* Initialization */
148 static void vmi_get_wallclock_ts(struct timespec *ts)
149 {
150         unsigned long long wallclock;
151         wallclock = vmi_timer_ops.get_wallclock(); // nsec units
152         ts->tv_nsec = do_div(wallclock, 1000000000);
153         ts->tv_sec = wallclock;
154 }
155
156 static void update_xtime_from_wallclock(void)
157 {
158         struct timespec ts;
159         vmi_get_wallclock_ts(&ts);
160         do_settimeofday(&ts);
161 }
162
163 unsigned long vmi_get_wallclock(void)
164 {
165         struct timespec ts;
166         vmi_get_wallclock_ts(&ts);
167         return ts.tv_sec;
168 }
169
/*
 * Setting the wallclock is not implemented: 'now' is ignored and the
 * call always fails with -1.
 */
int vmi_set_wallclock(unsigned long now)
{
	const int not_supported = -1;

	return not_supported;
}
174
/*
 * Scheduler clock: the current VMI_CYCLES_AVAILABLE counter value.
 *
 * NOTE(review): the value is returned in raw cycles with no conversion;
 * confirm that callers do not expect nanoseconds.
 */
unsigned long long vmi_sched_clock(void)
{
	unsigned long long avail_cycles = read_available_cycles();

	return avail_cycles;
}
179
180 void __init vmi_time_init(void)
181 {
182         unsigned long long cycles_per_sec, cycles_per_msec;
183         unsigned long flags;
184
185         local_irq_save(flags);
186         setup_irq(0, &vmi_timer_irq);
187 #ifdef CONFIG_X86_LOCAL_APIC
188         set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
189 #endif
190
191         no_sync_cmos_clock = 1;
192
193         vmi_get_wallclock_ts(&xtime);
194         set_normalized_timespec(&wall_to_monotonic,
195                 -xtime.tv_sec, -xtime.tv_nsec);
196
197         real_cycles_accounted_system = read_real_cycles();
198         update_xtime_from_wallclock();
199         per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
200
201         cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
202
203         cycles_per_jiffy = cycles_per_sec;
204         (void)do_div(cycles_per_jiffy, HZ);
205         cycles_per_alarm = cycles_per_sec;
206         (void)do_div(cycles_per_alarm, alarm_hz);
207         cycles_per_msec = cycles_per_sec;
208         (void)do_div(cycles_per_msec, 1000);
209         cpu_khz = cycles_per_msec;
210
211         printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
212                "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
213                cycles_per_alarm);
214
215         clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
216                                                     clocksource_vmi.shift);
217         if (clocksource_register(&clocksource_vmi))
218                 printk(KERN_WARNING "Error registering VMITIME clocksource.");
219
220         /* Disable PIT. */
221         outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
222
223         /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
224          * reduce the latency calling update_process_times. */
225         vmi_timer_ops.set_alarm(
226                       VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
227                       per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
228                       cycles_per_alarm);
229
230         local_irq_restore(flags);
231 }
232
233 #ifdef CONFIG_X86_LOCAL_APIC
234
235 void __init vmi_timer_setup_boot_alarm(void)
236 {
237         local_irq_disable();
238
239         /* Route the interrupt to the correct vector. */
240         apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
241
242         /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
243         vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
244         vmi_timer_ops.set_alarm(
245                       VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
246                       per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
247                       cycles_per_alarm);
248         local_irq_enable();
249 }
250
251 /* Initialize the time accounting variables for an AP on an SMP system.
252  * Also, set the local alarm for the AP. */
253 void __init vmi_timer_setup_secondary_alarm(void)
254 {
255         int cpu = smp_processor_id();
256
257         /* Route the interrupt to the correct vector. */
258         apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
259
260         per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
261
262         vmi_timer_ops.set_alarm(
263                       VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
264                       per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
265                       cycles_per_alarm);
266 }
267
268 #endif
269
270 /* Update system wide (real) time accounting (e.g. jiffies, xtime). */
271 static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
272 {
273         long long cycles_not_accounted;
274
275         write_seqlock(&xtime_lock);
276
277         cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
278         while (cycles_not_accounted >= cycles_per_jiffy) {
279                 /* systems wide jiffies and wallclock. */
280                 do_timer(1);
281
282                 cycles_not_accounted -= cycles_per_jiffy;
283                 real_cycles_accounted_system += cycles_per_jiffy;
284         }
285
286         if (vmi_timer_ops.wallclock_updated())
287                 update_xtime_from_wallclock();
288
289         write_sequnlock(&xtime_lock);
290 }
291
292 /* Update per-cpu process times. */
293 static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
294                                              unsigned long long cur_process_times_cycles)
295 {
296         long long cycles_not_accounted;
297         cycles_not_accounted = cur_process_times_cycles -
298                 per_cpu(process_times_cycles_accounted_cpu, cpu);
299
300         while (cycles_not_accounted >= cycles_per_jiffy) {
301                 /* Account time to the current process.  This includes
302                  * calling into the scheduler to decrement the timeslice
303                  * and possibly reschedule.*/
304                 update_process_times(user_mode(regs));
305                 /* XXX handle /proc/profile multiplier.  */
306                 profile_tick(CPU_PROFILING);
307
308                 cycles_not_accounted -= cycles_per_jiffy;
309                 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
310         }
311 }
312
313 #ifdef CONFIG_NO_IDLE_HZ
314 /* Update per-cpu idle times.  Used when a no-hz halt is ended. */
315 static void vmi_account_no_hz_idle_cycles(int cpu,
316                                           unsigned long long cur_process_times_cycles)
317 {
318         long long cycles_not_accounted;
319         unsigned long no_idle_hz_jiffies = 0;
320
321         cycles_not_accounted = cur_process_times_cycles -
322                 per_cpu(process_times_cycles_accounted_cpu, cpu);
323
324         while (cycles_not_accounted >= cycles_per_jiffy) {
325                 no_idle_hz_jiffies++;
326                 cycles_not_accounted -= cycles_per_jiffy;
327                 per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
328         }
329         /* Account time to the idle process. */
330         account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
331 }
332 #endif
333
334 /* Update per-cpu stolen time. */
335 static void vmi_account_stolen_cycles(int cpu,
336                                       unsigned long long cur_real_cycles,
337                                       unsigned long long cur_avail_cycles)
338 {
339         long long stolen_cycles_not_accounted;
340         unsigned long stolen_jiffies = 0;
341
342         if (cur_real_cycles < cur_avail_cycles)
343                 return;
344
345         stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
346                 per_cpu(stolen_cycles_accounted_cpu, cpu);
347
348         while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
349                 stolen_jiffies++;
350                 stolen_cycles_not_accounted -= cycles_per_jiffy;
351                 per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
352         }
353         /* HACK: pass NULL to force time onto cpustat->steal. */
354         account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
355 }
356
/*
 * Common body of the alarm interrupt, shared by the IRQ0 handler (UP
 * without a local APIC) and the local-APIC LVTT handler (UP with a local
 * APIC, or SMP).  Samples both cycle counters once and feeds them to the
 * three accounting routines.
 */
static void vmi_local_timer_interrupt(int cpu)
{
	unsigned long long real_now = read_real_cycles();
	unsigned long long avail_now = read_available_cycles();

	/* System-wide real time: xtime and jiffies. */
	vmi_account_real_cycles(real_now);

	/* This cpu's process times. */
	vmi_account_process_times_cycles(get_irq_regs(), cpu, avail_now);

	/* Time the hypervisor took away from this cpu. */
	vmi_account_stolen_cycles(cpu, real_now, avail_now);
}
372
373 #ifdef CONFIG_NO_IDLE_HZ
374
375 /* Must be called only from idle loop, with interrupts disabled. */
376 int vmi_stop_hz_timer(void)
377 {
378         /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
379
380         unsigned long seq, next;
381         unsigned long long real_cycles_expiry;
382         int cpu = smp_processor_id();
383         int idle;
384
385         BUG_ON(!irqs_disabled());
386         if (sysctl_hz_timer != 0)
387                 return 0;
388
389         cpu_set(cpu, nohz_cpu_mask);
390         smp_mb();
391         if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
392             (next = next_timer_interrupt(), time_before_eq(next, jiffies))) {
393                 cpu_clear(cpu, nohz_cpu_mask);
394                 next = jiffies;
395                 idle = 0;
396         } else
397                 idle = 1;
398
399         /* Convert jiffies to the real cycle counter. */
400         do {
401                 seq = read_seqbegin(&xtime_lock);
402                 real_cycles_expiry = real_cycles_accounted_system +
403                         (long)(next - jiffies) * cycles_per_jiffy;
404         } while (read_seqretry(&xtime_lock, seq));
405
406         /* This cpu is going idle. Disable the periodic alarm. */
407         if (idle) {
408                 vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
409                 per_cpu(idle_start_jiffies, cpu) = jiffies;
410         }
411
412         /* Set the real time alarm to expire at the next event. */
413         vmi_timer_ops.set_alarm(
414                       VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
415                       real_cycles_expiry, 0);
416
417         return idle;
418 }
419
420 static void vmi_reenable_hz_timer(int cpu)
421 {
422         /* For /proc/vmi/info idle_hz stat. */
423         per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
424         per_cpu(vmi_idle_no_hz_irqs, cpu)++;
425
426         /* Don't bother explicitly cancelling the one-shot alarm -- at
427          * worse we will receive a spurious timer interrupt. */
428         vmi_timer_ops.set_alarm(
429                       VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
430                       per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
431                       cycles_per_alarm);
432         /* Indicate this cpu is no longer nohz idle. */
433         cpu_clear(cpu, nohz_cpu_mask);
434 }
435
/*
 * Called from interrupt handlers when the (local) HZ timer is disabled.
 * Accounts the time that passed while it was off, then turns the
 * periodic alarm back on.  Interrupts must be disabled.
 */
void vmi_account_time_restart_hz_timer(void)
{
	int this_cpu = smp_processor_id();
	unsigned long long real_now, avail_now;

	BUG_ON(!irqs_disabled());

	/* Sample both counters once for all of the accounting below. */
	real_now = read_real_cycles();
	avail_now = read_available_cycles();

	/* System-wide real time: xtime and jiffies. */
	vmi_account_real_cycles(real_now);

	/* This cpu's idle time. */
	vmi_account_no_hz_idle_cycles(this_cpu, avail_now);

	/* Time the hypervisor took away from this cpu. */
	vmi_account_stolen_cycles(this_cpu, real_now, avail_now);

	/* Periodic alarm back on. */
	vmi_reenable_hz_timer(this_cpu);
}
455
456 #endif /* CONFIG_NO_IDLE_HZ */
457
458 /* UP (and no local-APIC) VMI-timer alarm interrupt handler.
459  * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
460  * APIC setup and setup_boot_vmi_alarm() is called.  */
461 static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
462 {
463         vmi_local_timer_interrupt(smp_processor_id());
464         return IRQ_HANDLED;
465 }
466
467 #ifdef CONFIG_X86_LOCAL_APIC
468
469 /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
470  * Also used in UP when CONFIG_X86_LOCAL_APIC.
471  * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
472 void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
473 {
474         struct pt_regs *old_regs = set_irq_regs(regs);
475         int cpu = smp_processor_id();
476
477         /*
478          * the NMI deadlock-detector uses this.
479          */
480         per_cpu(irq_stat,cpu).apic_timer_irqs++;
481
482         /*
483          * NOTE! We'd better ACK the irq immediately,
484          * because timer handling can be slow.
485          */
486         ack_APIC_irq();
487
488         /*
489          * update_process_times() expects us to have done irq_enter().
490          * Besides, if we don't timer interrupts ignore the global
491          * interrupt lock, which is the WrongThing (tm) to do.
492          */
493         irq_enter();
494         vmi_local_timer_interrupt(cpu);
495         irq_exit();
496         set_irq_regs(old_regs);
497 }
498
499 #endif  /* CONFIG_X86_LOCAL_APIC */