Merge master.kernel.org:/pub/scm/linux/kernel/git/gregkh/usb-2.6
[linux-2.6] / arch / i386 / kernel / timers / timer_tsc.c
1 /*
2  * This code largely moved from arch/i386/kernel/time.c.
3  * See comments there for proper credits.
4  *
5  * 2004-06-25    Jesper Juhl
6  *      moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7  *      failing to inline.
8  */
9
10 #include <linux/spinlock.h>
11 #include <linux/init.h>
12 #include <linux/timex.h>
13 #include <linux/errno.h>
14 #include <linux/cpufreq.h>
15 #include <linux/string.h>
16 #include <linux/jiffies.h>
17
18 #include <asm/timer.h>
19 #include <asm/io.h>
/* processor.h for tsc_disable flag */
21 #include <asm/processor.h>
22
23 #include "io_ports.h"
24 #include "mach_timer.h"
25
26 #include <asm/hpet.h>
27 #include <asm/i8253.h>
28
#ifdef CONFIG_HPET_TIMER
/*
 * Multiplier that turns an HPET counter delta into microseconds:
 * (2^32 * usecs per tick) / HPET clocks per tick.  Computed in init_tsc()
 * and consumed by mark_offset_tsc_hpet().
 */
static unsigned long hpet_usec_quotient;
/* HPET counter value saved by mark_offset_tsc_hpet() and tsc_resume() */
static unsigned long hpet_last;
/*
 * Forward declaration: init_tsc() patches timer_tsc.mark_offset before
 * the full definition at the end of this file.
 */
static struct timer_opts timer_tsc;
#endif

/* Defined further down (real version or empty stub, per CONFIG_CPU_FREQ) */
static inline void cpufreq_delayed_get(void);

/* Set to 1 by the "notsc" boot option handler (tsc_setup) */
int tsc_disable __devinitdata = 0;

/* Set to 1 by init_tsc() once TSC calibration has produced a quotient */
static int use_tsc;
/* Number of usecs that the last interrupt was delayed */
static int delay_at_last_interrupt;

/*
 * TSC value captured at the last mark_offset call (or at resume).
 * Written under monotonic_lock together with monotonic_base.
 */
static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
/* Nanoseconds accumulated up to the TSC value stored in last_tsc_* */
static unsigned long long monotonic_base;
/* Seqlock guarding monotonic_base and last_tsc_low/last_tsc_high */
static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
47
48 /* Avoid compensating for lost ticks before TSCs are synched */
49 static int detect_lost_ticks;
50 static int __init start_lost_tick_compensation(void)
51 {
52         detect_lost_ticks = 1;
53         return 0;
54 }
55 late_initcall(start_lost_tick_compensation);
56
/* convert from cycles(64bits) => nanoseconds (64bits)
 *  basic equation:
 *              ns = cycles / (freq / ns_per_sec)
 *              ns = cycles * (ns_per_sec / freq)
 *              ns = cycles * (10^9 / (cpu_khz * 10^3))
 *              ns = cycles * (10^6 / cpu_khz)
 *
 *      Then we use scaling math (suggested by george@mvista.com) to get:
 *              ns = cycles * (10^6 * SC / cpu_khz) / SC
 *              ns = cycles * cyc2ns_scale / SC
 *
 *      And since SC is a constant power of two, we can convert the div
 *  into a shift.
 *
 *  We can use khz divisor instead of mhz to keep a better precision, since
 *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
 *  (mathieu.desnoyers@polymtl.ca)
 *
 *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * Recompute cyc2ns_scale for a CPU running at @cpu_khz kHz.
 *
 * A @cpu_khz of 0 cannot be converted (it would divide by zero), so the
 * previous scale is left untouched in that case.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
        if (!cpu_khz)
                return;

        cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
}

/*
 * Convert @cyc TSC cycles to nanoseconds using the cached scale.
 *
 * The multiplication is done in 64 bits, so the result wraps once
 * cyc * cyc2ns_scale no longer fits in an unsigned long long.
 */
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
        return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
}
89
/*
 * One of the PIT count samples remembered between mark_offset_tsc() calls
 * for its pit_latch_buggy filtering; seeded with LATCH by init_tsc().
 */
static int count2; /* counter for mark_offset_tsc() */

/* Cached *multiplier* to convert TSC counts to microseconds.
 * (see the equation below).
 * Equal to 2^32 * (1 / (clocks per usec) ).
 * Initialized in time_init (within this file the assignment happens in
 * init_tsc(), and the cpufreq notifier rescales it on UP kernels).
 */
static unsigned long fast_gettimeoffset_quotient;
98
/*
 * Return the time offset, in microseconds, accumulated since the TSC
 * value recorded by the last mark_offset call.
 *
 * The result is delay_at_last_interrupt plus the low-32-bit TSC delta
 * scaled by fast_gettimeoffset_quotient.
 */
static unsigned long get_offset_tsc(void)
{
        register unsigned long eax, edx;

        /* Read the Time Stamp Counter */

        rdtsc(eax,edx);

        /* .. relative to previous jiffy (32 bits is enough) */
        eax -= last_tsc_low;    /* tsc_low delta */

        /*
         * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
         *             = (tsc_low delta) * (usecs_per_clock)
         *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
         *
         * Using a mull instead of a divl saves up to 31 clock cycles
         * in the critical path.
         */

        /*
         * mull leaves the 64-bit product in edx:eax; only edx (the
         * product divided by 2^32) is used below.
         */
        __asm__("mull %2"
                :"=a" (eax), "=d" (edx)
                :"rm" (fast_gettimeoffset_quotient),
                 "0" (eax));

        /* our adjusted time offset in microseconds */
        return delay_at_last_interrupt + edx;
}
127
128 static unsigned long long monotonic_clock_tsc(void)
129 {
130         unsigned long long last_offset, this_offset, base;
131         unsigned seq;
132         
133         /* atomically read monotonic base & last_offset */
134         do {
135                 seq = read_seqbegin(&monotonic_lock);
136                 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
137                 base = monotonic_base;
138         } while (read_seqretry(&monotonic_lock, seq));
139
140         /* Read the Time Stamp Counter */
141         rdtscll(this_offset);
142
143         /* return the value in ns */
144         return base + cycles_2_ns(this_offset - last_offset);
145 }
146
147 /*
148  * Scheduler clock - returns current time in nanosec units.
149  */
150 unsigned long long sched_clock(void)
151 {
152         unsigned long long this_offset;
153
154         /*
155          * In the NUMA case we dont use the TSC as they are not
156          * synchronized across all CPUs.
157          */
158 #ifndef CONFIG_NUMA
159         if (!use_tsc)
160 #endif
161                 /* no locking but a rare wrong value is not a big deal */
162                 return jiffies_64 * (1000000000 / HZ);
163
164         /* Read the Time Stamp Counter */
165         rdtscll(this_offset);
166
167         /* return the value in ns */
168         return cycles_2_ns(this_offset);
169 }
170
/*
 * Busy-wait until the low 32 bits of the TSC have advanced by at least
 * @loops since entry.  The TSC is always sampled at least once.
 */
static void delay_tsc(unsigned long loops)
{
        unsigned long start, cur;

        rdtscl(start);
        for (;;) {
                rep_nop();
                rdtscl(cur);
                if ((cur - start) >= loops)
                        break;
        }
}
182
183 #ifdef CONFIG_HPET_TIMER
/*
 * Timer-tick bookkeeping for the TSC timer when the HPET supplies the
 * tick (installed as timer_tsc.mark_offset by init_tsc()).
 *
 * Records the current TSC in last_tsc_low/last_tsc_high, adds any lost
 * ticks to jiffies_64, advances monotonic_base, and recomputes
 * delay_at_last_interrupt from the HPET counter.
 */
static void mark_offset_tsc_hpet(void)
{
        unsigned long long this_offset, last_offset;
        unsigned long offset, temp, hpet_current;

        write_seqlock(&monotonic_lock);
        /* TSC value recorded at the previous mark */
        last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
        /*
         * It is important that these two operations happen almost at
         * the same time. We do the RDTSC stuff first, since it's
         * faster. To avoid any inconsistencies, we need interrupts
         * disabled locally.
         */
        /*
         * Interrupts are just disabled locally since the timer irq
         * has the SA_INTERRUPT flag set. -arca
         */
        /* read Pentium cycle counter */
        /*
         * NOTE(review): as written, the HPET counter is read before the
         * TSC, which is the opposite order to the comment above.
         */

        hpet_current = hpet_readl(HPET_COUNTER);
        rdtsc(last_tsc_low, last_tsc_high);

        /* lost tick compensation */
        /* comparator minus one tick: presumably when this tick was due */
        offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
        if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))
                                        && detect_lost_ticks) {
                int lost_ticks = (offset - hpet_last) / hpet_tick;
                jiffies_64 += lost_ticks;
        }
        hpet_last = hpet_current;

        /* update the monotonic base value */
        this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
        monotonic_base += cycles_2_ns(this_offset - last_offset);
        write_sequnlock(&monotonic_lock);

        /* calculate delay_at_last_interrupt */
        /*
         * Time offset = (hpet delta) * ( usecs per HPET clock )
         *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)
         *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
         * Where,
         * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
         */
        delay_at_last_interrupt = hpet_current - offset;
        /*
         * ASM_MUL64_REG is defined outside this file; going by the formula
         * above it presumably leaves the high 32 bits of the product in
         * delay_at_last_interrupt (temp taking the low half) - TODO confirm.
         */
        ASM_MUL64_REG(temp, delay_at_last_interrupt,
                        hpet_usec_quotient, delay_at_last_interrupt);
}
232 #endif
233
234
235 #ifdef CONFIG_CPU_FREQ
236 #include <linux/workqueue.h>
237
238 static unsigned int cpufreq_delayed_issched = 0;
239 static unsigned int cpufreq_init = 0;
240 static struct work_struct cpufreq_delayed_get_work;
241
242 static void handle_cpufreq_delayed_get(void *v)
243 {
244         unsigned int cpu;
245         for_each_online_cpu(cpu) {
246                 cpufreq_get(cpu);
247         }
248         cpufreq_delayed_issched = 0;
249 }
250
251 /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
252  * to verify the CPU frequency the timing core thinks the CPU is running
253  * at is still correct.
254  */
255 static inline void cpufreq_delayed_get(void) 
256 {
257         if (cpufreq_init && !cpufreq_delayed_issched) {
258                 cpufreq_delayed_issched = 1;
259                 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
260                 schedule_work(&cpufreq_delayed_get_work);
261         }
262 }
263
264 /* If the CPU frequency is scaled, TSC-based delays will need a different
265  * loops_per_jiffy value to function properly.
266  */
267
268 static unsigned int  ref_freq = 0;
269 static unsigned long loops_per_jiffy_ref = 0;
270
271 #ifndef CONFIG_SMP
272 static unsigned long fast_gettimeoffset_ref = 0;
273 static unsigned int cpu_khz_ref = 0;
274 #endif
275
276 static int
277 time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
278                        void *data)
279 {
280         struct cpufreq_freqs *freq = data;
281
282         if (val != CPUFREQ_RESUMECHANGE)
283                 write_seqlock_irq(&xtime_lock);
284         if (!ref_freq) {
285                 if (!freq->old){
286                         ref_freq = freq->new;
287                         goto end;
288                 }
289                 ref_freq = freq->old;
290                 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
291 #ifndef CONFIG_SMP
292                 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
293                 cpu_khz_ref = cpu_khz;
294 #endif
295         }
296
297         if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
298             (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
299             (val == CPUFREQ_RESUMECHANGE)) {
300                 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
301                         cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
302 #ifndef CONFIG_SMP
303                 if (cpu_khz)
304                         cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
305                 if (use_tsc) {
306                         if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
307                                 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
308                                 set_cyc2ns_scale(cpu_khz);
309                         }
310                 }
311 #endif
312         }
313
314 end:
315         if (val != CPUFREQ_RESUMECHANGE)
316                 write_sequnlock_irq(&xtime_lock);
317
318         return 0;
319 }
320
321 static struct notifier_block time_cpufreq_notifier_block = {
322         .notifier_call  = time_cpufreq_notifier
323 };
324
325
326 static int __init cpufreq_tsc(void)
327 {
328         int ret;
329         INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
330         ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
331                                         CPUFREQ_TRANSITION_NOTIFIER);
332         if (!ret)
333                 cpufreq_init = 1;
334         return ret;
335 }
336 core_initcall(cpufreq_tsc);
337
338 #else /* CONFIG_CPU_FREQ */
/* Without CONFIG_CPU_FREQ there is nothing to re-check: empty stub. */
static inline void cpufreq_delayed_get(void) { return; }
340 #endif 
341
342 int recalibrate_cpu_khz(void)
343 {
344 #ifndef CONFIG_SMP
345         unsigned int cpu_khz_old = cpu_khz;
346
347         if (cpu_has_tsc) {
348                 local_irq_disable();
349                 init_cpu_khz();
350                 local_irq_enable();
351                 cpu_data[0].loops_per_jiffy =
352                     cpufreq_scale(cpu_data[0].loops_per_jiffy,
353                                   cpu_khz_old,
354                                   cpu_khz);
355                 return 0;
356         } else
357                 return -ENODEV;
358 #else
359         return -ENODEV;
360 #endif
361 }
362 EXPORT_SYMBOL(recalibrate_cpu_khz);
363
/*
 * Timer-tick bookkeeping for the TSC timer when the PIT supplies the tick
 * (the default timer_tsc.mark_offset).
 *
 * Records the current TSC in last_tsc_low/last_tsc_high, latches and reads
 * the PIT channel 0 count, compensates jiffies_64 for lost ticks, advances
 * monotonic_base, and recomputes delay_at_last_interrupt from the PIT
 * count.
 */
static void mark_offset_tsc(void)
{
        unsigned long lost,delay;
        unsigned long delta = last_tsc_low;     /* previous TSC low word */
        int count;
        int countmp;
        static int count1 = 0;          /* PIT sample kept across calls */
        unsigned long long this_offset, last_offset;
        static int lost_count = 0;      /* consecutive calls that lost ticks */

        write_seqlock(&monotonic_lock);
        last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
        /*
         * It is important that these two operations happen almost at
         * the same time. We do the RDTSC stuff first, since it's
         * faster. To avoid any inconsistencies, we need interrupts
         * disabled locally.
         */

        /*
         * Interrupts are just disabled locally since the timer irq
         * has the SA_INTERRUPT flag set. -arca
         */

        /* read Pentium cycle counter */

        rdtsc(last_tsc_low, last_tsc_high);

        spin_lock(&i8253_lock);
        outb_p(0x00, PIT_MODE);     /* latch the count ASAP */

        count = inb_p(PIT_CH0);    /* read the latched count */
        count |= inb(PIT_CH0) << 8;

        /*
         * VIA686a test code... reset the latch if count > max + 1
         * from timer_pit.c - cjb
         */
        if (count > LATCH) {
                outb_p(0x34, PIT_MODE);
                outb_p(LATCH & 0xff, PIT_CH0);
                outb(LATCH >> 8, PIT_CH0);
                count = LATCH - 1;
        }

        spin_unlock(&i8253_lock);

        if (pit_latch_buggy) {
                /* get center value of last 3 latched counts */
                if ((count2 >= count && count >= count1)
                    || (count1 >= count && count >= count2)) {
                        count2 = count1; count1 = count;
                } else if ((count1 >= count2 && count2 >= count)
                           || (count >= count2 && count2 >= count1)) {
                        countmp = count;count = count2;
                        count2 = count1;count1 = countmp;
                } else {
                        /*
                         * NOTE(review): count1 is overwritten with count
                         * before being copied back, so count keeps the
                         * fresh sample here rather than the old count1 -
                         * confirm whether the median was intended.
                         */
                        count2 = count1; count1 = count; count = count1;
                }
        }

        /* lost tick compensation */
        delta = last_tsc_low - delta;   /* TSC low-word delta since last mark */
        {
                register unsigned long eax, edx;
                eax = delta;
                /* edx = high 32 bits of delta * fast_gettimeoffset_quotient */
                __asm__("mull %2"
                :"=a" (eax), "=d" (edx)
                :"rm" (fast_gettimeoffset_quotient),
                 "0" (eax));
                delta = edx;
        }
        delta += delay_at_last_interrupt;
        /* whole ticks elapsed, and the remainder within the tick (usecs) */
        lost = delta/(1000000/HZ);
        delay = delta%(1000000/HZ);
        if (lost >= 2 && detect_lost_ticks) {
                jiffies_64 += lost-1;

                /* sanity check to ensure we're not always losing ticks */
                if (lost_count++ > 100) {
                        printk(KERN_WARNING "Losing too many ticks!\n");
                        printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");
                        printk(KERN_WARNING "Possible reasons for this are:\n");
                        printk(KERN_WARNING "  You're running with Speedstep,\n");
                        printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");
                        printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");
                        printk(KERN_WARNING "Falling back to a sane timesource now.\n");

                        clock_fallback();
                }
                /* ... but give the TSC a fair chance */
                if (lost_count > 25)
                        cpufreq_delayed_get();
        } else
                lost_count = 0;
        /* update the monotonic base value */
        this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
        monotonic_base += cycles_2_ns(this_offset - last_offset);
        write_sequnlock(&monotonic_lock);

        /* calculate delay_at_last_interrupt */
        /* PIT counts elapsed in this tick, scaled by TICK_SIZE and rounded */
        count = ((LATCH-1) - count) * TICK_SIZE;
        delay_at_last_interrupt = (count + LATCH/2) / LATCH;

        /* catch corner case where tick rollover occurred
         * between tsc and pit reads (as noted when
         * usec delta is > 90% # of usecs/tick)
         */
        if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
                jiffies_64++;
}
475
/*
 * Probe and set up the TSC timer.
 *
 * @override: the clock= override string; an empty string or one starting
 *            with "tsc" selects this timer.
 *
 * Returns 0 when the TSC is usable (use_tsc, fast_gettimeoffset_quotient,
 * cpu_khz and cyc2ns_scale have been set up), -ENODEV otherwise.
 */
static int __init init_tsc(char* override)
{

        /* check clock override */
        if (override[0] && strncmp(override,"tsc",3)) {
#ifdef CONFIG_HPET_TIMER
                /* with the HPET enabled the override is ignored */
                if (is_hpet_enabled()) {
                        printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
                } else
#endif
                {
                        return -ENODEV;
                }
        }

        /*
         * If we have APM enabled or the CPU clock speed is variable
         * (CPU stops clock on HLT or slows clock to save power)
         * then the TSC timestamps may diverge by up to 1 jiffy from
         * 'real time' but nothing will break.
         * The most frequent case is that the CPU is "woken" from a halt
         * state by the timer interrupt itself, so we get 0 error. In the
         * rare cases where a driver would "wake" the CPU and request a
         * timestamp, the maximum error is < 1 jiffy. But timestamps are
         * still perfectly ordered.
         * Note that the TSC counter will be reset if APM suspends
         * to disk; this won't break the kernel, though, 'cuz we're
         * smart.  See arch/i386/kernel/apm.c.
         */
        /*
         *      Firstly we have to do a CPU check for chips with
         *      a potentially buggy TSC. At this point we haven't run
         *      the ident/bugs checks so we must run this hook as it
         *      may turn off the TSC flag.
         *
         *      NOTE: this doesn't yet handle SMP 486 machines where only
         *      some CPU's have a TSC. Thats never worked and nobody has
         *      moaned if you have the only one in the world - you fix it!
         */

        count2 = LATCH; /* initialize counter for mark_offset_tsc() */

        if (cpu_has_tsc) {
                unsigned long tsc_quotient;
#ifdef CONFIG_HPET_TIMER
                if (is_hpet_enabled() && hpet_use_timer) {
                        unsigned long result, remain;
                        printk("Using TSC for gettimeofday\n");
                        tsc_quotient = calibrate_tsc_hpet(NULL);
                        /* HPET supplies the tick: use the HPET mark_offset */
                        timer_tsc.mark_offset = &mark_offset_tsc_hpet;
                        /*
                         * Math to calculate hpet to usec multiplier
                         * Look for the comments at get_offset_tsc_hpet()
                         * (NOTE(review): no function of that name is in
                         * this file; the formula is spelled out in
                         * mark_offset_tsc_hpet().)
                         */
                        ASM_DIV64_REG(result, remain, hpet_tick,
                                        0, KERNEL_TICK_USEC);
                        if (remain > (hpet_tick >> 1))
                                result++; /* rounding the result */

                        hpet_usec_quotient = result;
                } else
#endif
                {
                        tsc_quotient = calibrate_tsc();
                }

                /* a zero quotient means calibration failed: fall through */
                if (tsc_quotient) {
                        fast_gettimeoffset_quotient = tsc_quotient;
                        use_tsc = 1;
                        /*
                         *      We could be more selective here I suspect
                         *      and just enable this for the next intel chips ?
                         */
                        /* report CPU clock rate in Hz.
                         * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
                         * clock/second. Our precision is about 100 ppm.
                         */
                        /*
                         * The divl below divides edx:eax = 1000 * 2^32 by
                         * tsc_quotient, storing the result in cpu_khz.
                         */
                        {       unsigned long eax=0, edx=1000;
                                __asm__("divl %2"
                                :"=a" (cpu_khz), "=d" (edx)
                                :"r" (tsc_quotient),
                                "0" (eax), "1" (edx));
                                printk("Detected %u.%03u MHz processor.\n",
                                        cpu_khz / 1000, cpu_khz % 1000);
                        }
                        set_cyc2ns_scale(cpu_khz);
                        return 0;
                }
        }
        return -ENODEV;
}
567
568 static int tsc_resume(void)
569 {
570         write_seqlock(&monotonic_lock);
571         /* Assume this is the last mark offset time */
572         rdtsc(last_tsc_low, last_tsc_high);
573 #ifdef CONFIG_HPET_TIMER
574         if (is_hpet_enabled() && hpet_use_timer)
575                 hpet_last = hpet_readl(HPET_COUNTER);
576 #endif
577         write_sequnlock(&monotonic_lock);
578         return 0;
579 }
580
581 #ifndef CONFIG_X86_TSC
582 /* disable flag for tsc.  Takes effect by clearing the TSC cpu flag
583  * in cpu/common.c */
584 static int __init tsc_setup(char *str)
585 {
586         tsc_disable = 1;
587         return 1;
588 }
589 #else
590 static int __init tsc_setup(char *str)
591 {
592         printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
593                                 "cannot disable TSC.\n");
594         return 1;
595 }
596 #endif
597 __setup("notsc", tsc_setup);
598
599
600
601 /************************************************************/
602
/*
 * tsc timer_opts struct
 *
 * Operations table for the "tsc" timer.  With CONFIG_HPET_TIMER,
 * init_tsc() may replace .mark_offset with mark_offset_tsc_hpet.
 * read_timer_tsc is not defined in this file.
 */
static struct timer_opts timer_tsc = {
        .name = "tsc",
        .mark_offset = mark_offset_tsc, 
        .get_offset = get_offset_tsc,
        .monotonic_clock = monotonic_clock_tsc,
        .delay = delay_tsc,
        .read_timer = read_timer_tsc,
        .resume = tsc_resume,
};

/* Init-time descriptor pairing the init_tsc() probe with the table above */
struct init_timer_opts __initdata timer_tsc_init = {
        .init = init_tsc,
        .opts = &timer_tsc,
};
616         .opts = &timer_tsc,
617 };