2  * This code largely moved from arch/i386/kernel/time.c.
 
   3  * See comments there for proper credits.
 
   5  * 2004-06-25    Jesper Juhl
 
   6  *      moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
 
  10 #include <linux/spinlock.h>
 
  11 #include <linux/init.h>
 
  12 #include <linux/timex.h>
 
  13 #include <linux/errno.h>
 
  14 #include <linux/cpufreq.h>
 
  15 #include <linux/string.h>
 
  16 #include <linux/jiffies.h>
 
  18 #include <asm/timer.h>
 
  20 /* processor.h for tsc_disable flag */
 
  21 #include <asm/processor.h>
 
  24 #include "mach_timer.h"
 
  27 #include <asm/i8253.h>
 
  29 #ifdef CONFIG_HPET_TIMER
 
  /* Multiplier for converting an HPET counter delta to microseconds,
   * scaled by 2^32; assigned in init_tsc() and used by
   * mark_offset_tsc_hpet(). */
  30 static unsigned long hpet_usec_quotient;

  /* HPET counter value stored by mark_offset_tsc_hpet() and tsc_resume().
   * Lost-tick compensation is skipped while this is still 0. */
  31 static unsigned long hpet_last;

  /* Forward declaration; the initialised definition appears later in the file. */
  32 static struct timer_opts timer_tsc;

  /* Forward declaration so mark_offset_tsc() can call it. */
  35 static inline void cpufreq_delayed_get(void);

  /* NOTE(review): not referenced in the visible part of this file;
   * presumably set by the "notsc" boot option handler - confirm. */
  37 int tsc_disable __devinitdata = 0;

  40 /* Number of usecs that the last interrupt was delayed */

  41 static int delay_at_last_interrupt;

  43 static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */

  44 static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */

  /* Nanoseconds accumulated so far; advanced on every mark_offset call by
   * the cycles elapsed since the previous TSC snapshot. */
  45 static unsigned long long monotonic_base;

  /* Seqlock taken for writing while last_tsc_low/high and monotonic_base
   * are updated, and sampled by monotonic_clock_tsc() when reading them. */
  46 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
 
  48 /* convert from cycles(64bits) => nanoseconds (64bits)
 
  50  *              ns = cycles / (freq / ns_per_sec)
 
  51  *              ns = cycles * (ns_per_sec / freq)
 
  52  *              ns = cycles * (10^9 / (cpu_mhz * 10^6))
 
  53  *              ns = cycles * (10^3 / cpu_mhz)
 
  55  *      Then we use scaling math (suggested by george@mvista.com) to get:
 
  56  *              ns = cycles * (10^3 * SC / cpu_mhz) / SC
 
  57  *              ns = cycles * cyc2ns_scale / SC
 
  59  *      And since SC is a constant power of two, we can convert the div
 
  61  *                      -johnstul@us.ibm.com "math is hard, lets go shopping!"
 
  /* Fixed-point cycles->ns multiplier; set_cyc2ns_scale() stores
   * (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz here. */
  63 static unsigned long cyc2ns_scale; 

  /* Shift count used both when building cyc2ns_scale and when scaling
   * the product back down in cycles_2_ns(). */
  64 #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
  /* Recompute cyc2ns_scale for a CPU clock of cpu_mhz MHz.
   *
   * NOTE(review): cpu_mhz is used as a divisor with no visible zero check;
   * the visible callers pass cpu_khz/1000 - confirm this can never be 0. */
  66 static inline void set_cyc2ns_scale(unsigned long cpu_mhz)

  68         cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
 
  /* Convert a cycle count to nanoseconds using the fixed-point factor:
   * ns = (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR.
   *
   * NOTE(review): the 64-bit product is not checked for overflow here. */
  71 static inline unsigned long long cycles_2_ns(unsigned long long cyc)

  73         return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
 
  76 static int count2; /* counter for mark_offset_tsc() */
 
  78 /* Cached *multiplier* to convert TSC counts to microseconds.
 
  79  * (see the equation below).
 
  80  * Equal to 2^32 * (1 / (clocks per usec) ).
 
  81  * Initialized in time_init.
 
  83 static unsigned long fast_gettimeoffset_quotient;
 
  /* Return the time offset, in microseconds, relative to the last recorded
   * TSC snapshot: the low-32-bit TSC delta against last_tsc_low is scaled
   * by fast_gettimeoffset_quotient and delay_at_last_interrupt is added.
   *
   * NOTE(review): the TSC read and the multiply instruction themselves are
   * not visible in this excerpt; only their operand lists are. */
  85 static unsigned long get_offset_tsc(void)

  87         register unsigned long eax, edx;

  89         /* Read the Time Stamp Counter */

  93         /* .. relative to previous jiffy (32 bits is enough) */

  94         eax -= last_tsc_low;    /* tsc_low delta */

  97          * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient

  98          *             = (tsc_low delta) * (usecs_per_clock)

  99          *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)

 101          * Using a mull instead of a divl saves up to 31 clock cycles

 102          * in the critical path.

 106                 :"=a" (eax), "=d" (edx)

 107                 :"rm" (fast_gettimeoffset_quotient),

 110         /* our adjusted time offset in microseconds */

 111         return delay_at_last_interrupt + edx;
 
 /* Return a monotonic time value in nanoseconds: monotonic_base plus the
  * TSC cycles elapsed since the last recorded snapshot, converted with
  * cycles_2_ns(). */
 114 static unsigned long long monotonic_clock_tsc(void)

 116         unsigned long long last_offset, this_offset, base;

 119         /* atomically read monotonic base & last_offset */

             /* seqlock read side: the snapshot is retried if a writer
              * updated it in the meantime */
 121                 seq = read_seqbegin(&monotonic_lock);

 122                 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;

 123                 base = monotonic_base;

 124         } while (read_seqretry(&monotonic_lock, seq));

 126         /* Read the Time Stamp Counter */

 127         rdtscll(this_offset);

 129         /* return the value in ns */

 130         return base + cycles_2_ns(this_offset - last_offset);
 
 134  * Scheduler clock - returns current time in nanosec units.
 
 /* Return the current time in nanoseconds. Two return paths are visible:
  * a jiffies-based value (jiffies_64 scaled by ns-per-jiffy) and a TSC-based
  * value (raw TSC converted with cycles_2_ns()).
  *
  * NOTE(review): the condition that selects the jiffies path is not visible
  * in this excerpt; the surrounding comment indicates it covers NUMA. */
 136 unsigned long long sched_clock(void)

 138         unsigned long long this_offset;

 141          * In the NUMA case we dont use the TSC as they are not

 142          * synchronized across all CPUs.

 147                 /* no locking but a rare wrong value is not a big deal */

 148                 return jiffies_64 * (1000000000 / HZ);

 150         /* Read the Time Stamp Counter */

 151         rdtscll(this_offset);

 153         /* return the value in ns */

 154         return cycles_2_ns(this_offset);
 
 /* Busy-wait until the difference between `now` and `bclock` reaches
  * `loops`. The unsigned subtraction keeps the comparison valid across
  * wrap-around of the sampled counter.
  *
  * NOTE(review): the statements that sample bclock and now are not visible
  * in this excerpt; presumably the low TSC word - confirm. */
 157 static void delay_tsc(unsigned long loops)

 159         unsigned long bclock, now;

 166         } while ((now-bclock) < loops);
 
 169 #ifdef CONFIG_HPET_TIMER
 
 /* Tick-time bookkeeping for the TSC timer when the HPET drives the tick
  * (init_tsc() installs this as timer_tsc.mark_offset in that case).
  * It snapshots the HPET counter and the TSC, compensates jiffies_64 for
  * lost ticks, advances monotonic_base, and recomputes
  * delay_at_last_interrupt from the HPET counter. */
 170 static void mark_offset_tsc_hpet(void)

 172         unsigned long long this_offset, last_offset;

 173         unsigned long offset, temp, hpet_current;

 175         write_seqlock(&monotonic_lock);

             /* previous 64-bit TSC snapshot, kept to compute the elapsed cycles */
 176         last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;

 178          * It is important that these two operations happen almost at

 179          * the same time. We do the RDTSC stuff first, since it's

 180          * faster. To avoid any inconsistencies, we need interrupts

 184          * Interrupts are just disabled locally since the timer irq

 185          * has the SA_INTERRUPT flag set. -arca

 187         /* read Pentium cycle counter */

 189         hpet_current = hpet_readl(HPET_COUNTER);

 190         rdtsc(last_tsc_low, last_tsc_high);

 192         /* lost tick compensation */

             /* offset = comparator value minus one tick period. If it is more
              * than one tick past hpet_last (and hpet_last is valid), the
              * number of whole tick periods in between is added to jiffies_64.
              *
              * NOTE(review): hpet_last is later set from hpet_current (the live
              * counter) but compared here against the comparator-derived
              * offset, and lost_ticks is added without subtracting the tick
              * being handled (mark_offset_tsc() adds lost-1). Confirm this
              * does not over-count. */
 193         offset = hpet_readl(HPET_T0_CMP) - hpet_tick;

 194         if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {

 195                 int lost_ticks = (offset - hpet_last) / hpet_tick;

 196                 jiffies_64 += lost_ticks;

 198         hpet_last = hpet_current;

 200         /* update the monotonic base value */

 201         this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;

 202         monotonic_base += cycles_2_ns(this_offset - last_offset);

 203         write_sequnlock(&monotonic_lock);

 205         /* calculate delay_at_last_interrupt */

 207          * Time offset = (hpet delta) * ( usecs per HPET clock )

 208          *             = (hpet delta) * ( usecs per tick / HPET clocks per tick)

 209          *             = (hpet delta) * ( hpet_usec_quotient ) / (2^32)

 211          * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick

             /* HPET delta since the tick boundary, then scaled in place by
              * hpet_usec_quotient via ASM_MUL64_REG */
 213         delay_at_last_interrupt = hpet_current - offset;

 214         ASM_MUL64_REG(temp, delay_at_last_interrupt,

 215                         hpet_usec_quotient, delay_at_last_interrupt);
 
 220 #ifdef CONFIG_CPU_FREQ
 
 221 #include <linux/workqueue.h>
 
 /* Set to 1 by cpufreq_delayed_get() when the work item is queued and
  * cleared by handle_cpufreq_delayed_get() once it has run. */
 223 static unsigned int cpufreq_delayed_issched = 0;

 /* Gate for cpufreq_delayed_get(): nothing is scheduled while this is 0.
  * NOTE(review): the assignment that sets it is not visible in this excerpt. */
 224 static unsigned int cpufreq_init = 0;

 /* Work item initialised in cpufreq_tsc() to run handle_cpufreq_delayed_get(). */
 225 static struct work_struct cpufreq_delayed_get_work;
 
 /* Work handler bound to cpufreq_delayed_get_work by cpufreq_tsc().
  * It iterates over the online CPUs and then clears
  * cpufreq_delayed_issched so that another check can be scheduled.
  * The argument v is not used in the visible lines.
  *
  * NOTE(review): the loop body is not visible in this excerpt. */
 227 static void handle_cpufreq_delayed_get(void *v)

 230         for_each_online_cpu(cpu) {

 233         cpufreq_delayed_issched = 0;
 
 236 /* If we notice lost ticks, schedule a call to cpufreq_get(), which

 237  * verifies that the CPU frequency the timing core assumes is still

 238  * the frequency the CPU is actually running at.
 
 /* Queue cpufreq_delayed_get_work, at most once at a time: the work is
  * scheduled only when cpufreq_init is set and no earlier request is still
  * pending (cpufreq_delayed_issched == 0). Called from mark_offset_tsc(). */
 240 static inline void cpufreq_delayed_get(void) 

 242         if (cpufreq_init && !cpufreq_delayed_issched) {

                 /* mark as pending before queueing; the handler clears it */
 243                 cpufreq_delayed_issched = 1;

 244                 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");

 245                 schedule_work(&cpufreq_delayed_get_work);
 
 249 /* If the CPU frequency is scaled, TSC-based delays will need a different
 
 250  * loops_per_jiffy value to function properly.
 
 /* Reference values captured in time_cpufreq_notifier() (frequency from
  * freq->old, plus the loops_per_jiffy, fast_gettimeoffset_quotient and
  * cpu_khz in effect at that moment). Later frequency transitions scale
  * from these references with cpufreq_scale(). */
 253 static unsigned int  ref_freq = 0;

 254 static unsigned long loops_per_jiffy_ref = 0;

 257 static unsigned long fast_gettimeoffset_ref = 0;

 258 static unsigned int cpu_khz_ref = 0;
 
 /* cpufreq transition notifier callback (installed through
  * time_cpufreq_notifier_block). It rescales the TSC-derived timing
  * constants when the CPU frequency changes.
  *
  * nb:   notifier block; not used in the visible lines.
  * val:  transition event (CPUFREQ_PRECHANGE, CPUFREQ_POSTCHANGE or
  *       CPUFREQ_RESUMECHANGE are the values tested here).
  * data: NOTE(review): the parameter declaration is not visible in this
  *       excerpt; it is used as a struct cpufreq_freqs pointer.
  *
  * NOTE(review): the return type, the condition guarding the reference
  * capture and the return statement are not visible in this excerpt. */
 262 time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,

 265         struct cpufreq_freqs *freq = data;

             /* xtime_lock is taken for every event except RESUMECHANGE */
 267         if (val != CPUFREQ_RESUMECHANGE)

 268                 write_seqlock_irq(&xtime_lock);

                 /* capture the reference frequency and the values valid at it */
 270                 ref_freq = freq->old;

 271                 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;

 273                 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;

 274                 cpu_khz_ref = cpu_khz;

             /* rescale before the change when speeding up, after the change
              * when slowing down, and always on resume */
 278         if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||

 279             (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||

 280             (val == CPUFREQ_RESUMECHANGE)) {

 281                 if (!(freq->flags & CPUFREQ_CONST_LOOPS))

 282                         cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);

 285                         cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);

 287                         if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {

                                 /* the quotient is usecs-per-clock, so it scales
                                  * inversely: note the swapped frequency arguments */
 288                                 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);

 289                                 set_cyc2ns_scale(cpu_khz/1000);

 295         if (val != CPUFREQ_RESUMECHANGE)

 296                 write_sequnlock_irq(&xtime_lock);
 
 /* Notifier block registered by cpufreq_tsc(); routes cpufreq transition
  * events to time_cpufreq_notifier(). */
 301 static struct notifier_block time_cpufreq_notifier_block = {

 302         .notifier_call  = time_cpufreq_notifier
 
 /* Init-time setup of the cpufreq hooks: binds cpufreq_delayed_get_work to
  * handle_cpufreq_delayed_get() and registers time_cpufreq_notifier_block
  * as a CPUFREQ_TRANSITION_NOTIFIER. Run through core_initcall().
  *
  * NOTE(review): the handling of `ret` and the return statement are not
  * visible in this excerpt. */
 306 static int __init cpufreq_tsc(void)

 309         INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);

 310         ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,

 311                                         CPUFREQ_TRANSITION_NOTIFIER);

 316 core_initcall(cpufreq_tsc);
 
 318 #else /* CONFIG_CPU_FREQ */
 
 /* No-op variant used when CONFIG_CPU_FREQ is not set, so callers need no
  * #ifdef of their own. */
 319 static inline void cpufreq_delayed_get(void) { return; }
 
 /* Exported entry point. In the visible lines it remembers the current
  * cpu_khz in cpu_khz_old and rescales cpu_data[0].loops_per_jiffy with
  * cpufreq_scale().
  *
  * NOTE(review): the guarding condition, the recalibration step, the
  * remaining cpufreq_scale() arguments and the return values are not
  * visible in this excerpt. */
 322 int recalibrate_cpu_khz(void)

 325         unsigned int cpu_khz_old = cpu_khz;

 329                 cpu_data[0].loops_per_jiffy =

 330                     cpufreq_scale(cpu_data[0].loops_per_jiffy,

 340 EXPORT_SYMBOL(recalibrate_cpu_khz);
 
 /* Tick-time bookkeeping for the TSC timer when the PIT drives the tick
  * (the default timer_tsc.mark_offset). It snapshots the TSC, latches and
  * reads PIT channel 0, compensates jiffies_64 for lost ticks, advances
  * monotonic_base, and recomputes delay_at_last_interrupt from the PIT
  * count. */
 342 static void mark_offset_tsc(void)

 344         unsigned long lost,delay;

             /* starts as the previous low TSC word; becomes the delta below */
 345         unsigned long delta = last_tsc_low;

 348         static int count1 = 0;

 349         unsigned long long this_offset, last_offset;

 350         static int lost_count = 0;

 352         write_seqlock(&monotonic_lock);

 353         last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;

 355          * It is important that these two operations happen almost at

 356          * the same time. We do the RDTSC stuff first, since it's

 357          * faster. To avoid any inconsistencies, we need interrupts

 362          * Interrupts are just disabled locally since the timer irq

 363          * has the SA_INTERRUPT flag set. -arca

 366         /* read Pentium cycle counter */

 368         rdtsc(last_tsc_low, last_tsc_high);

             /* PIT port access is serialised by i8253_lock */
 370         spin_lock(&i8253_lock);

 371         outb_p(0x00, PIT_MODE);     /* latch the count ASAP */

 373         count = inb_p(PIT_CH0);    /* read the latched count */

 374         count |= inb(PIT_CH0) << 8;

 377          * VIA686a test code... reset the latch if count > max + 1

 378          * from timer_pit.c - cjb

                 /* reprogram channel 0 with LATCH, low byte then high byte */
 381                 outb_p(0x34, PIT_MODE);

 382                 outb_p(LATCH & 0xff, PIT_CH0);

 383                 outb(LATCH >> 8, PIT_CH0);

 387         spin_unlock(&i8253_lock);

 389         if (pit_latch_buggy) {

 390                 /* get the center (median) value of the last 3 latched counts */

 391                 if ((count2 >= count && count >= count1)

 392                     || (count1 >= count && count >= count2)) {

 393                         count2 = count1; count1 = count;

 394                 } else if ((count1 >= count2 && count2 >= count)

 395                            || (count >= count2 && count2 >= count1)) {

 396                         countmp = count;count = count2;

 397                         count2 = count1;count1 = countmp;

                         /* NOTE(review): count1 was just set to count, so the
                          * final "count = count1" is a self-assignment. If this
                          * branch is meant to select the previous count1 as the
                          * median, it does not - confirm the intent. */
 399                         count2 = count1; count1 = count; count = count1;

 403         /* lost tick compensation */

             /* low-word TSC cycles elapsed since the previous snapshot */
 404         delta = last_tsc_low - delta;

 406                 register unsigned long eax, edx;

 409                 :"=a" (eax), "=d" (edx)

 410                 :"rm" (fast_gettimeoffset_quotient),

             /* split into whole ticks (lost) and the remainder (delay);
              * 1000000/HZ is the tick length in usecs */
 414         delta += delay_at_last_interrupt;

 415         lost = delta/(1000000/HZ);

 416         delay = delta%(1000000/HZ);

                 /* the current tick is accounted elsewhere, hence lost-1 */
 418                 jiffies_64 += lost-1;

 420                 /* sanity check to ensure we're not always losing ticks */

 421                 if (lost_count++ > 100) {

 422                         printk(KERN_WARNING "Losing too many ticks!\n");

 423                         printk(KERN_WARNING "TSC cannot be used as a timesource.  \n");

 424                         printk(KERN_WARNING "Possible reasons for this are:\n");

 425                         printk(KERN_WARNING "  You're running with Speedstep,\n");

 426                         printk(KERN_WARNING "  You don't have DMA enabled for your hard disk (see hdparm),\n");

 427                         printk(KERN_WARNING "  Incorrect TSC synchronization on an SMP system (see dmesg).\n");

 428                         printk(KERN_WARNING "Falling back to a sane timesource now.\n");

 432                 /* ... but give the TSC a fair chance */

 434                         cpufreq_delayed_get();

 437         /* update the monotonic base value */

 438         this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;

 439         monotonic_base += cycles_2_ns(this_offset - last_offset);

 440         write_sequnlock(&monotonic_lock);

 442         /* calculate delay_at_last_interrupt */

             /* PIT ticks already counted down in this period, converted to
              * usecs with rounding to nearest */
 443         count = ((LATCH-1) - count) * TICK_SIZE;

 444         delay_at_last_interrupt = (count + LATCH/2) / LATCH;

 446         /* catch corner case where tick rollover occured

 447          * between tsc and pit reads (as noted when

 448          * usec delta is > 90% # of usecs/tick)

 450         if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
 
 /* Init-time probe and setup of the TSC timer source.
  *
  * override: the clock override string. A non-empty value that does not
  *           begin with "tsc" takes the override branch; with the HPET
  *           enabled, that branch prints a warning that the override failed
  *           and TSC is used anyway.
  *
  * In the visible lines the function seeds count2 with LATCH, calibrates the
  * TSC (through the HPET when it is enabled and in use, otherwise with
  * calibrate_tsc()), stores the result in fast_gettimeoffset_quotient,
  * derives cpu_khz, prints the detected frequency and sets the cycles->ns
  * scale.
  *
  * NOTE(review): the return statements, the TSC-presence check and several
  * branch boundaries are not visible in this excerpt. */
 454 static int __init init_tsc(char* override)

 457         /* check clock override */

 458         if (override[0] && strncmp(override,"tsc",3)) {

 459 #ifdef CONFIG_HPET_TIMER

 460                 if (is_hpet_enabled()) {

 461                         printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");

 470          * If we have APM enabled or the CPU clock speed is variable

 471          * (CPU stops clock on HLT or slows clock to save power)

 472          * then the TSC timestamps may diverge by up to 1 jiffy from

 473          * 'real time' but nothing will break.

 474          * The most frequent case is that the CPU is "woken" from a halt

 475          * state by the timer interrupt itself, so we get 0 error. In the

 476          * rare cases where a driver would "wake" the CPU and request a

 477          * timestamp, the maximum error is < 1 jiffy. But timestamps are

 478          * still perfectly ordered.

 479          * Note that the TSC counter will be reset if APM suspends

 480          * to disk; this won't break the kernel, though, 'cuz we're

 481          * smart.  See arch/i386/kernel/apm.c.

 484          *      Firstly we have to do a CPU check for chips with

 485          *      a potentially buggy TSC. At this point we haven't run

 486          *      the ident/bugs checks so we must run this hook as it

 487          *      may turn off the TSC flag.

 489          *      NOTE: this doesn't yet handle SMP 486 machines where only

 490          *      some CPU's have a TSC. Thats never worked and nobody has

 491          *      moaned if you have the only one in the world - you fix it!

 494         count2 = LATCH; /* initialize counter for mark_offset_tsc() */

 497                 unsigned long tsc_quotient;

 498 #ifdef CONFIG_HPET_TIMER

 499                 if (is_hpet_enabled() && hpet_use_timer) {

 500                         unsigned long result, remain;

 501                         printk("Using TSC for gettimeofday\n");

                         /* calibrate against the HPET and switch the tick
                          * handler to the HPET-aware variant */
 502                         tsc_quotient = calibrate_tsc_hpet(NULL);

 503                         timer_tsc.mark_offset = &mark_offset_tsc_hpet;

 505                          * Math to calculate hpet to usec multiplier

 506                          * Look for the comments at get_offset_tsc_hpet()

                         /* quotient/remainder of the division by hpet_tick; the
                          * quotient is rounded up when the remainder exceeds
                          * half of hpet_tick */
 508                         ASM_DIV64_REG(result, remain, hpet_tick,

 509                                         0, KERNEL_TICK_USEC);

 510                         if (remain > (hpet_tick >> 1))

 511                                 result++; /* rounding the result */

 513                         hpet_usec_quotient = result;

 517                         tsc_quotient = calibrate_tsc();

 521                         fast_gettimeoffset_quotient = tsc_quotient;

 524                          *      We could be more selective here I suspect

 525                          *      and just enable this for the next intel chips ?

 527                         /* report CPU clock rate in Hz.

 528                          * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =

 529                          * clock/second. Our precision is about 100 ppm.

 531                         {       unsigned long eax=0, edx=1000;

 533                                 :"=a" (cpu_khz), "=d" (edx)

 535                                 "0" (eax), "1" (edx));

 536                                 printk("Detected %u.%03u MHz processor.\n",

 537                                         cpu_khz / 1000, cpu_khz % 1000);

 539                         set_cyc2ns_scale(cpu_khz/1000);
 
 /* Resume hook (installed as timer_tsc.resume). Under the monotonic_lock
  * write lock it re-snapshots the TSC into last_tsc_low/high, and when the
  * HPET is enabled and in use it also reloads hpet_last from the HPET
  * counter, so the next tick measures from the resume point.
  *
  * NOTE(review): the return statement is not visible in this excerpt. */
 546 static int tsc_resume(void)

 548         write_seqlock(&monotonic_lock);

 549         /* Assume this is the last mark offset time */

 550         rdtsc(last_tsc_low, last_tsc_high);

 551 #ifdef CONFIG_HPET_TIMER

 552         if (is_hpet_enabled() && hpet_use_timer)

 553                 hpet_last = hpet_readl(HPET_COUNTER);

 555         write_sequnlock(&monotonic_lock);
 
 559 #ifndef CONFIG_X86_TSC
 
 560 /* disable flag for tsc.  Takes effect by clearing the TSC cpu flag
 
 /* Handler for the "notsc" boot parameter (registered with __setup()).
  * Two alternative definitions appear, selected by the preprocessor on
  * CONFIG_X86_TSC. The variant whose body is visible only prints a warning
  * that a kernel built with CONFIG_X86_TSC cannot disable the TSC.
  *
  * NOTE(review): the body of the first variant, the #else/#endif lines and
  * the return statements are not visible in this excerpt. */
 562 static int __init tsc_setup(char *str)

 568 static int __init tsc_setup(char *str)

 570         printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "

 571                                 "cannot disable TSC.\n");

 575 __setup("notsc", tsc_setup);
 
 579 /************************************************************/
 
 581 /* tsc timer_opts struct */
 
 /* Operations table for the TSC timer source. mark_offset defaults to the
  * PIT-based handler; init_tsc() replaces it with mark_offset_tsc_hpet when
  * the HPET is enabled and in use.
  *
  * NOTE(review): some initialiser lines (and read_timer_tsc itself) are not
  * visible in this excerpt. */
 582 static struct timer_opts timer_tsc = {

 584         .mark_offset = mark_offset_tsc, 

 585         .get_offset = get_offset_tsc,

 586         .monotonic_clock = monotonic_clock_tsc,

 588         .read_timer = read_timer_tsc,

 589         .resume = tsc_resume,
 
 592 struct init_timer_opts __initdata timer_tsc_init = {