perf_counter: unify irq output code
arch/x86/kernel/cpu/perf_counter.c
1 /*
2  * Performance counter x86 architecture code
3  *
4  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5  *  Copyright (C) 2008 Red Hat, Inc., Ingo Molnar
6  *  Copyright (C) 2009 Jaswinder Singh Rajput
7  *
8  *  For licensing details see kernel-base/COPYING
9  */
10
11 #include <linux/perf_counter.h>
12 #include <linux/capability.h>
13 #include <linux/notifier.h>
14 #include <linux/hardirq.h>
15 #include <linux/kprobes.h>
16 #include <linux/module.h>
17 #include <linux/kdebug.h>
18 #include <linux/sched.h>
19
20 #include <asm/apic.h>
21
22 static bool perf_counters_initialized __read_mostly;
23
24 /*
25  * Number of (generic) HW counters:
26  */
27 static int nr_counters_generic __read_mostly;
28 static u64 perf_counter_mask __read_mostly;
29 static u64 counter_value_mask __read_mostly;
30 static int counter_value_bits __read_mostly;
31
32 static int nr_counters_fixed __read_mostly;
33
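/*
 * Per-CPU bookkeeping: which perf_counter owns each hardware counter
 * slot, which slots are allocated, and the interrupt-throttling state
 * used by the IRQ/NMI handlers below.
 */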
34 struct cpu_hw_counters {
35         struct perf_counter     *counters[X86_PMC_IDX_MAX];
36         unsigned long           used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
37         unsigned long           interrupts;
38         u64                     throttle_ctrl;
39         unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
40         int                     enabled;
41 };
42
43 /*
44  * struct pmc_x86_ops - performance counter x86 ops
45  */
46 struct pmc_x86_ops {
47         u64             (*save_disable_all)(void);
48         void            (*restore_all)(u64);
49         u64             (*get_status)(u64);
50         void            (*ack_status)(u64);
51         void            (*enable)(int, u64);
52         void            (*disable)(int, u64);
53         unsigned        eventsel;
54         unsigned        perfctr;
55         u64             (*event_map)(int);
56         u64             (*raw_event)(u64);
57         int             max_events;
58 };
59
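/*
 * Vendor-specific ops, selected at init time by init_hw_perf_counters()
 * (pmc_intel_ops or pmc_amd_ops):
 */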
60 static struct pmc_x86_ops *pmc_ops __read_mostly;
61
62 static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
63         .enabled = 1,
64 };
65
66 static __read_mostly int intel_perfmon_version;
67
68 /*
69  * Intel PerfMon v3. Used on Core2 and later.
70  */
71 static const u64 intel_perfmon_event_map[] =
72 {
73   [PERF_COUNT_CPU_CYCLES]               = 0x003c,
74   [PERF_COUNT_INSTRUCTIONS]             = 0x00c0,
75   [PERF_COUNT_CACHE_REFERENCES]         = 0x4f2e,
76   [PERF_COUNT_CACHE_MISSES]             = 0x412e,
77   [PERF_COUNT_BRANCH_INSTRUCTIONS]      = 0x00c4,
78   [PERF_COUNT_BRANCH_MISSES]            = 0x00c5,
79   [PERF_COUNT_BUS_CYCLES]               = 0x013c,
80 };
81
82 static u64 pmc_intel_event_map(int event)
83 {
84         return intel_perfmon_event_map[event];
85 }
86
87 static u64 pmc_intel_raw_event(u64 event)
88 {
89 #define CORE_EVNTSEL_EVENT_MASK         0x000000FFULL
90 #define CORE_EVNTSEL_UNIT_MASK          0x0000FF00ULL
91 #define CORE_EVNTSEL_COUNTER_MASK       0xFF000000ULL
92
93 #define CORE_EVNTSEL_MASK               \
94         (CORE_EVNTSEL_EVENT_MASK |      \
95          CORE_EVNTSEL_UNIT_MASK  |      \
96          CORE_EVNTSEL_COUNTER_MASK)
97
98         return event & CORE_EVNTSEL_MASK;
99 }
100
101 /*
102  * AMD Performance Monitor K7 and later.
103  */
104 static const u64 amd_perfmon_event_map[] =
105 {
106   [PERF_COUNT_CPU_CYCLES]               = 0x0076,
107   [PERF_COUNT_INSTRUCTIONS]             = 0x00c0,
108   [PERF_COUNT_CACHE_REFERENCES]         = 0x0080,
109   [PERF_COUNT_CACHE_MISSES]             = 0x0081,
110   [PERF_COUNT_BRANCH_INSTRUCTIONS]      = 0x00c4,
111   [PERF_COUNT_BRANCH_MISSES]            = 0x00c5,
112 };
113
114 static u64 pmc_amd_event_map(int event)
115 {
116         return amd_perfmon_event_map[event];
117 }
118
119 static u64 pmc_amd_raw_event(u64 event)
120 {
121 #define K7_EVNTSEL_EVENT_MASK   0x7000000FFULL
122 #define K7_EVNTSEL_UNIT_MASK    0x00000FF00ULL
123 #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
124
125 #define K7_EVNTSEL_MASK                 \
126         (K7_EVNTSEL_EVENT_MASK |        \
127          K7_EVNTSEL_UNIT_MASK  |        \
128          K7_EVNTSEL_COUNTER_MASK)
129
130         return event & K7_EVNTSEL_MASK;
131 }
132
133 /*
134  * Propagate counter elapsed time into the generic counter.
135  * Can only be executed on the CPU where the counter is active.
136  * Adds the elapsed delta to counter->count and subtracts it from period_left.
137  */
138 static void
139 x86_perf_counter_update(struct perf_counter *counter,
140                         struct hw_perf_counter *hwc, int idx)
141 {
142         u64 prev_raw_count, new_raw_count, delta;
143
144         /*
145          * Careful: an NMI might modify the previous counter value.
146          *
147          * Our tactic to handle this is to first atomically read and
148          * exchange a new raw count - then add that new-prev delta
149          * count to the generic counter atomically:
150          */
151 again:
152         prev_raw_count = atomic64_read(&hwc->prev_count);
153         rdmsrl(hwc->counter_base + idx, new_raw_count);
154
155         if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
156                                         new_raw_count) != prev_raw_count)
157                 goto again;
158
159         /*
160          * Now we have the new raw value and have updated the prev
161          * count already. We can now calculate the elapsed delta
162          * (counter-)time and add that to the generic counter.
163          *
164          * Careful, not all hw sign-extends above the physical width
165          * of the count, so we handle that by clipping the delta to 32 bits:
166          */
167         delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count);
168
169         atomic64_add(delta, &counter->count);
170         atomic64_sub(delta, &hwc->period_left);
171 }
172
173 /*
174  * Setup the hardware configuration for a given hw_event_type
175  */
176 static int __hw_perf_counter_init(struct perf_counter *counter)
177 {
178         struct perf_counter_hw_event *hw_event = &counter->hw_event;
179         struct hw_perf_counter *hwc = &counter->hw;
180
181         if (unlikely(!perf_counters_initialized))
182                 return -EINVAL;
183
184         /*
185          * Generate PMC IRQs:
186          * (keep 'enabled' bit clear for now)
187          */
188         hwc->config = ARCH_PERFMON_EVENTSEL_INT;
189
190         /*
191          * Count user and OS events unless requested not to.
192          */
193         if (!hw_event->exclude_user)
194                 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
195         if (!hw_event->exclude_kernel)
196                 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
197
198         /*
199          * If privileged enough, allow NMI events:
200          */
201         hwc->nmi = 0;
202         if (capable(CAP_SYS_ADMIN) && hw_event->nmi)
203                 hwc->nmi = 1;
204
205         hwc->irq_period         = hw_event->irq_period;
206         /*
207          * Intel PMCs cannot be accessed sanely above 32 bit width,
208          * so we install an artificial 1<<31 period regardless of
209          * the generic counter period:
210          */
211         if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
212                 if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF)
213                         hwc->irq_period = 0x7FFFFFFF;
214
215         atomic64_set(&hwc->period_left, hwc->irq_period);
216
217         /*
218          * Raw event types provide the config in the event structure
219          */
220         if (hw_event->raw_type) {
221                 hwc->config |= pmc_ops->raw_event(hw_event->raw_event_id);
222         } else {
223                 if (hw_event->event_id >= pmc_ops->max_events)
224                         return -EINVAL;
225                 /*
226                  * The generic map:
227                  */
228                 hwc->config |= pmc_ops->event_map(hw_event->event_id);
229         }
230         counter->wakeup_pending = 0;
231
232         return 0;
233 }
234
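/*
 * Globally disable the counters on this CPU; the returned value is the
 * previous enable state and is later handed back to hw_perf_restore():
 */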
235 static u64 pmc_intel_save_disable_all(void)
236 {
237         u64 ctrl;
238
239         rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
240         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
241
242         return ctrl;
243 }
244
245 static u64 pmc_amd_save_disable_all(void)
246 {
247         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
248         int enabled, idx;
249
250         enabled = cpuc->enabled;
251         cpuc->enabled = 0;
252         /*
253          * ensure we write the disable before we start disabling the
254          * counters proper, so that pmc_amd_enable() does the right thing.
255          */
256         barrier();
257
258         for (idx = 0; idx < nr_counters_generic; idx++) {
259                 u64 val;
260
261                 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
262                 if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) {
263                         val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
264                         wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
265                 }
266         }
267
268         return enabled;
269 }
270
271 u64 hw_perf_save_disable(void)
272 {
273         if (unlikely(!perf_counters_initialized))
274                 return 0;
275
276         return pmc_ops->save_disable_all();
277 }
278 /*
279  * Exported because of ACPI idle
280  */
281 EXPORT_SYMBOL_GPL(hw_perf_save_disable);
282
283 static void pmc_intel_restore_all(u64 ctrl)
284 {
285         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
286 }
287
288 static void pmc_amd_restore_all(u64 ctrl)
289 {
290         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
291         int idx;
292
293         cpuc->enabled = ctrl;
294         barrier();
295         if (!ctrl)
296                 return;
297
298         for (idx = 0; idx < nr_counters_generic; idx++) {
299                 if (test_bit(idx, cpuc->active_mask)) {
300                         u64 val;
301
302                         rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
303                         val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
304                         wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
305                 }
306         }
307 }
308
309 void hw_perf_restore(u64 ctrl)
310 {
311         if (unlikely(!perf_counters_initialized))
312                 return;
313
314         pmc_ops->restore_all(ctrl);
315 }
316 /*
317  * Exported because of ACPI idle
318  */
319 EXPORT_SYMBOL_GPL(hw_perf_restore);
320
321 static u64 pmc_intel_get_status(u64 mask)
322 {
323         u64 status;
324
325         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
326
327         return status;
328 }
329
330 static u64 pmc_amd_get_status(u64 mask)
331 {
332         u64 status = 0;
333         int idx;
334
335         for (idx = 0; idx < nr_counters_generic; idx++) {
336                 s64 val;
337
338                 if (!(mask & (1 << idx)))
339                         continue;
340
341                 rdmsrl(MSR_K7_PERFCTR0 + idx, val);
342                 val <<= (64 - counter_value_bits);
343                 if (val >= 0)
344                         status |= (1 << idx);
345         }
346
347         return status;
348 }
349
350 static u64 hw_perf_get_status(u64 mask)
351 {
352         if (unlikely(!perf_counters_initialized))
353                 return 0;
354
355         return pmc_ops->get_status(mask);
356 }
357
358 static void pmc_intel_ack_status(u64 ack)
359 {
360         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
361 }
362
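/*
 * AMD has no global overflow status/ack MSR; overflow is detected per
 * counter in pmc_amd_get_status(), so there is nothing to acknowledge here:
 */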
363 static void pmc_amd_ack_status(u64 ack)
364 {
365 }
366
367 static void hw_perf_ack_status(u64 ack)
368 {
369         if (unlikely(!perf_counters_initialized))
370                 return;
371
372         pmc_ops->ack_status(ack);
373 }
374
375 static void pmc_intel_enable(int idx, u64 config)
376 {
377         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx,
378                         config | ARCH_PERFMON_EVENTSEL0_ENABLE);
379 }
380
381 static void pmc_amd_enable(int idx, u64 config)
382 {
383         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
384
385         set_bit(idx, cpuc->active_mask);
386         if (cpuc->enabled)
387                 config |= ARCH_PERFMON_EVENTSEL0_ENABLE;
388
389         wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
390 }
391
392 static void hw_perf_enable(int idx, u64 config)
393 {
394         if (unlikely(!perf_counters_initialized))
395                 return;
396
397         pmc_ops->enable(idx, config);
398 }
399
400 static void pmc_intel_disable(int idx, u64 config)
401 {
402         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config);
403 }
404
405 static void pmc_amd_disable(int idx, u64 config)
406 {
407         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
408
409         clear_bit(idx, cpuc->active_mask);
410         wrmsrl(MSR_K7_EVNTSEL0 + idx, config);
411
412 }
413
414 static void hw_perf_disable(int idx, u64 config)
415 {
416         if (unlikely(!perf_counters_initialized))
417                 return;
418
419         pmc_ops->disable(idx, config);
420 }
421
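/*
 * Each fixed-function counter has a 4-bit field in
 * MSR_ARCH_PERFMON_FIXED_CTR_CTRL (e.g. bits 4-7 control fixed counter 1);
 * clearing that field stops the counter:
 */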
422 static inline void
423 __pmc_fixed_disable(struct perf_counter *counter,
424                     struct hw_perf_counter *hwc, unsigned int __idx)
425 {
426         int idx = __idx - X86_PMC_IDX_FIXED;
427         u64 ctrl_val, mask;
428         int err;
429
430         mask = 0xfULL << (idx * 4);
431
432         rdmsrl(hwc->config_base, ctrl_val);
433         ctrl_val &= ~mask;
434         err = checking_wrmsrl(hwc->config_base, ctrl_val);
435 }
436
437 static inline void
438 __pmc_generic_disable(struct perf_counter *counter,
439                            struct hw_perf_counter *hwc, unsigned int idx)
440 {
441         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
442                 __pmc_fixed_disable(counter, hwc, idx);
443         else
444                 hw_perf_disable(idx, hwc->config);
445 }
446
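/* Last 'left' value programmed for each counter; used by perf_counter_print_debug(): */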
447 static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
448
449 /*
450  * Set the next IRQ period, based on the hwc->period_left value.
451  * To be called with the counter disabled in hw:
452  */
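/*
 * The counter is programmed with (u64)-left, truncated to the counter
 * width, so that it overflows (and, with the INT bit set, raises an
 * interrupt) after 'left' more events.
 */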
453 static void
454 __hw_perf_counter_set_period(struct perf_counter *counter,
455                              struct hw_perf_counter *hwc, int idx)
456 {
457         s64 left = atomic64_read(&hwc->period_left);
458         s64 period = hwc->irq_period;
459         int err;
460
461         /*
462          * If we are way outside a reasonable range then just skip forward:
463          */
464         if (unlikely(left <= -period)) {
465                 left = period;
466                 atomic64_set(&hwc->period_left, left);
467         }
468
469         if (unlikely(left <= 0)) {
470                 left += period;
471                 atomic64_set(&hwc->period_left, left);
472         }
473
474         per_cpu(prev_left[idx], smp_processor_id()) = left;
475
476         /*
477          * The hw counter starts counting from this counter offset,
478          * mark it to be able to extract future deltas:
479          */
480         atomic64_set(&hwc->prev_count, (u64)-left);
481
482         err = checking_wrmsrl(hwc->counter_base + idx,
483                              (u64)(-left) & counter_value_mask);
484 }
485
486 static inline void
487 __pmc_fixed_enable(struct perf_counter *counter,
488                    struct hw_perf_counter *hwc, unsigned int __idx)
489 {
490         int idx = __idx - X86_PMC_IDX_FIXED;
491         u64 ctrl_val, bits, mask;
492         int err;
493
494         /*
495          * Enable IRQ generation (0x8),
496          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
497          * if requested:
498          */
499         bits = 0x8ULL;
500         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
501                 bits |= 0x2;
502         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
503                 bits |= 0x1;
504         bits <<= (idx * 4);
505         mask = 0xfULL << (idx * 4);
506
507         rdmsrl(hwc->config_base, ctrl_val);
508         ctrl_val &= ~mask;
509         ctrl_val |= bits;
510         err = checking_wrmsrl(hwc->config_base, ctrl_val);
511 }
512
513 static void
514 __pmc_generic_enable(struct perf_counter *counter,
515                           struct hw_perf_counter *hwc, int idx)
516 {
517         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
518                 __pmc_fixed_enable(counter, hwc, idx);
519         else
520                 hw_perf_enable(idx, hwc->config);
521 }
522
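/*
 * Map an event to one of the Intel fixed-function counters, or return -1
 * if it must go to a generic counter (AMD, NMI counters and events without
 * a fixed-counter equivalent always use generic counters):
 */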
523 static int
524 fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
525 {
526         unsigned int event;
527
528         if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
529                 return -1;
530
531         if (unlikely(hwc->nmi))
532                 return -1;
533
534         event = hwc->config & ARCH_PERFMON_EVENT_MASK;
535
536         if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS)))
537                 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
538         if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES)))
539                 return X86_PMC_IDX_FIXED_CPU_CYCLES;
540         if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES)))
541                 return X86_PMC_IDX_FIXED_BUS_CYCLES;
542
543         return -1;
544 }
545
546 /*
547  * Find a PMC slot for the freshly enabled / scheduled-in counter:
548  */
549 static int pmc_generic_enable(struct perf_counter *counter)
550 {
551         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
552         struct hw_perf_counter *hwc = &counter->hw;
553         int idx;
554
555         idx = fixed_mode_idx(counter, hwc);
556         if (idx >= 0) {
557                 /*
558                  * Try to get the fixed counter, if that is already taken
559                  * then try to get a generic counter:
560                  */
561                 if (test_and_set_bit(idx, cpuc->used))
562                         goto try_generic;
563
564                 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
565                 /*
566                  * We set it so that counter_base + idx in wrmsr/rdmsr maps to
567                  * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
568                  */
569                 hwc->counter_base =
570                         MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
571                 hwc->idx = idx;
572         } else {
573                 idx = hwc->idx;
574                 /* Try to get the previous generic counter again */
575                 if (test_and_set_bit(idx, cpuc->used)) {
576 try_generic:
577                         idx = find_first_zero_bit(cpuc->used, nr_counters_generic);
578                         if (idx == nr_counters_generic)
579                                 return -EAGAIN;
580
581                         set_bit(idx, cpuc->used);
582                         hwc->idx = idx;
583                 }
584                 hwc->config_base  = pmc_ops->eventsel;
585                 hwc->counter_base = pmc_ops->perfctr;
586         }
587
588         perf_counters_lapic_init(hwc->nmi);
589
590         __pmc_generic_disable(counter, hwc, idx);
591
592         cpuc->counters[idx] = counter;
593         /*
594          * Make it visible before enabling the hw:
595          */
596         smp_wmb();
597
598         __hw_perf_counter_set_period(counter, hwc, idx);
599         __pmc_generic_enable(counter, hwc, idx);
600
601         return 0;
602 }
603
604 void perf_counter_print_debug(void)
605 {
606         u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
607         struct cpu_hw_counters *cpuc;
608         int cpu, idx;
609
610         if (!nr_counters_generic)
611                 return;
612
613         local_irq_disable();
614
615         cpu = smp_processor_id();
616         cpuc = &per_cpu(cpu_hw_counters, cpu);
617
618         if (intel_perfmon_version >= 2) {
619                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
620                 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
621                 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
622                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
623
624                 pr_info("\n");
625                 pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
626                 pr_info("CPU#%d: status:     %016llx\n", cpu, status);
627                 pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
628                 pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
629         }
630         pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used);
631
632         for (idx = 0; idx < nr_counters_generic; idx++) {
633                 rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl);
634                 rdmsrl(pmc_ops->perfctr  + idx, pmc_count);
635
636                 prev_left = per_cpu(prev_left[idx], cpu);
637
638                 pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
639                         cpu, idx, pmc_ctrl);
640                 pr_info("CPU#%d:   gen-PMC%d count: %016llx\n",
641                         cpu, idx, pmc_count);
642                 pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
643                         cpu, idx, prev_left);
644         }
645         for (idx = 0; idx < nr_counters_fixed; idx++) {
646                 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
647
648                 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
649                         cpu, idx, pmc_count);
650         }
651         local_irq_enable();
652 }
653
654 static void pmc_generic_disable(struct perf_counter *counter)
655 {
656         struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
657         struct hw_perf_counter *hwc = &counter->hw;
658         unsigned int idx = hwc->idx;
659
660         __pmc_generic_disable(counter, hwc, idx);
661
662         clear_bit(idx, cpuc->used);
663         cpuc->counters[idx] = NULL;
664         /*
665          * Make sure the cleared pointer becomes visible before we
666          * (potentially) free the counter:
667          */
668         smp_wmb();
669
670         /*
671          * Drain the remaining delta count out of a counter
672          * that we are disabling:
673          */
674         x86_perf_counter_update(counter, hwc, idx);
675 }
676
677 /*
678  * Save and restart an expired counter. Called by NMI contexts,
679  * so it has to be careful about preempting normal counter ops:
680  */
681 static void perf_save_and_restart(struct perf_counter *counter)
682 {
683         struct hw_perf_counter *hwc = &counter->hw;
684         int idx = hwc->idx;
685
686         x86_perf_counter_update(counter, hwc, idx);
687         __hw_perf_counter_set_period(counter, hwc, idx);
688
689         if (counter->state == PERF_COUNTER_STATE_ACTIVE)
690                 __pmc_generic_enable(counter, hwc, idx);
691 }
692
693 /*
694  * Maximum interrupt frequency of 100KHz per CPU
695  */
696 #define PERFMON_MAX_INTERRUPTS (100000/HZ)
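/*
 * Once a CPU has taken this many PMC interrupts, __smp_perf_counter_interrupt()
 * stops re-enabling the counters; perf_counter_unthrottle() restores them
 * and resets the count.
 */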
697
698 /*
699  * This handler is triggered by the local APIC, so the APIC IRQ handling
700  * rules apply:
701  */
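/*
 * All counters on this CPU are disabled first; each overflowed counter is
 * then re-armed via perf_save_and_restart() and reported via
 * perf_counter_output(), and finally the counters are re-enabled unless
 * the per-CPU interrupt budget above has been exhausted.
 */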
702 static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
703 {
704         int bit, cpu = smp_processor_id();
705         u64 ack, status;
706         struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
707         int ret = 0;
708
709         cpuc->throttle_ctrl = hw_perf_save_disable();
710
711         status = hw_perf_get_status(cpuc->throttle_ctrl);
712         if (!status)
713                 goto out;
714
715         ret = 1;
716 again:
717         inc_irq_stat(apic_perf_irqs);
718         ack = status;
719         for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
720                 struct perf_counter *counter = cpuc->counters[bit];
721
722                 clear_bit(bit, (unsigned long *) &status);
723                 if (!counter)
724                         continue;
725
726                 perf_save_and_restart(counter);
727                 perf_counter_output(counter, nmi, regs);
728         }
729
730         hw_perf_ack_status(ack);
731
732         /*
733          * Repeat if there is more work to be done:
734          */
735         status = hw_perf_get_status(cpuc->throttle_ctrl);
736         if (status)
737                 goto again;
738 out:
739         /*
740          * Restore - do not reenable when global enable is off or throttled:
741          */
742         if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS)
743                 hw_perf_restore(cpuc->throttle_ctrl);
744
745         return ret;
746 }
747
748 void perf_counter_unthrottle(void)
749 {
750         struct cpu_hw_counters *cpuc;
751
752         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
753                 return;
754
755         if (unlikely(!perf_counters_initialized))
756                 return;
757
758         cpuc = &__get_cpu_var(cpu_hw_counters);
759         if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
760                 if (printk_ratelimit())
761                         printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n");
762                 hw_perf_restore(cpuc->throttle_ctrl);
763         }
764         cpuc->interrupts = 0;
765 }
766
767 void smp_perf_counter_interrupt(struct pt_regs *regs)
768 {
769         irq_enter();
770         apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
771         ack_APIC_irq();
772         __smp_perf_counter_interrupt(regs, 0);
773         irq_exit();
774 }
775
776 /*
777  * This handler is triggered by NMI contexts:
778  */
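/*
 * Walk the counters in use on this CPU and wake up any waiters whose
 * counter has wakeup_pending set:
 */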
779 void perf_counter_notify(struct pt_regs *regs)
780 {
781         struct cpu_hw_counters *cpuc;
782         unsigned long flags;
783         int bit, cpu;
784
785         local_irq_save(flags);
786         cpu = smp_processor_id();
787         cpuc = &per_cpu(cpu_hw_counters, cpu);
788
789         for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) {
790                 struct perf_counter *counter = cpuc->counters[bit];
791
792                 if (!counter)
793                         continue;
794
795                 if (counter->wakeup_pending) {
796                         counter->wakeup_pending = 0;
797                         wake_up(&counter->waitq);
798                 }
799         }
800
801         local_irq_restore(flags);
802 }
803
804 void perf_counters_lapic_init(int nmi)
805 {
806         u32 apic_val;
807
808         if (!perf_counters_initialized)
809                 return;
810         /*
811          * Enable the performance counter vector in the APIC LVT:
812          */
813         apic_val = apic_read(APIC_LVTERR);
814
815         apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED);
816         if (nmi)
817                 apic_write(APIC_LVTPC, APIC_DM_NMI);
818         else
819                 apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR);
820         apic_write(APIC_LVTERR, apic_val);
821 }
822
823 static int __kprobes
824 perf_counter_nmi_handler(struct notifier_block *self,
825                          unsigned long cmd, void *__args)
826 {
827         struct die_args *args = __args;
828         struct pt_regs *regs;
829         int ret;
830
831         switch (cmd) {
832         case DIE_NMI:
833         case DIE_NMI_IPI:
834                 break;
835
836         default:
837                 return NOTIFY_DONE;
838         }
839
840         regs = args->regs;
841
842         apic_write(APIC_LVTPC, APIC_DM_NMI);
843         ret = __smp_perf_counter_interrupt(regs, 1);
844
845         return ret ? NOTIFY_STOP : NOTIFY_OK;
846 }
847
848 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
849         .notifier_call          = perf_counter_nmi_handler,
850         .next                   = NULL,
851         .priority               = 1
852 };
853
854 static struct pmc_x86_ops pmc_intel_ops = {
855         .save_disable_all       = pmc_intel_save_disable_all,
856         .restore_all            = pmc_intel_restore_all,
857         .get_status             = pmc_intel_get_status,
858         .ack_status             = pmc_intel_ack_status,
859         .enable                 = pmc_intel_enable,
860         .disable                = pmc_intel_disable,
861         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
862         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
863         .event_map              = pmc_intel_event_map,
864         .raw_event              = pmc_intel_raw_event,
865         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
866 };
867
868 static struct pmc_x86_ops pmc_amd_ops = {
869         .save_disable_all       = pmc_amd_save_disable_all,
870         .restore_all            = pmc_amd_restore_all,
871         .get_status             = pmc_amd_get_status,
872         .ack_status             = pmc_amd_ack_status,
873         .enable                 = pmc_amd_enable,
874         .disable                = pmc_amd_disable,
875         .eventsel               = MSR_K7_EVNTSEL0,
876         .perfctr                = MSR_K7_PERFCTR0,
877         .event_map              = pmc_amd_event_map,
878         .raw_event              = pmc_amd_raw_event,
879         .max_events             = ARRAY_SIZE(amd_perfmon_event_map),
880 };
881
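/*
 * Probe the architectural PerfMon via CPUID leaf 0xA and pick up the
 * number and width of the generic and fixed counters:
 */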
882 static struct pmc_x86_ops *pmc_intel_init(void)
883 {
884         union cpuid10_edx edx;
885         union cpuid10_eax eax;
886         unsigned int unused;
887         unsigned int ebx;
888
889         /*
890          * Check whether the Architectural PerfMon supports
891          * the Branch Misses Retired event:
892          */
893         cpuid(10, &eax.full, &ebx, &unused, &edx.full);
894         if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
895                 return NULL;
896
897         intel_perfmon_version = eax.split.version_id;
898         if (intel_perfmon_version < 2)
899                 return NULL;
900
901         pr_info("Intel Performance Monitoring support detected.\n");
902         pr_info("... version:         %d\n", intel_perfmon_version);
903         pr_info("... bit width:       %d\n", eax.split.bit_width);
904         pr_info("... mask length:     %d\n", eax.split.mask_length);
905
906         nr_counters_generic = eax.split.num_counters;
907         nr_counters_fixed = edx.split.num_counters_fixed;
908         counter_value_mask = (1ULL << eax.split.bit_width) - 1;
909
910         return &pmc_intel_ops;
911 }
912
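/*
 * The AMD counter geometry is not read from CPUID here: the four 48-bit
 * K7-style counters (MSR_K7_EVNTSEL0/MSR_K7_PERFCTR0 and up) are hardwired.
 */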
913 static struct pmc_x86_ops *pmc_amd_init(void)
914 {
915         nr_counters_generic = 4;
916         nr_counters_fixed = 0;
917         counter_value_mask = 0x0000FFFFFFFFFFFFULL;
918         counter_value_bits = 48;
919
920         pr_info("AMD Performance Monitoring support detected.\n");
921
922         return &pmc_amd_ops;
923 }
924
925 void __init init_hw_perf_counters(void)
926 {
927         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
928                 return;
929
930         switch (boot_cpu_data.x86_vendor) {
931         case X86_VENDOR_INTEL:
932                 pmc_ops = pmc_intel_init();
933                 break;
934         case X86_VENDOR_AMD:
935                 pmc_ops = pmc_amd_init();
936                 break;
937         }
938         if (!pmc_ops)
939                 return;
940
941         pr_info("... num counters:    %d\n", nr_counters_generic);
942         if (nr_counters_generic > X86_PMC_MAX_GENERIC) {
943                 nr_counters_generic = X86_PMC_MAX_GENERIC;
944                 WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
945                         nr_counters_generic, X86_PMC_MAX_GENERIC);
946         }
947         perf_counter_mask = (1 << nr_counters_generic) - 1;
948         perf_max_counters = nr_counters_generic;
949
950         pr_info("... value mask:      %016Lx\n", counter_value_mask);
951
952         if (nr_counters_fixed > X86_PMC_MAX_FIXED) {
953                 nr_counters_fixed = X86_PMC_MAX_FIXED;
954                 WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
955                         nr_counters_fixed, X86_PMC_MAX_FIXED);
956         }
957         pr_info("... fixed counters:  %d\n", nr_counters_fixed);
958
959         perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED;
960
961         pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
962         perf_counters_initialized = true;
963
964         perf_counters_lapic_init(0);
965         register_die_notifier(&perf_counter_nmi_notifier);
966 }
967
968 static void pmc_generic_read(struct perf_counter *counter)
969 {
970         x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
971 }
972
973 static const struct hw_perf_counter_ops x86_perf_counter_ops = {
974         .enable         = pmc_generic_enable,
975         .disable        = pmc_generic_disable,
976         .read           = pmc_generic_read,
977 };
978
979 const struct hw_perf_counter_ops *
980 hw_perf_counter_init(struct perf_counter *counter)
981 {
982         int err;
983
984         err = __hw_perf_counter_init(counter);
985         if (err)
986                 return NULL;
987
988         return &x86_perf_counter_ops;
989 }