arch/powerpc/kernel/perf_counter.c
/*
 * Performance counter support - powerpc architecture code
 *
 * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/perf_counter.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/reg.h>
#include <asm/pmc.h>
#include <asm/machdep.h>
#include <asm/firmware.h>

struct cpu_hw_counters {
        int n_counters;
        int n_percpu;
        int disabled;
        int n_added;
        struct perf_counter *counter[MAX_HWCOUNTERS];
        unsigned int events[MAX_HWCOUNTERS];
        u64 mmcr[3];
        u8 pmcs_enabled;
};
DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters);

struct power_pmu *ppmu;

void perf_counter_print_debug(void)
{
}

/*
 * Read one performance monitor counter (PMC).
 */
static unsigned long read_pmc(int idx)
{
        unsigned long val;

        switch (idx) {
        case 1:
                val = mfspr(SPRN_PMC1);
                break;
        case 2:
                val = mfspr(SPRN_PMC2);
                break;
        case 3:
                val = mfspr(SPRN_PMC3);
                break;
        case 4:
                val = mfspr(SPRN_PMC4);
                break;
        case 5:
                val = mfspr(SPRN_PMC5);
                break;
        case 6:
                val = mfspr(SPRN_PMC6);
                break;
        case 7:
                val = mfspr(SPRN_PMC7);
                break;
        case 8:
                val = mfspr(SPRN_PMC8);
                break;
        default:
                printk(KERN_ERR "oops trying to read PMC%d\n", idx);
                val = 0;
        }
        return val;
}

/*
 * Write one PMC.
 */
static void write_pmc(int idx, unsigned long val)
{
        switch (idx) {
        case 1:
                mtspr(SPRN_PMC1, val);
                break;
        case 2:
                mtspr(SPRN_PMC2, val);
                break;
        case 3:
                mtspr(SPRN_PMC3, val);
                break;
        case 4:
                mtspr(SPRN_PMC4, val);
                break;
        case 5:
                mtspr(SPRN_PMC5, val);
                break;
        case 6:
                mtspr(SPRN_PMC6, val);
                break;
        case 7:
                mtspr(SPRN_PMC7, val);
                break;
        case 8:
                mtspr(SPRN_PMC8, val);
                break;
        default:
                printk(KERN_ERR "oops trying to write PMC%d\n", idx);
        }
}

/*
 * Check if a set of events can all go on the PMU at once.
 * If they can't, this will look at alternative codes for the events
 * and see if any combination of alternative codes is feasible.
 * The feasible set is returned in event[].
 */
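/*
 * Each event constraint is a (mask, value) bit pattern.  Bits under
 * the mask are "select" fields and must take the same value for every
 * event chosen; bitfields covered by ppmu->add_fields instead
 * accumulate, each event adding its contribution, and ppmu->test_adder
 * is chosen so that the addition carries into a masked bit once a
 * field exceeds its limit, which the checks below detect.  Roughly,
 * for a 2-bit field that can accept at most two events, each adding 1:
 * two events sum to 2, and adding the test adder's 1 gives 3, still
 * inside the field; a third event sums to 3, and adding 1 carries out
 * into a masked bit, so that combination is rejected.
 */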
static int power_check_constraints(unsigned int event[], int n_ev)
{
        u64 mask, value, nv;
        unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
        u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
        u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
        u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
        int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
        int i, j;
        u64 addf = ppmu->add_fields;
        u64 tadd = ppmu->test_adder;

        if (n_ev > ppmu->n_counter)
                return -1;

        /* First see if the events will go on as-is */
        for (i = 0; i < n_ev; ++i) {
                alternatives[i][0] = event[i];
                if (ppmu->get_constraint(event[i], &amasks[i][0],
                                         &avalues[i][0]))
                        return -1;
                choice[i] = 0;
        }
        value = mask = 0;
        for (i = 0; i < n_ev; ++i) {
                nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf);
                if ((((nv + tadd) ^ value) & mask) != 0 ||
                    (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0)
                        break;
                value = nv;
                mask |= amasks[i][0];
        }
        if (i == n_ev)
                return 0;       /* all OK */

        /* doesn't work, gather alternatives... */
        if (!ppmu->get_alternatives)
                return -1;
        for (i = 0; i < n_ev; ++i) {
                n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]);
                for (j = 1; j < n_alt[i]; ++j)
                        ppmu->get_constraint(alternatives[i][j],
                                             &amasks[i][j], &avalues[i][j]);
        }

        /* enumerate all possibilities and see if any will work */
        i = 0;
        j = -1;
        value = mask = nv = 0;
        while (i < n_ev) {
                if (j >= 0) {
                        /* we're backtracking, restore context */
                        value = svalues[i];
                        mask = smasks[i];
                        j = choice[i];
                }
                /*
                 * See if any alternative k for event i,
                 * where k > j, will satisfy the constraints.
                 */
                while (++j < n_alt[i]) {
                        nv = (value | avalues[i][j]) +
                                (value & avalues[i][j] & addf);
                        if ((((nv + tadd) ^ value) & mask) == 0 &&
                            (((nv + tadd) ^ avalues[i][j])
                             & amasks[i][j]) == 0)
                                break;
                }
                if (j >= n_alt[i]) {
                        /*
                         * No feasible alternative, backtrack
                         * to event i-1 and continue enumerating its
                         * alternatives from where we got up to.
                         */
                        if (--i < 0)
                                return -1;
                } else {
                        /*
                         * Found a feasible alternative for event i,
                         * remember where we got up to with this event,
                         * go on to the next event, and start with
                         * the first alternative for it.
                         */
                        choice[i] = j;
                        svalues[i] = value;
                        smasks[i] = mask;
                        value = nv;
                        mask |= amasks[i][j];
                        ++i;
                        j = -1;
                }
        }

        /* OK, we have a feasible combination, tell the caller the solution */
        for (i = 0; i < n_ev; ++i)
                event[i] = alternatives[i][choice[i]];
        return 0;
}

/*
 * Check if newly-added counters have consistent settings for
 * exclude_{user,kernel,hv} with each other and any previously
 * added counters.
 */
static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
{
        int eu, ek, eh;
        int i, n;
        struct perf_counter *counter;

        n = n_prev + n_new;
        if (n <= 1)
                return 0;

        eu = ctrs[0]->hw_event.exclude_user;
        ek = ctrs[0]->hw_event.exclude_kernel;
        eh = ctrs[0]->hw_event.exclude_hv;
        if (n_prev == 0)
                n_prev = 1;
        for (i = n_prev; i < n; ++i) {
                counter = ctrs[i];
                if (counter->hw_event.exclude_user != eu ||
                    counter->hw_event.exclude_kernel != ek ||
                    counter->hw_event.exclude_hv != eh)
                        return -EAGAIN;
        }
        return 0;
}

static void power_perf_read(struct perf_counter *counter)
{
        long val, delta, prev;

        if (!counter->hw.idx)
                return;
        /*
         * Performance monitor interrupts come even when interrupts
         * are soft-disabled, as long as interrupts are hard-enabled.
         * Therefore we treat them like NMIs.
         */
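        /*
         * The cmpxchg loop rereads the PMC if a PMU interrupt has
         * updated prev_count between our read of prev_count and our
         * read of the PMC, so that (prev, val) stay consistent.
         */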
        do {
                prev = atomic64_read(&counter->hw.prev_count);
                barrier();
                val = read_pmc(counter->hw.idx);
        } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev);

        /* The counters are only 32 bits wide */
        delta = (val - prev) & 0xfffffffful;
        atomic64_add(delta, &counter->count);
        atomic64_sub(delta, &counter->hw.period_left);
}

/*
 * Disable all counters to prevent PMU interrupts and to allow
 * counters to be added or removed.
 */
u64 hw_perf_save_disable(void)
{
        struct cpu_hw_counters *cpuhw;
        unsigned long ret;
        unsigned long flags;

        local_irq_save(flags);
        cpuhw = &__get_cpu_var(cpu_hw_counters);

        ret = cpuhw->disabled;
        if (!ret) {
                cpuhw->disabled = 1;
                cpuhw->n_added = 0;

                /*
                 * Check if we ever enabled the PMU on this cpu.
                 */
                if (!cpuhw->pmcs_enabled) {
                        if (ppc_md.enable_pmcs)
                                ppc_md.enable_pmcs();
                        cpuhw->pmcs_enabled = 1;
                }

                /*
                 * Set the 'freeze counters' bit.
                 * The barrier is to make sure the mtspr has been
                 * executed and the PMU has frozen the counters
                 * before we return.
                 */
                mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
                mb();
        }
        local_irq_restore(flags);
        return ret;
}

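/*
 * Callers typically bracket PMU reconfiguration with this pair of
 * functions, as power_perf_enable() and power_perf_disable() below do:
 *
 *      local_irq_save(flags);
 *      pmudis = hw_perf_save_disable();
 *      ... add or remove counters ...
 *      hw_perf_restore(pmudis);
 *      local_irq_restore(flags);
 */
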
/*
 * Re-enable all counters if disable == 0.
 * If we were previously disabled and counters were added, then
 * put the new config on the PMU.
 */
void hw_perf_restore(u64 disable)
{
        struct perf_counter *counter;
        struct cpu_hw_counters *cpuhw;
        unsigned long flags;
        long i;
        unsigned long val;
        s64 left;
        unsigned int hwc_index[MAX_HWCOUNTERS];

        if (disable)
                return;
        local_irq_save(flags);
        cpuhw = &__get_cpu_var(cpu_hw_counters);
        cpuhw->disabled = 0;

        /*
         * If we didn't change anything, or only removed counters,
         * no need to recalculate MMCR* settings and reset the PMCs.
         * Just reenable the PMU with the current MMCR* settings
         * (possibly updated for removal of counters).
         */
        if (!cpuhw->n_added) {
                mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
                mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
                mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
                if (cpuhw->n_counters == 0)
                        get_lppaca()->pmcregs_in_use = 0;
                goto out;
        }

        /*
         * Compute MMCR* values for the new set of counters
         */
        if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
                               cpuhw->mmcr)) {
                /* shouldn't ever get here */
                printk(KERN_ERR "oops compute_mmcr failed\n");
                goto out;
        }

        /*
         * Add in MMCR0 freeze bits corresponding to the
         * hw_event.exclude_* bits for the first counter.
         * We have already checked that all counters have the
         * same values for these bits as the first counter.
         */
        counter = cpuhw->counter[0];
        if (counter->hw_event.exclude_user)
                cpuhw->mmcr[0] |= MMCR0_FCP;
        if (counter->hw_event.exclude_kernel)
                cpuhw->mmcr[0] |= MMCR0_FCS;
        if (counter->hw_event.exclude_hv)
                cpuhw->mmcr[0] |= MMCR0_FCHV;

        /*
         * Write the new configuration to MMCR* with the freeze
         * bit set and set the hardware counters to their initial values.
         * Then unfreeze the counters.
         */
        get_lppaca()->pmcregs_in_use = 1;
        mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
        mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
        mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
                                | MMCR0_FC);

        /*
         * Read off any pre-existing counters that need to move
         * to another PMC.
         */
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
                        power_perf_read(counter);
                        write_pmc(counter->hw.idx, 0);
                        counter->hw.idx = 0;
                }
        }

        /*
         * Initialize the PMCs for all the new and moved counters.
         */
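        /*
         * A PMC raises its performance monitor alert when its
         * most-significant bit becomes set (the 32-bit count goes
         * "negative"), so preloading it with 0x80000000 - left makes
         * it overflow after `left' more events.  If more than 2^31
         * events remain, the PMC starts from 0 and the remainder is
         * picked up when the interrupt eventually arrives.
         */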
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                if (counter->hw.idx)
                        continue;
                val = 0;
                if (counter->hw_event.irq_period) {
                        left = atomic64_read(&counter->hw.period_left);
                        if (left < 0x80000000L)
                                val = 0x80000000L - left;
                }
                atomic64_set(&counter->hw.prev_count, val);
                counter->hw.idx = hwc_index[i] + 1;
                write_pmc(counter->hw.idx, val);
        }
        mb();
        cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
        mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);

 out:
        local_irq_restore(flags);
}

static int collect_events(struct perf_counter *group, int max_count,
                          struct perf_counter *ctrs[], unsigned int *events)
{
        int n = 0;
        struct perf_counter *counter;

        if (!is_software_counter(group)) {
                if (n >= max_count)
                        return -1;
                ctrs[n] = group;
                events[n++] = group->hw.config;
        }
        list_for_each_entry(counter, &group->sibling_list, list_entry) {
                if (!is_software_counter(counter) &&
                    counter->state != PERF_COUNTER_STATE_OFF) {
                        if (n >= max_count)
                                return -1;
                        ctrs[n] = counter;
                        events[n++] = counter->hw.config;
                }
        }
        return n;
}

static void counter_sched_in(struct perf_counter *counter, int cpu)
{
        counter->state = PERF_COUNTER_STATE_ACTIVE;
        counter->oncpu = cpu;
        if (is_software_counter(counter))
                counter->hw_ops->enable(counter);
}

/*
 * Called to enable a whole group of counters.
 * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
 * Assumes the caller has disabled interrupts and has
 * frozen the PMU with hw_perf_save_disable.
 */
int hw_perf_group_sched_in(struct perf_counter *group_leader,
               struct perf_cpu_context *cpuctx,
               struct perf_counter_context *ctx, int cpu)
{
        struct cpu_hw_counters *cpuhw;
        long i, n, n0;
        struct perf_counter *sub;

        cpuhw = &__get_cpu_var(cpu_hw_counters);
        n0 = cpuhw->n_counters;
        n = collect_events(group_leader, ppmu->n_counter - n0,
                           &cpuhw->counter[n0], &cpuhw->events[n0]);
        if (n < 0)
                return -EAGAIN;
        if (check_excludes(cpuhw->counter, n0, n))
                return -EAGAIN;
        if (power_check_constraints(cpuhw->events, n + n0))
                return -EAGAIN;
        cpuhw->n_counters = n0 + n;
        cpuhw->n_added += n;

        /*
         * OK, this group can go on; update counter states etc.,
         * and enable any software counters
         */
        for (i = n0; i < n0 + n; ++i)
                cpuhw->counter[i]->hw.config = cpuhw->events[i];
        cpuctx->active_oncpu += n;
        n = 1;
        counter_sched_in(group_leader, cpu);
        list_for_each_entry(sub, &group_leader->sibling_list, list_entry) {
                if (sub->state != PERF_COUNTER_STATE_OFF) {
                        counter_sched_in(sub, cpu);
                        ++n;
                }
        }
        ctx->nr_active += n;

        return 1;
}

/*
 * Add a counter to the PMU.
 * If all counters are not already frozen, then we disable and
 * re-enable the PMU in order to get hw_perf_restore to do the
 * actual work of reconfiguring the PMU.
 */
static int power_perf_enable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuhw;
        unsigned long flags;
        u64 pmudis;
        int n0;
        int ret = -EAGAIN;

        local_irq_save(flags);
        pmudis = hw_perf_save_disable();

        /*
         * Add the counter to the list (if there is room)
         * and check whether the total set is still feasible.
         */
        cpuhw = &__get_cpu_var(cpu_hw_counters);
        n0 = cpuhw->n_counters;
        if (n0 >= ppmu->n_counter)
                goto out;
        cpuhw->counter[n0] = counter;
        cpuhw->events[n0] = counter->hw.config;
        if (check_excludes(cpuhw->counter, n0, 1))
                goto out;
        if (power_check_constraints(cpuhw->events, n0 + 1))
                goto out;

        counter->hw.config = cpuhw->events[n0];
        ++cpuhw->n_counters;
        ++cpuhw->n_added;

        ret = 0;
 out:
        hw_perf_restore(pmudis);
        local_irq_restore(flags);
        return ret;
}

/*
 * Remove a counter from the PMU.
 */
static void power_perf_disable(struct perf_counter *counter)
{
        struct cpu_hw_counters *cpuhw;
        long i;
        u64 pmudis;
        unsigned long flags;

        local_irq_save(flags);
        pmudis = hw_perf_save_disable();

        power_perf_read(counter);

        cpuhw = &__get_cpu_var(cpu_hw_counters);
        for (i = 0; i < cpuhw->n_counters; ++i) {
                if (counter == cpuhw->counter[i]) {
                        while (++i < cpuhw->n_counters)
                                cpuhw->counter[i-1] = cpuhw->counter[i];
                        --cpuhw->n_counters;
                        ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
                        write_pmc(counter->hw.idx, 0);
                        counter->hw.idx = 0;
                        break;
                }
        }
        if (cpuhw->n_counters == 0) {
                /* disable exceptions if no counters are running */
                cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
        }

        hw_perf_restore(pmudis);
        local_irq_restore(flags);
}

struct hw_perf_counter_ops power_perf_ops = {
        .enable = power_perf_enable,
        .disable = power_perf_disable,
        .read = power_perf_read
};

const struct hw_perf_counter_ops *
hw_perf_counter_init(struct perf_counter *counter)
{
        unsigned long ev;
        struct perf_counter *ctrs[MAX_HWCOUNTERS];
        unsigned int events[MAX_HWCOUNTERS];
        int n;

        if (!ppmu)
                return NULL;
        if ((s64)counter->hw_event.irq_period < 0)
                return NULL;
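        /*
         * For non-raw counters, hw_event.type is a generic event index
         * that we translate to this PMU's hardware event code via the
         * ppmu->generic_events[] table; raw counters supply the
         * hardware event code directly.
         */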
        ev = counter->hw_event.type;
        if (!counter->hw_event.raw) {
                if (ev >= ppmu->n_generic ||
                    ppmu->generic_events[ev] == 0)
                        return NULL;
                ev = ppmu->generic_events[ev];
        }
        counter->hw.config_base = ev;
        counter->hw.idx = 0;

        /*
         * If we are not running on a hypervisor, force the
         * exclude_hv bit to 0 so that we don't care what
         * the user set it to.  This also means that we don't
         * set the MMCR0_FCHV bit, which unconditionally freezes
         * the counters on the PPC970 variants used in Apple G5
         * machines (since MSR.HV is always 1 on those machines).
         */
        if (!firmware_has_feature(FW_FEATURE_LPAR))
                counter->hw_event.exclude_hv = 0;

        /*
         * If this is in a group, check if it can go on with all the
         * other hardware counters in the group.  We assume the counter
         * hasn't been linked into its leader's sibling list at this point.
         */
        n = 0;
        if (counter->group_leader != counter) {
                n = collect_events(counter->group_leader, ppmu->n_counter - 1,
                                   ctrs, events);
                if (n < 0)
                        return NULL;
        }
        events[n] = ev;
        ctrs[n] = counter;
        if (check_excludes(ctrs, n, 1))
                return NULL;
        if (power_check_constraints(events, n + 1))
                return NULL;

        counter->hw.config = events[n];
        atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
        return &power_perf_ops;
}

/*
 * Handle wakeups.
 */
void perf_counter_do_pending(void)
{
        int i;
        struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
        struct perf_counter *counter;

        set_perf_counter_pending(0);
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                if (counter && counter->wakeup_pending) {
                        counter->wakeup_pending = 0;
                        wake_up(&counter->waitq);
                }
        }
}

/*
 * Record data for an irq counter.
 * This function was lifted from the x86 code; maybe it should
 * go in the core?
 */
static void perf_store_irq_data(struct perf_counter *counter, u64 data)
{
        struct perf_data *irqdata = counter->irqdata;

        if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
                irqdata->overrun++;
        } else {
                u64 *p = (u64 *) &irqdata->data[irqdata->len];

                *p = data;
                irqdata->len += sizeof(u64);
        }
}

/*
 * Record all the values of the counters in a group
 */
static void perf_handle_group(struct perf_counter *counter)
{
        struct perf_counter *leader, *sub;

        leader = counter->group_leader;
        list_for_each_entry(sub, &leader->sibling_list, list_entry) {
                if (sub != counter)
                        sub->hw_ops->read(sub);
                perf_store_irq_data(counter, sub->hw_event.type);
                perf_store_irq_data(counter, atomic64_read(&sub->count));
        }
}

/*
 * A counter has overflowed; update its count and record
 * things if requested.  Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 */
static void record_and_restart(struct perf_counter *counter, long val,
                               struct pt_regs *regs)
{
        s64 prev, delta, left;
        int record = 0;

        /* we don't have to worry about interrupts here */
        prev = atomic64_read(&counter->hw.prev_count);
        delta = (val - prev) & 0xfffffffful;
        atomic64_add(delta, &counter->count);

        /*
         * See if the total period for this counter has expired,
         * and update for the next period.
         */
        val = 0;
        left = atomic64_read(&counter->hw.period_left) - delta;
        if (counter->hw_event.irq_period) {
                if (left <= 0) {
                        left += counter->hw_event.irq_period;
                        if (left <= 0)
                                left = counter->hw_event.irq_period;
                        record = 1;
                }
                if (left < 0x80000000L)
                        val = 0x80000000L - left;
        }
        write_pmc(counter->hw.idx, val);
        atomic64_set(&counter->hw.prev_count, val);
        atomic64_set(&counter->hw.period_left, left);

        /*
         * Finally record data if requested.
         */
        if (record) {
                switch (counter->hw_event.record_type) {
                case PERF_RECORD_SIMPLE:
                        break;
                case PERF_RECORD_IRQ:
                        perf_store_irq_data(counter, instruction_pointer(regs));
                        counter->wakeup_pending = 1;
                        break;
                case PERF_RECORD_GROUP:
                        perf_handle_group(counter);
                        counter->wakeup_pending = 1;
                        break;
                }
        }
}

/*
 * Performance monitor interrupt stuff
 */
static void perf_counter_interrupt(struct pt_regs *regs)
{
        int i;
        struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
        struct perf_counter *counter;
        long val;
        int need_wakeup = 0, found = 0;

        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                val = read_pmc(counter->hw.idx);
                if ((int)val < 0) {
                        /* counter has overflowed */
                        found = 1;
                        record_and_restart(counter, val, regs);
                        if (counter->wakeup_pending)
                                need_wakeup = 1;
                }
        }

        /*
         * In case we didn't find and reset the counter that caused
         * the interrupt, scan all counters and reset any that are
         * negative, to avoid getting continual interrupts.
         * Any that we processed in the previous loop will not be negative.
         */
        if (!found) {
                for (i = 0; i < ppmu->n_counter; ++i) {
                        val = read_pmc(i + 1);
                        if ((int)val < 0)
                                write_pmc(i + 1, 0);
                }
        }

        /*
         * Reset MMCR0 to its normal value.  This will set PMXE and
         * clear FC (freeze counters) and PMAO (perf mon alert occurred)
         * and thus allow interrupts to occur again.
         * XXX might want to use MSR.PM to keep the counters frozen until
         * we get back out of this interrupt.
         */
        mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);

        /*
         * If we need a wakeup, check whether interrupts were soft-enabled
         * when we took the interrupt.  If they were, we can wake stuff up
         * immediately; otherwise we'll have to set a flag and do the
         * wakeup when interrupts get soft-enabled.
         */
        if (need_wakeup) {
                if (regs->softe) {
                        irq_enter();
                        perf_counter_do_pending();
                        irq_exit();
                } else {
                        set_perf_counter_pending(1);
                }
        }
}

void hw_perf_counter_setup(int cpu)
{
        struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);

        memset(cpuhw, 0, sizeof(*cpuhw));
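        /* cache the freeze bit so the PMU stays frozen until counters are added */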
        cpuhw->mmcr[0] = MMCR0_FC;
}

extern struct power_pmu ppc970_pmu;
extern struct power_pmu power6_pmu;

static int init_perf_counters(void)
{
        unsigned long pvr;

        if (reserve_pmc_hardware(perf_counter_interrupt)) {
                printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
                return -EBUSY;
        }

        /* XXX should get this from cputable */
        pvr = mfspr(SPRN_PVR);
        switch (PVR_VER(pvr)) {
        case PV_970:
        case PV_970FX:
        case PV_970MP:
                ppmu = &ppc970_pmu;
                break;
        case 0x3e:
                ppmu = &power6_pmu;
                break;
        }
        return 0;
}

arch_initcall(init_perf_counters);