perf_counter: Fix userspace build
[linux-2.6] / Documentation / perf_counter / builtin-stat.c
1 /*
2  * kerneltop.c: show top kernel functions - performance counters showcase
3
4    Build with:
5
6      cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt
7
8    Sample output:
9
10 ------------------------------------------------------------------------------
11  KernelTop:    2669 irqs/sec  [NMI, cache-misses/cache-refs],  (all, cpu: 2)
12 ------------------------------------------------------------------------------
13
14              weight         RIP          kernel function
15              ______   ________________   _______________
16
17               35.20 - ffffffff804ce74b : skb_copy_and_csum_dev
18               33.00 - ffffffff804cb740 : sock_alloc_send_skb
19               31.26 - ffffffff804ce808 : skb_push
20               22.43 - ffffffff80510004 : tcp_established_options
21               19.00 - ffffffff8027d250 : find_get_page
22               15.76 - ffffffff804e4fc9 : eth_type_trans
23               15.20 - ffffffff804d8baa : dst_release
24               14.86 - ffffffff804cf5d8 : skb_release_head_state
25               14.00 - ffffffff802217d5 : read_hpet
26               12.00 - ffffffff804ffb7f : __ip_local_out
27               11.97 - ffffffff804fc0c8 : ip_local_deliver_finish
28                8.54 - ffffffff805001a3 : ip_queue_xmit
29  */
30
31 /*
32  * perfstat:  /usr/bin/time -alike performance counter statistics utility
33
34           It summarizes the counter events of all tasks (and child tasks),
35           covering all CPUs that the command (or workload) executes on.
36           It only counts the per-task events of the workload started,
37           independent of how many other tasks run on those CPUs.
38
39    Sample output:
40
41    $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null
42
43    Performance counter stats for 'ls':
44
45            163516953 instructions
46                 2295 cache-misses
47              2855182 branch-misses
48  */
49
50  /*
51   * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
52   *
53   * Improvements and fixes by:
54   *
55   *   Arjan van de Ven <arjan@linux.intel.com>
56   *   Yanmin Zhang <yanmin.zhang@intel.com>
57   *   Wu Fengguang <fengguang.wu@intel.com>
58   *   Mike Galbraith <efault@gmx.de>
59   *   Paul Mackerras <paulus@samba.org>
60   *
61   * Released under the GPL v2. (and only v2, not any later version)
62   */
63
64 #include "perf.h"
65 #include "util/util.h"
66
67 #include <getopt.h>
68 #include <assert.h>
69 #include <fcntl.h>
70 #include <stdio.h>
71 #include <errno.h>
72 #include <time.h>
73 #include <sched.h>
74 #include <pthread.h>
75
76 #include <sys/syscall.h>
77 #include <sys/ioctl.h>
78 #include <sys/poll.h>
79 #include <sys/prctl.h>
80 #include <sys/wait.h>
81 #include <sys/uio.h>
82 #include <sys/mman.h>
83
84 #include <linux/unistd.h>
85 #include <linux/types.h>
86
/*
 * Bits stored in event_mask[]. They are set from the optional ":u"/":k"
 * suffix of a numeric "type:id" event and read back when the counter is
 * created.
 */
#define EVENT_MASK_KERNEL		1
#define EVENT_MASK_USER			2
89
90 static int                      system_wide                     =  0;
91
92 static int                      nr_counters                     =  0;
93 static __u64                    event_id[MAX_COUNTERS]          = {
94         EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),
95         EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),
96         EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),
97         EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),
98
99         EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),
100         EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),
101         EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),
102         EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),
103 };
104 static int                      default_interval = 100000;
105 static int                      event_count[MAX_COUNTERS];
106 static int                      fd[MAX_NR_CPUS][MAX_COUNTERS];
107 static int                      event_mask[MAX_COUNTERS];
108
109 static int                      tid                             = -1;
110 static int                      profile_cpu                     = -1;
111 static int                      nr_cpus                         =  0;
112 static int                      nmi                             =  1;
113 static int                      group                           =  0;
114 static unsigned int             page_size;
115
116 static int                      zero;
117
118 static int                      scale                           =  1;
119
120 static const unsigned int default_count[] = {
121         1000000,
122         1000000,
123           10000,
124           10000,
125         1000000,
126           10000,
127 };
128
/*
 * Display names for hardware events; event_name() indexes this table
 * with the event id taken from the counter config.
 */
static char *hw_event_names[] = {
        "CPU cycles",
        "instructions",
        "cache references",
        "cache misses",
        "branches",
        "branch misses",
        "bus cycles",
};
138
/*
 * Display names for software events; event_name() indexes this table
 * with the event id taken from the counter config.
 */
static char *sw_event_names[] = {
        "cpu clock ticks",
        "task clock ticks",
        "pagefaults",
        "context switches",
        "CPU migrations",
        "minor faults",
        "major faults",
};
148
/* Associates one counter config value with one command-line spelling. */
struct event_symbol {
        __u64 event;            /* config value built with EID() */
        char *symbol;           /* name accepted by -e */
};
153
154 static struct event_symbol event_symbols[] = {
155         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),                "cpu-cycles",           },
156         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES),                "cycles",               },
157         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS),              "instructions",         },
158         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES),          "cache-references",     },
159         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES),              "cache-misses",         },
160         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),       "branch-instructions",  },
161         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS),       "branches",             },
162         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES),             "branch-misses",        },
163         {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES),                "bus-cycles",           },
164
165         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK),                 "cpu-clock",            },
166         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK),                "task-clock",           },
167         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),               "page-faults",          },
168         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS),               "faults",               },
169         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN),           "minor-faults",         },
170         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ),           "major-faults",         },
171         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),          "context-switches",     },
172         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES),          "cs",                   },
173         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),            "cpu-migrations",       },
174         {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS),            "migrations",           },
175 };
176
/*
 * Extract one field from a counter config word: mask it with
 * PERF_COUNTER_<name>_MASK, then shift it down by PERF_COUNTER_<name>_SHIFT.
 *
 * The argument is parenthesized so that an expression such as (a | b)
 * is masked as a whole rather than only its right-hand operand.
 */
#define __PERF_COUNTER_FIELD(config, name) \
        (((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)        __PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)     __PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)       __PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)         __PERF_COUNTER_FIELD(config, EVENT)
184
185 static void display_events_help(void)
186 {
187         unsigned int i;
188         __u64 e;
189
190         printf(
191         " -e EVENT     --event=EVENT   #  symbolic-name        abbreviations");
192
193         for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
194                 int type, id;
195
196                 e = event_symbols[i].event;
197                 type = PERF_COUNTER_TYPE(e);
198                 id = PERF_COUNTER_ID(e);
199
200                 printf("\n                             %d:%d: %-20s",
201                                 type, id, event_symbols[i].symbol);
202         }
203
204         printf("\n"
205         "                           rNNN: raw PMU events (eventsel+umask)\n\n");
206 }
207
208 static void display_help(void)
209 {
210         printf(
211         "Usage: perfstat [<events...>] <cmd...>\n\n"
212         "PerfStat Options (up to %d event types can be specified):\n\n",
213                  MAX_COUNTERS);
214
215         display_events_help();
216
217         printf(
218         " -l                           # scale counter values\n"
219         " -a                           # system-wide collection\n");
220         exit(0);
221 }
222
223 static char *event_name(int ctr)
224 {
225         __u64 config = event_id[ctr];
226         int type = PERF_COUNTER_TYPE(config);
227         int id = PERF_COUNTER_ID(config);
228         static char buf[32];
229
230         if (PERF_COUNTER_RAW(config)) {
231                 sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config));
232                 return buf;
233         }
234
235         switch (type) {
236         case PERF_TYPE_HARDWARE:
237                 if (id < PERF_HW_EVENTS_MAX)
238                         return hw_event_names[id];
239                 return "unknown-hardware";
240
241         case PERF_TYPE_SOFTWARE:
242                 if (id < PERF_SW_EVENTS_MAX)
243                         return sw_event_names[id];
244                 return "unknown-software";
245
246         default:
247                 break;
248         }
249
250         return "unknown";
251 }
252
253 /*
254  * Each event can have multiple symbolic names.
255  * Symbolic names are (almost) exactly matched.
256  */
257 static __u64 match_event_symbols(char *str)
258 {
259         __u64 config, id;
260         int type;
261         unsigned int i;
262         char mask_str[4];
263
264         if (sscanf(str, "r%llx", &config) == 1)
265                 return config | PERF_COUNTER_RAW_MASK;
266
267         switch (sscanf(str, "%d:%llu:%2s", &type, &id, mask_str)) {
268                 case 3:
269                         if (strchr(mask_str, 'u'))
270                                 event_mask[nr_counters] |= EVENT_MASK_USER;
271                         if (strchr(mask_str, 'k'))
272                                 event_mask[nr_counters] |= EVENT_MASK_KERNEL;
273                 case 2:
274                         return EID(type, id);
275
276                 default:
277                         break;
278         }
279
280         for (i = 0; i < ARRAY_SIZE(event_symbols); i++) {
281                 if (!strncmp(str, event_symbols[i].symbol,
282                              strlen(event_symbols[i].symbol)))
283                         return event_symbols[i].event;
284         }
285
286         return ~0ULL;
287 }
288
289 static int parse_events(char *str)
290 {
291         __u64 config;
292
293 again:
294         if (nr_counters == MAX_COUNTERS)
295                 return -1;
296
297         config = match_event_symbols(str);
298         if (config == ~0ULL)
299                 return -1;
300
301         event_id[nr_counters] = config;
302         nr_counters++;
303
304         str = strstr(str, ",");
305         if (str) {
306                 str++;
307                 goto again;
308         }
309
310         return 0;
311 }
312
313
314 /*
315  * perfstat
316  */
317
/* Large scratch array with external linkage; not referenced in this part of the file. */
char fault_here[1000000];
319
320 static void create_perfstat_counter(int counter)
321 {
322         struct perf_counter_hw_event hw_event;
323
324         memset(&hw_event, 0, sizeof(hw_event));
325         hw_event.config         = event_id[counter];
326         hw_event.record_type    = 0;
327         hw_event.nmi            = 0;
328         hw_event.exclude_kernel = event_mask[counter] & EVENT_MASK_KERNEL;
329         hw_event.exclude_user   = event_mask[counter] & EVENT_MASK_USER;
330
331         if (scale)
332                 hw_event.read_format    = PERF_FORMAT_TOTAL_TIME_ENABLED |
333                                           PERF_FORMAT_TOTAL_TIME_RUNNING;
334
335         if (system_wide) {
336                 int cpu;
337                 for (cpu = 0; cpu < nr_cpus; cpu ++) {
338                         fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0);
339                         if (fd[cpu][counter] < 0) {
340                                 printf("perfstat error: syscall returned with %d (%s)\n",
341                                                 fd[cpu][counter], strerror(errno));
342                                 exit(-1);
343                         }
344                 }
345         } else {
346                 hw_event.inherit        = 1;
347                 hw_event.disabled       = 1;
348
349                 fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0);
350                 if (fd[0][counter] < 0) {
351                         printf("perfstat error: syscall returned with %d (%s)\n",
352                                         fd[0][counter], strerror(errno));
353                         exit(-1);
354                 }
355         }
356 }
357
358 int do_perfstat(int argc, char *argv[])
359 {
360         unsigned long long t0, t1;
361         int counter;
362         ssize_t res;
363         int status;
364         int pid;
365
366         if (!system_wide)
367                 nr_cpus = 1;
368
369         for (counter = 0; counter < nr_counters; counter++)
370                 create_perfstat_counter(counter);
371
372         argc -= optind;
373         argv += optind;
374
375         if (!argc)
376                 display_help();
377
378         /*
379          * Enable counters and exec the command:
380          */
381         t0 = rdclock();
382         prctl(PR_TASK_PERF_COUNTERS_ENABLE);
383
384         if ((pid = fork()) < 0)
385                 perror("failed to fork");
386         if (!pid) {
387                 if (execvp(argv[0], argv)) {
388                         perror(argv[0]);
389                         exit(-1);
390                 }
391         }
392         while (wait(&status) >= 0)
393                 ;
394         prctl(PR_TASK_PERF_COUNTERS_DISABLE);
395         t1 = rdclock();
396
397         fflush(stdout);
398
399         fprintf(stderr, "\n");
400         fprintf(stderr, " Performance counter stats for \'%s\':\n",
401                 argv[0]);
402         fprintf(stderr, "\n");
403
404         for (counter = 0; counter < nr_counters; counter++) {
405                 int cpu, nv;
406                 __u64 count[3], single_count[3];
407                 int scaled;
408
409                 count[0] = count[1] = count[2] = 0;
410                 nv = scale ? 3 : 1;
411                 for (cpu = 0; cpu < nr_cpus; cpu ++) {
412                         res = read(fd[cpu][counter],
413                                    single_count, nv * sizeof(__u64));
414                         assert(res == nv * sizeof(__u64));
415
416                         count[0] += single_count[0];
417                         if (scale) {
418                                 count[1] += single_count[1];
419                                 count[2] += single_count[2];
420                         }
421                 }
422
423                 scaled = 0;
424                 if (scale) {
425                         if (count[2] == 0) {
426                                 fprintf(stderr, " %14s  %-20s\n",
427                                         "<not counted>", event_name(counter));
428                                 continue;
429                         }
430                         if (count[2] < count[1]) {
431                                 scaled = 1;
432                                 count[0] = (unsigned long long)
433                                         ((double)count[0] * count[1] / count[2] + 0.5);
434                         }
435                 }
436
437                 if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) ||
438                     event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) {
439
440                         double msecs = (double)count[0] / 1000000;
441
442                         fprintf(stderr, " %14.6f  %-20s (msecs)",
443                                 msecs, event_name(counter));
444                 } else {
445                         fprintf(stderr, " %14Ld  %-20s (events)",
446                                 count[0], event_name(counter));
447                 }
448                 if (scaled)
449                         fprintf(stderr, "  (scaled from %.2f%%)",
450                                 (double) count[2] / count[1] * 100);
451                 fprintf(stderr, "\n");
452         }
453         fprintf(stderr, "\n");
454         fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n",
455                         (double)(t1-t0)/1e6);
456         fprintf(stderr, "\n");
457
458         return 0;
459 }
460
461 static void process_options(int argc, char **argv)
462 {
463         int error = 0, counter;
464
465         for (;;) {
466                 int option_index = 0;
467                 /** Options for getopt */
468                 static struct option long_options[] = {
469                         {"count",       required_argument,      NULL, 'c'},
470                         {"cpu",         required_argument,      NULL, 'C'},
471                         {"delay",       required_argument,      NULL, 'd'},
472                         {"dump_symtab", no_argument,            NULL, 'D'},
473                         {"event",       required_argument,      NULL, 'e'},
474                         {"filter",      required_argument,      NULL, 'f'},
475                         {"group",       required_argument,      NULL, 'g'},
476                         {"help",        no_argument,            NULL, 'h'},
477                         {"nmi",         required_argument,      NULL, 'n'},
478                         {"munmap_info", no_argument,            NULL, 'U'},
479                         {"pid",         required_argument,      NULL, 'p'},
480                         {"realtime",    required_argument,      NULL, 'r'},
481                         {"scale",       no_argument,            NULL, 'l'},
482                         {"symbol",      required_argument,      NULL, 's'},
483                         {"stat",        no_argument,            NULL, 'S'},
484                         {"vmlinux",     required_argument,      NULL, 'x'},
485                         {"zero",        no_argument,            NULL, 'z'},
486                         {NULL,          0,                      NULL,  0 }
487                 };
488                 int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
489                                     long_options, &option_index);
490                 if (c == -1)
491                         break;
492
493                 switch (c) {
494                 case 'a': system_wide                   =              1; break;
495                 case 'c': default_interval              =   atoi(optarg); break;
496                 case 'C':
497                         /* CPU and PID are mutually exclusive */
498                         if (tid != -1) {
499                                 printf("WARNING: CPU switch overriding PID\n");
500                                 sleep(1);
501                                 tid = -1;
502                         }
503                         profile_cpu                     =   atoi(optarg); break;
504
505                 case 'e': error                         = parse_events(optarg); break;
506
507                 case 'g': group                         =   atoi(optarg); break;
508                 case 'h':                                 display_help(); break;
509                 case 'l': scale                         =              1; break;
510                 case 'n': nmi                           =   atoi(optarg); break;
511                 case 'p':
512                         /* CPU and PID are mutually exclusive */
513                         if (profile_cpu != -1) {
514                                 printf("WARNING: PID switch overriding CPU\n");
515                                 sleep(1);
516                                 profile_cpu = -1;
517                         }
518                         tid                             =   atoi(optarg); break;
519                 case 'z': zero                          =              1; break;
520                 default: error = 1; break;
521                 }
522         }
523         if (error)
524                 display_help();
525
526         if (!nr_counters) {
527                 nr_counters = 8;
528         }
529
530         for (counter = 0; counter < nr_counters; counter++) {
531                 if (event_count[counter])
532                         continue;
533
534                 event_count[counter] = default_interval;
535         }
536 }
537
/* Signal handler that deliberately does nothing. */
static void skip_signal(int signo)
{
        (void)signo;
}
541
542 int cmd_stat(int argc, char **argv, const char *prefix)
543 {
544         sigset_t blocked;
545
546         page_size = sysconf(_SC_PAGE_SIZE);
547
548         process_options(argc, argv);
549
550         nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
551         assert(nr_cpus <= MAX_NR_CPUS);
552         assert(nr_cpus >= 0);
553
554         /*
555          * We dont want to block the signals - that would cause
556          * child tasks to inherit that and Ctrl-C would not work.
557          * What we want is for Ctrl-C to work in the exec()-ed
558          * task, but being ignored by perf stat itself:
559          */
560         signal(SIGINT,  skip_signal);
561         signal(SIGALRM, skip_signal);
562         signal(SIGABRT, skip_signal);
563
564         return do_perfstat(argc, argv);
565 }