Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[linux-2.6] / Documentation / vm / page-types.c
1 /*
2  * page-types: Tool for querying page flags
3  *
4  * Copyright (C) 2009 Intel corporation
5  * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
6  */
7
#include <errno.h>
#include <getopt.h>
#include <limits.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
19
20
21 /*
22  * kernel page flags
23  */
24
/* Bytes per page record in PROC_KPAGEFLAGS (see the seek/read in walk_pfn()). */
#define KPF_BYTES		8
#define PROC_KPAGEFLAGS		"/proc/kpageflags"

/*
 * Bit numbers inside a 64-bit page flags word.  They are consumed through
 * BIT(name) below and as designated indices of the flag name table.
 */
enum {
	/* [0-10] copied from kpageflags_read() */
	KPF_LOCKED		= 0,
	KPF_ERROR		= 1,
	KPF_REFERENCED		= 2,
	KPF_UPTODATE		= 3,
	KPF_DIRTY		= 4,
	KPF_LRU			= 5,
	KPF_ACTIVE		= 6,
	KPF_SLAB		= 7,
	KPF_WRITEBACK		= 8,
	KPF_RECLAIM		= 9,
	KPF_BUDDY		= 10,

	/* [11-20] new additions in 2.6.31 */
	KPF_MMAP		= 11,
	KPF_ANON		= 12,
	KPF_SWAPCACHE		= 13,
	KPF_SWAPBACKED		= 14,
	KPF_COMPOUND_HEAD	= 15,
	KPF_COMPOUND_TAIL	= 16,
	KPF_HUGE		= 17,
	KPF_UNEVICTABLE		= 18,
	KPF_NOPAGE		= 20,

	/* [32-] kernel hacking assistance */
	KPF_RESERVED		= 32,
	KPF_MLOCKED		= 33,
	KPF_MAPPEDTODISK	= 34,
	KPF_PRIVATE		= 35,
	KPF_PRIVATE_2		= 36,
	KPF_OWNER_PRIVATE	= 37,
	KPF_ARCH		= 38,
	KPF_UNCACHED		= 39,

	/*
	 * [48-] arbitrary free slots taken for expanding overloaded flags;
	 * not part of the kernel API
	 */
	KPF_READAHEAD		= 48,
	KPF_SLOB_FREE		= 49,
	KPF_SLUB_FROZEN		= 50,
	KPF_SLUB_DEBUG		= 51
};

/* Masks over whole groups of bits, plus helpers to build single-bit masks. */
#define KPF_ALL_BITS		((uint64_t)~0ULL)
#define KPF_HACKERS_BITS	(0xffffULL << 32)
#define KPF_OVERLOADED_BITS	(0xffffULL << 48)
#define BIT(name)		(1ULL << KPF_##name)
#define BITS_COMPOUND		(BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL))
75
76 static char *page_flag_names[] = {
77         [KPF_LOCKED]            = "L:locked",
78         [KPF_ERROR]             = "E:error",
79         [KPF_REFERENCED]        = "R:referenced",
80         [KPF_UPTODATE]          = "U:uptodate",
81         [KPF_DIRTY]             = "D:dirty",
82         [KPF_LRU]               = "l:lru",
83         [KPF_ACTIVE]            = "A:active",
84         [KPF_SLAB]              = "S:slab",
85         [KPF_WRITEBACK]         = "W:writeback",
86         [KPF_RECLAIM]           = "I:reclaim",
87         [KPF_BUDDY]             = "B:buddy",
88
89         [KPF_MMAP]              = "M:mmap",
90         [KPF_ANON]              = "a:anonymous",
91         [KPF_SWAPCACHE]         = "s:swapcache",
92         [KPF_SWAPBACKED]        = "b:swapbacked",
93         [KPF_COMPOUND_HEAD]     = "H:compound_head",
94         [KPF_COMPOUND_TAIL]     = "T:compound_tail",
95         [KPF_HUGE]              = "G:huge",
96         [KPF_UNEVICTABLE]       = "u:unevictable",
97         [KPF_NOPAGE]            = "n:nopage",
98
99         [KPF_RESERVED]          = "r:reserved",
100         [KPF_MLOCKED]           = "m:mlocked",
101         [KPF_MAPPEDTODISK]      = "d:mappedtodisk",
102         [KPF_PRIVATE]           = "P:private",
103         [KPF_PRIVATE_2]         = "p:private_2",
104         [KPF_OWNER_PRIVATE]     = "O:owner_private",
105         [KPF_ARCH]              = "h:arch",
106         [KPF_UNCACHED]          = "c:uncached",
107
108         [KPF_READAHEAD]         = "I:readahead",
109         [KPF_SLOB_FREE]         = "P:slob_free",
110         [KPF_SLUB_FROZEN]       = "A:slub_frozen",
111         [KPF_SLUB_DEBUG]        = "E:slub_debug",
112 };
113
114
115 /*
116  * data structures
117  */
118
/* command line options */
static int		opt_raw;	/* for kernel developers */
static int		opt_list;	/* 0: no listing, 1: list ranges, 2: list each page */
static int		opt_no_summary;	/* don't show summary */
static pid_t		opt_pid;	/* process to walk */

/* page ranges to walk: (first page offset, number of pages) pairs */
#define MAX_ADDR_RANGES	1024
static int		nr_addr_ranges;
static unsigned long	opt_offset[MAX_ADDR_RANGES];
static unsigned long	opt_size[MAX_ADDR_RANGES];

/* flag filters: (mask, wanted bits) pairs, all of which must match */
#define MAX_BIT_FILTERS	64
static int		nr_bit_filters;
static uint64_t		opt_mask[MAX_BIT_FILTERS];
static uint64_t		opt_bits[MAX_BIT_FILTERS];

static int		page_size;

#define PAGES_BATCH	(64 << 10)	/* 64k pages */
static int		kpageflags_fd;
/*
 * Read buffer holding one 64-bit flags word per page.  A single read asks
 * for at most PAGES_BATCH records, so the array is sized in records; the
 * previous KPF_BYTES * PAGES_BATCH element count reserved 8x more memory
 * than could ever be filled.
 */
static uint64_t		kpageflags_buf[PAGES_BATCH];

/* open-addressed table that counts pages per distinct flags value */
#define HASH_SHIFT	13
#define HASH_SIZE	(1 << HASH_SHIFT)
#define HASH_MASK	(HASH_SIZE - 1)
#define HASH_KEY(flags)	((flags) & HASH_MASK)	/* argument parenthesized so expressions hash correctly */

static unsigned long	total_pages;
static unsigned long	nr_pages[HASH_SIZE];
static uint64_t		page_flags[HASH_SIZE];
148
149
150 /*
151  * helper functions
152  */
153
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

/*
 * Minimum of x and y after converting both to 'type'.  Each argument is
 * evaluated exactly once (GNU statement expression).
 */
#define min_t(type, x, y) ({				\
	type min_lhs_ = (x);				\
	type min_rhs_ = (y);				\
	min_rhs_ > min_lhs_ ? min_lhs_ : min_rhs_; })
160
161 unsigned long pages2mb(unsigned long pages)
162 {
163         return (pages * page_size) >> 20;
164 }
165
/*
 * Print a printf-style message to stderr and terminate the program with
 * EXIT_FAILURE.  Does not return.
 */
void fatal(const char *x, ...)
{
	va_list args;

	va_start(args, x);
	vfprintf(stderr, x, args);
	va_end(args);

	exit(EXIT_FAILURE);
}
175
176
177 /*
178  * page flag names
179  */
180
181 char *page_flag_name(uint64_t flags)
182 {
183         static char buf[65];
184         int present;
185         int i, j;
186
187         for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
188                 present = (flags >> i) & 1;
189                 if (!page_flag_names[i]) {
190                         if (present)
191                                 fatal("unkown flag bit %d\n", i);
192                         continue;
193                 }
194                 buf[j++] = present ? page_flag_names[i][0] : '_';
195         }
196
197         return buf;
198 }
199
200 char *page_flag_longname(uint64_t flags)
201 {
202         static char buf[1024];
203         int i, n;
204
205         for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) {
206                 if (!page_flag_names[i])
207                         continue;
208                 if ((flags >> i) & 1)
209                         n += snprintf(buf + n, sizeof(buf) - n, "%s,",
210                                         page_flag_names[i] + 2);
211         }
212         if (n)
213                 n--;
214         buf[n] = '\0';
215
216         return buf;
217 }
218
219
220 /*
221  * page list and summary
222  */
223
/*
 * Range listing: merge consecutive pages with identical flags into one
 * "start<TAB>count<TAB>flags" line.
 *
 * The current run is kept in static state; a run is only printed when a
 * page arrives that does not extend it, so the caller has to pass one
 * final non-matching page to flush the last run.
 */
void show_page_range(unsigned long offset, uint64_t flags)
{
	static uint64_t      run_flags;
	static unsigned long run_start;
	static unsigned long run_len;

	int extends_run = (flags == run_flags) &&
			  (offset == run_start + run_len);

	if (!extends_run) {
		/* emit the finished run (nothing to emit before the first one) */
		if (run_len)
			printf("%lu\t%lu\t%s\n",
			       run_start, run_len, page_flag_name(run_flags));

		run_flags = flags;
		run_start = offset;
		run_len   = 1;
		return;
	}

	run_len++;
}
243
/* Per-page listing: print "offset<TAB>symbolic-flags" for a single page. */
void show_page(unsigned long offset, uint64_t flags)
{
	const char *symbols = page_flag_name(flags);

	printf("%lu\t%s\n", offset, symbols);
}
248
249 void show_summary(void)
250 {
251         int i;
252
253         printf("             flags\tpage-count       MB"
254                 "  symbolic-flags\t\t\tlong-symbolic-flags\n");
255
256         for (i = 0; i < ARRAY_SIZE(nr_pages); i++) {
257                 if (nr_pages[i])
258                         printf("0x%016llx\t%10lu %8lu  %s\t%s\n",
259                                 (unsigned long long)page_flags[i],
260                                 nr_pages[i],
261                                 pages2mb(nr_pages[i]),
262                                 page_flag_name(page_flags[i]),
263                                 page_flag_longname(page_flags[i]));
264         }
265
266         printf("             total\t%10lu %8lu\n",
267                         total_pages, pages2mb(total_pages));
268 }
269
270
271 /*
272  * page flag filters
273  */
274
275 int bit_mask_ok(uint64_t flags)
276 {
277         int i;
278
279         for (i = 0; i < nr_bit_filters; i++) {
280                 if (opt_bits[i] == KPF_ALL_BITS) {
281                         if ((flags & opt_mask[i]) == 0)
282                                 return 0;
283                 } else {
284                         if ((flags & opt_mask[i]) != opt_bits[i])
285                                 return 0;
286                 }
287         }
288
289         return 1;
290 }
291
292 uint64_t expand_overloaded_flags(uint64_t flags)
293 {
294         /* SLOB/SLUB overload several page flags */
295         if (flags & BIT(SLAB)) {
296                 if (flags & BIT(PRIVATE))
297                         flags ^= BIT(PRIVATE) | BIT(SLOB_FREE);
298                 if (flags & BIT(ACTIVE))
299                         flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN);
300                 if (flags & BIT(ERROR))
301                         flags ^= BIT(ERROR) | BIT(SLUB_DEBUG);
302         }
303
304         /* PG_reclaim is overloaded as PG_readahead in the read path */
305         if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
306                 flags ^= BIT(RECLAIM) | BIT(READAHEAD);
307
308         return flags;
309 }
310
311 uint64_t well_known_flags(uint64_t flags)
312 {
313         /* hide flags intended only for kernel hacker */
314         flags &= ~KPF_HACKERS_BITS;
315
316         /* hide non-hugeTLB compound pages */
317         if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE)))
318                 flags &= ~BITS_COMPOUND;
319
320         return flags;
321 }
322
323
324 /*
325  * page frame walker
326  */
327
328 int hash_slot(uint64_t flags)
329 {
330         int k = HASH_KEY(flags);
331         int i;
332
333         /* Explicitly reserve slot 0 for flags 0: the following logic
334          * cannot distinguish an unoccupied slot from slot (flags==0).
335          */
336         if (flags == 0)
337                 return 0;
338
339         /* search through the remaining (HASH_SIZE-1) slots */
340         for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) {
341                 if (!k || k >= ARRAY_SIZE(page_flags))
342                         k = 1;
343                 if (page_flags[k] == 0) {
344                         page_flags[k] = flags;
345                         return k;
346                 }
347                 if (page_flags[k] == flags)
348                         return k;
349         }
350
351         fatal("hash table full: bump up HASH_SHIFT?\n");
352         exit(EXIT_FAILURE);
353 }
354
355 void add_page(unsigned long offset, uint64_t flags)
356 {
357         flags = expand_overloaded_flags(flags);
358
359         if (!opt_raw)
360                 flags = well_known_flags(flags);
361
362         if (!bit_mask_ok(flags))
363                 return;
364
365         if (opt_list == 1)
366                 show_page_range(offset, flags);
367         else if (opt_list == 2)
368                 show_page(offset, flags);
369
370         nr_pages[hash_slot(flags)]++;
371         total_pages++;
372 }
373
374 void walk_pfn(unsigned long index, unsigned long count)
375 {
376         unsigned long batch;
377         unsigned long n;
378         unsigned long i;
379
380         if (index > ULONG_MAX / KPF_BYTES)
381                 fatal("index overflow: %lu\n", index);
382
383         lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
384
385         while (count) {
386                 batch = min_t(unsigned long, count, PAGES_BATCH);
387                 n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
388                 if (n == 0)
389                         break;
390                 if (n < 0) {
391                         perror(PROC_KPAGEFLAGS);
392                         exit(EXIT_FAILURE);
393                 }
394
395                 if (n % KPF_BYTES != 0)
396                         fatal("partial read: %lu bytes\n", n);
397                 n = n / KPF_BYTES;
398
399                 for (i = 0; i < n; i++)
400                         add_page(index + i, kpageflags_buf[i]);
401
402                 index += batch;
403                 count -= batch;
404         }
405 }
406
407 void walk_addr_ranges(void)
408 {
409         int i;
410
411         kpageflags_fd = open(PROC_KPAGEFLAGS, O_RDONLY);
412         if (kpageflags_fd < 0) {
413                 perror(PROC_KPAGEFLAGS);
414                 exit(EXIT_FAILURE);
415         }
416
417         if (!nr_addr_ranges)
418                 walk_pfn(0, ULONG_MAX);
419
420         for (i = 0; i < nr_addr_ranges; i++)
421                 walk_pfn(opt_offset[i], opt_size[i]);
422
423         close(kpageflags_fd);
424 }
425
426
427 /*
428  * user interface
429  */
430
431 const char *page_flag_type(uint64_t flag)
432 {
433         if (flag & KPF_HACKERS_BITS)
434                 return "(r)";
435         if (flag & KPF_OVERLOADED_BITS)
436                 return "(o)";
437         return "   ";
438 }
439
440 void usage(void)
441 {
442         int i, j;
443
444         printf(
445 "page-types [options]\n"
446 "            -r|--raw                  Raw mode, for kernel developers\n"
447 "            -a|--addr    addr-spec    Walk a range of pages\n"
448 "            -b|--bits    bits-spec    Walk pages with specified bits\n"
449 #if 0 /* planned features */
450 "            -p|--pid     pid          Walk process address space\n"
451 "            -f|--file    filename     Walk file address space\n"
452 #endif
453 "            -l|--list                 Show page details in ranges\n"
454 "            -L|--list-each            Show page details one by one\n"
455 "            -N|--no-summary           Don't show summay info\n"
456 "            -h|--help                 Show this usage message\n"
457 "addr-spec:\n"
458 "            N                         one page at offset N (unit: pages)\n"
459 "            N+M                       pages range from N to N+M-1\n"
460 "            N,M                       pages range from N to M-1\n"
461 "            N,                        pages range from N to end\n"
462 "            ,M                        pages range from 0 to M\n"
463 "bits-spec:\n"
464 "            bit1,bit2                 (flags & (bit1|bit2)) != 0\n"
465 "            bit1,bit2=bit1            (flags & (bit1|bit2)) == bit1\n"
466 "            bit1,~bit2                (flags & (bit1|bit2)) == bit1\n"
467 "            =bit1,bit2                flags == (bit1|bit2)\n"
468 "bit-names:\n"
469         );
470
471         for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
472                 if (!page_flag_names[i])
473                         continue;
474                 printf("%16s%s", page_flag_names[i] + 2,
475                                  page_flag_type(1ULL << i));
476                 if (++j > 3) {
477                         j = 0;
478                         putchar('\n');
479                 }
480         }
481         printf("\n                                   "
482                 "(r) raw mode bits  (o) overloaded bits\n");
483 }
484
/*
 * Parse the number at the start of str (decimal, octal with a leading 0,
 * or hex with a leading 0x).
 *
 * Characters after the number are deliberately ignored: callers pass
 * pointers into option strings such as "N,M" or "bit1,bit2".
 *
 * Exits with an error message when str does not start with a number or
 * when the value does not fit into unsigned long long.
 */
unsigned long long parse_number(const char *str)
{
	unsigned long long n;
	char *end;

	/*
	 * strtoull() rather than strtoll(): the full unsigned 64-bit range is
	 * needed for flag masks, and values above LLONG_MAX must not be
	 * clamped silently.
	 */
	errno = 0;
	n = strtoull(str, &end, 0);

	/* no digits consumed at all: neither a flag name nor a number */
	if (end == str)
		fatal("invalid name or number: %s\n", str);

	if (errno == ERANGE)
		fatal("number out of range: %s\n", str);

	return n;
}
496
497 void parse_pid(const char *str)
498 {
499         opt_pid = parse_number(str);
500 }
501
/* Placeholder for the file option: the argument is currently ignored. */
void parse_file(const char *name)
{
	(void)name;
}
505
506 void add_addr_range(unsigned long offset, unsigned long size)
507 {
508         if (nr_addr_ranges >= MAX_ADDR_RANGES)
509                 fatal("too much addr ranges\n");
510
511         opt_offset[nr_addr_ranges] = offset;
512         opt_size[nr_addr_ranges] = size;
513         nr_addr_ranges++;
514 }
515
/*
 * Parse one addr-spec and register the resulting page range:
 *
 *   N     one page at offset N
 *   N+M   M pages starting at N
 *   N,M   pages N up to, but not including, M
 *   N,    from N with no upper limit (size ULONG_MAX)
 *   ,M    M pages starting at 0
 *
 * A ',' separator takes precedence over '+'.  Exits on malformed numbers
 * and on "N,M" with M < N.
 */
void parse_addr_range(const char *optarg)
{
	unsigned long offset;
	unsigned long size;
	const char *sep;

	sep = strchr(optarg, ',');
	if (sep == NULL)
		sep = strchr(optarg, '+');

	if (sep == NULL) {
		/* plain number: a single page */
		offset = parse_number(optarg);
		size   = 1;
	} else if (sep == optarg) {
		/* nothing before the separator: start at page 0 */
		offset = 0;
		size   = parse_number(sep + 1);
	} else {
		offset = parse_number(optarg);
		size   = ULONG_MAX;	/* default when nothing follows the separator */

		if (sep[1] != '\0') {
			size = parse_number(sep + 1);
			if (*sep == ',') {
				/* "N,M" gives an end offset: convert it to a length */
				if (size < offset)
					fatal("invalid range: %lu,%lu\n",
							offset, size);
				size -= offset;
			}
		}
	}

	add_addr_range(offset, size);
}
549
550 void add_bits_filter(uint64_t mask, uint64_t bits)
551 {
552         if (nr_bit_filters >= MAX_BIT_FILTERS)
553                 fatal("too much bit filters\n");
554
555         opt_mask[nr_bit_filters] = mask;
556         opt_bits[nr_bit_filters] = bits;
557         nr_bit_filters++;
558 }
559
560 uint64_t parse_flag_name(const char *str, int len)
561 {
562         int i;
563
564         if (!*str || !len)
565                 return 0;
566
567         if (len <= 8 && !strncmp(str, "compound", len))
568                 return BITS_COMPOUND;
569
570         for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) {
571                 if (!page_flag_names[i])
572                         continue;
573                 if (!strncmp(str, page_flag_names[i] + 2, len))
574                         return 1ULL << i;
575         }
576
577         return parse_number(str);
578 }
579
/*
 * Parse a comma separated list of flag tokens, stopping at '=' or at the
 * end of the string, and return the OR of their bit masks.
 *
 * Tokens starting with '~' are skipped unless 'all' is non-zero; with
 * 'all' set the '~' is stripped and the rest of the token is included
 * (provided something follows the '~').
 */
uint64_t parse_flag_names(const char *str, int all)
{
	const char *token = str;	/* start of the current token */
	const char *cursor;		/* scans for the next delimiter */
	uint64_t   flags = 0;

	for (cursor = str; ; cursor++) {
		int take;

		if (*cursor != ',' && *cursor != '=' && *cursor != '\0')
			continue;

		/* [token, cursor) is a complete token: decide whether it counts */
		if (*token != '~') {
			take = 1;
		} else if (all) {
			token++;
			take = (*token != '\0');
		} else {
			take = 0;
		}

		if (take)
			flags |= parse_flag_name(token, cursor - token);

		if (*cursor != ',')
			break;
		token = cursor + 1;
	}

	return flags;
}
598
599 void parse_bits_mask(const char *optarg)
600 {
601         uint64_t mask;
602         uint64_t bits;
603         const char *p;
604
605         p = strchr(optarg, '=');
606         if (p == optarg) {
607                 mask = KPF_ALL_BITS;
608                 bits = parse_flag_names(p + 1, 0);
609         } else if (p) {
610                 mask = parse_flag_names(optarg, 0);
611                 bits = parse_flag_names(p + 1, 0);
612         } else if (strchr(optarg, '~')) {
613                 mask = parse_flag_names(optarg, 1);
614                 bits = parse_flag_names(optarg, 0);
615         } else {
616                 mask = parse_flag_names(optarg, 0);
617                 bits = KPF_ALL_BITS;
618         }
619
620         add_bits_filter(mask, bits);
621 }
622
623
/* Long options for getopt_long(); each one maps to a short option letter. */
struct option opts[] = {
	{ "raw",        no_argument,       NULL, 'r' },
	{ "pid",        required_argument, NULL, 'p' },
	{ "file",       required_argument, NULL, 'f' },
	{ "addr",       required_argument, NULL, 'a' },
	{ "bits",       required_argument, NULL, 'b' },
	{ "list",       no_argument,       NULL, 'l' },
	{ "list-each",  no_argument,       NULL, 'L' },
	{ "no-summary", no_argument,       NULL, 'N' },
	{ "help",       no_argument,       NULL, 'h' },
	{ NULL,         0,                 NULL, 0   }	/* terminator */
};
636
637 int main(int argc, char *argv[])
638 {
639         int c;
640
641         page_size = getpagesize();
642
643         while ((c = getopt_long(argc, argv,
644                                 "rp:f:a:b:lLNh", opts, NULL)) != -1) {
645                 switch (c) {
646                 case 'r':
647                         opt_raw = 1;
648                         break;
649                 case 'p':
650                         parse_pid(optarg);
651                         break;
652                 case 'f':
653                         parse_file(optarg);
654                         break;
655                 case 'a':
656                         parse_addr_range(optarg);
657                         break;
658                 case 'b':
659                         parse_bits_mask(optarg);
660                         break;
661                 case 'l':
662                         opt_list = 1;
663                         break;
664                 case 'L':
665                         opt_list = 2;
666                         break;
667                 case 'N':
668                         opt_no_summary = 1;
669                         break;
670                 case 'h':
671                         usage();
672                         exit(0);
673                 default:
674                         usage();
675                         exit(1);
676                 }
677         }
678
679         if (opt_list == 1)
680                 printf("offset\tcount\tflags\n");
681         if (opt_list == 2)
682                 printf("offset\tflags\n");
683
684         walk_addr_ranges();
685
686         if (opt_list == 1)
687                 show_page_range(0, 0);  /* drain the buffer */
688
689         if (opt_no_summary)
690                 return 0;
691
692         if (opt_list)
693                 printf("\n\n");
694
695         show_summary();
696
697         return 0;
698 }