ftrace: distinguish kretprobe'd functions in trace logs
[linux-2.6] kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/debugfs.h>
18 #include <linux/pagemap.h>
19 #include <linux/hardirq.h>
20 #include <linux/linkage.h>
21 #include <linux/uaccess.h>
22 #include <linux/ftrace.h>
23 #include <linux/module.h>
24 #include <linux/percpu.h>
25 #include <linux/ctype.h>
26 #include <linux/init.h>
27 #include <linux/poll.h>
28 #include <linux/gfp.h>
29 #include <linux/fs.h>
30 #include <linux/kprobes.h>
31 #include <linux/writeback.h>
32
33 #include <linux/stacktrace.h>
34
35 #include "trace.h"
36
37 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
38 unsigned long __read_mostly     tracing_thresh;
39
40 static unsigned long __read_mostly      tracing_nr_buffers;
41 static cpumask_t __read_mostly          tracing_buffer_mask;
42
43 #define for_each_tracing_cpu(cpu)       \
44         for_each_cpu_mask(cpu, tracing_buffer_mask)
45
46 /* dummy trace to disable tracing */
47 static struct tracer no_tracer __read_mostly = {
48         .name           = "none",
49 };
50
51 static int trace_alloc_page(void);
52 static int trace_free_page(void);
53
54 static int tracing_disabled = 1;
55
56 static unsigned long tracing_pages_allocated;
57
58 long
59 ns2usecs(cycle_t nsec)
60 {
61         nsec += 500;
62         do_div(nsec, 1000);
63         return nsec;
64 }
65
66 cycle_t ftrace_now(int cpu)
67 {
68         return cpu_clock(cpu);
69 }
70
71 /*
72  * The global_trace is the descriptor that holds the tracing
73  * buffers for the live tracing. For each CPU, it contains
74  * a linked list of pages that will store trace entries. The
75  * page descriptors of those pages are used to hold the
76  * linked list itself: the lru item in each page descriptor
77  * links the pages of each CPU's buffer together.
78  *
79  * For each active CPU there is a data field that holds the
80  * pages for the buffer for that CPU. Each CPU has the same number
81  * of pages allocated for its buffer.
82  */
83 static struct trace_array       global_trace;
84
85 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
86
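/*
 * Illustrative sketch (not part of the original file): trace_alloc_page()
 * chains a freshly allocated page into a CPU's buffer roughly like this,
 * reusing the page descriptor's lru member as the list node:
 *
 *	array = (void *)__get_free_page(GFP_KERNEL);
 *	page  = virt_to_page(array);
 *	list_add(&page->lru, &data->trace_pages);
 *
 * head_page() below walks the same list back to a buffer address via
 * list_entry() and page_address().
 */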
87 /*
88  * The max_tr is used to snapshot the global_trace when a maximum
89  * latency is reached. Some tracers will use this to store a maximum
90  * trace while it continues examining live traces.
91  *
92  * The buffers for the max_tr are set up the same as the global_trace.
93  * When a snapshot is taken, the linked list of the max_tr is swapped
94  * with the linked list of the global_trace and the buffers are reset for
95  * the global_trace so the tracing can continue.
96  */
97 static struct trace_array       max_tr;
98
99 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
100
101 /* tracer_enabled is used to toggle activation of a tracer */
102 static int                      tracer_enabled = 1;
103
104 /*
105  * trace_nr_entries is the number of entries allocated
106  * for a buffer. Note that the number of entries is always rounded
107  * to a multiple of ENTRIES_PER_PAGE.
108  */
109 static unsigned long            trace_nr_entries = 65536UL;
110
111 /* trace_types holds a link list of available tracers. */
112 static struct tracer            *trace_types __read_mostly;
113
114 /* current_trace points to the tracer that is currently active */
115 static struct tracer            *current_trace __read_mostly;
116
117 /*
118  * max_tracer_type_len is used to simplify the allocation of
119  * buffers used when reading tracer names from userspace. We keep track of
120  * the longest tracer name registered.
121  */
122 static int                      max_tracer_type_len;
123
124 /*
125  * trace_types_lock is used to protect the trace_types list.
126  * This lock is also used to keep user access serialized:
127  * accesses from userspace take this lock for the duration of
128  * the resulting activity inside the kernel.
129  */
130 static DEFINE_MUTEX(trace_types_lock);
131
132 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
133 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
134
135 /* trace_flags holds iter_ctrl options */
136 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
137
138 /**
139  * trace_wake_up - wake up tasks waiting for trace input
140  *
141  * Simply wakes up any task that is blocked on the trace_wait
142  * queue. This is used with trace_poll for tasks polling the trace.
143  */
144 void trace_wake_up(void)
145 {
146         /*
147          * The runqueue_is_locked() can fail, but this is the best we
148          * have for now:
149          */
150         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
151                 wake_up(&trace_wait);
152 }
153
154 #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
155
156 static int __init set_nr_entries(char *str)
157 {
158         unsigned long nr_entries;
159         int ret;
160
161         if (!str)
162                 return 0;
163         ret = strict_strtoul(str, 0, &nr_entries);
164         /* nr_entries cannot be zero */
165         if (ret < 0 || nr_entries == 0)
166                 return 0;
167         trace_nr_entries = nr_entries;
168         return 1;
169 }
170 __setup("trace_entries=", set_nr_entries);
171
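/*
 * Example (illustrative): the buffer size can be requested on the kernel
 * command line, e.g.
 *
 *	trace_entries=131072
 *
 * The value is parsed by set_nr_entries() above and later rounded to a
 * multiple of ENTRIES_PER_PAGE when the buffers are allocated.
 */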
172 unsigned long nsecs_to_usecs(unsigned long nsecs)
173 {
174         return nsecs / 1000;
175 }
176
177 /*
178  * trace_flag_type is an enumeration that holds different
179  * states when a trace occurs. These are:
180  *  IRQS_OFF    - interrupts were disabled
181  *  NEED_RESCHED - reschedule is requested
182  *  HARDIRQ     - inside an interrupt handler
183  *  SOFTIRQ     - inside a softirq handler
184  */
185 enum trace_flag_type {
186         TRACE_FLAG_IRQS_OFF             = 0x01,
187         TRACE_FLAG_NEED_RESCHED         = 0x02,
188         TRACE_FLAG_HARDIRQ              = 0x04,
189         TRACE_FLAG_SOFTIRQ              = 0x08,
190 };
191
192 /*
193  * TRACE_ITER_SYM_MASK masks the options in trace_flags that
194  * control the output of kernel symbols.
195  */
196 #define TRACE_ITER_SYM_MASK \
197         (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
198
199 /* These must match the bit positions in trace_iterator_flags */
200 static const char *trace_options[] = {
201         "print-parent",
202         "sym-offset",
203         "sym-addr",
204         "verbose",
205         "raw",
206         "hex",
207         "bin",
208         "block",
209         "stacktrace",
210         "sched-tree",
211         NULL
212 };
213
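/*
 * Usage note (illustrative, assuming the debugfs layout mentioned in the
 * comments of this file): each string in trace_options[] can be toggled
 * through the iter_ctrl file, e.g.
 *
 *	echo sym-offset > /debugfs/tracing/iter_ctrl
 *	echo noverbose  > /debugfs/tracing/iter_ctrl
 *
 * where a "no" prefix clears the corresponding bit in trace_flags.
 */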
214 /*
215  * ftrace_max_lock is used to protect the swapping of buffers
216  * when taking a max snapshot. The buffers themselves are
217  * protected by per_cpu spinlocks. But the action of the swap
218  * needs its own lock.
219  *
220  * This is defined as a raw_spinlock_t in order to help
221  * with performance when lockdep debugging is enabled.
222  */
223 static raw_spinlock_t ftrace_max_lock =
224         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
225
226 /*
227  * Copy the new maximum trace into the separate maximum-trace
228  * structure. (this way the maximum trace is permanently saved,
229  * for later retrieval via /debugfs/tracing/latency_trace)
230  */
231 static void
232 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
233 {
234         struct trace_array_cpu *data = tr->data[cpu];
235
236         max_tr.cpu = cpu;
237         max_tr.time_start = data->preempt_timestamp;
238
239         data = max_tr.data[cpu];
240         data->saved_latency = tracing_max_latency;
241
242         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
243         data->pid = tsk->pid;
244         data->uid = tsk->uid;
245         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
246         data->policy = tsk->policy;
247         data->rt_priority = tsk->rt_priority;
248
249         /* record this task's comm */
250         tracing_record_cmdline(current);
251 }
252
253 #define CHECK_COND(cond)                        \
254         if (unlikely(cond)) {                   \
255                 tracing_disabled = 1;           \
256                 WARN_ON(1);                     \
257                 return -1;                      \
258         }
259
260 /**
261  * check_pages - integrity check of trace buffers
262  *
263  * As a safety measure we check to make sure the data pages have not
264  * been corrupted.
265  */
266 int check_pages(struct trace_array_cpu *data)
267 {
268         struct page *page, *tmp;
269
270         CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
271         CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
272
273         list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
274                 CHECK_COND(page->lru.next->prev != &page->lru);
275                 CHECK_COND(page->lru.prev->next != &page->lru);
276         }
277
278         return 0;
279 }
280
281 /**
282  * head_page - page address of the first page in per_cpu buffer.
283  *
284  * head_page returns the page address of the first page in
285  * a per_cpu buffer. This also performs various consistency
286  * checks to make sure the buffer has not been corrupted.
287  */
288 void *head_page(struct trace_array_cpu *data)
289 {
290         struct page *page;
291
292         if (list_empty(&data->trace_pages))
293                 return NULL;
294
295         page = list_entry(data->trace_pages.next, struct page, lru);
296         BUG_ON(&page->lru == &data->trace_pages);
297
298         return page_address(page);
299 }
300
301 /**
302  * trace_seq_printf - sequence printing of trace information
303  * @s: trace sequence descriptor
304  * @fmt: printf format string
305  *
306  * The tracer may use either sequence operations or its own
307  * copy to user routines. To simplify formatting of a trace,
308  * trace_seq_printf is used to store strings into a special
309  * buffer (@s). Then the output may be either used by
310  * the sequencer or pulled into another buffer.
311  */
312 int
313 trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
314 {
315         int len = (PAGE_SIZE - 1) - s->len;
316         va_list ap;
317         int ret;
318
319         if (!len)
320                 return 0;
321
322         va_start(ap, fmt);
323         ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
324         va_end(ap);
325
326         /* If we can't write it all, don't bother writing anything */
327         if (ret >= len)
328                 return 0;
329
330         s->len += ret;
331
332         return len;
333 }
334
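/*
 * Illustrative sketch of how an output routine uses the trace_seq
 * helpers: formatted text accumulates in iter->seq and is later copied
 * to the seq_file or to userspace.  (example_output() is hypothetical;
 * see print_trace_fmt() below for the real pattern.)
 *
 *	static int example_output(struct trace_iterator *iter)
 *	{
 *		struct trace_seq *s = &iter->seq;
 *
 *		if (!trace_seq_printf(s, "cpu %d: ", iter->cpu))
 *			return 0;	(buffer full, give up on this entry)
 *		return trace_seq_puts(s, "hello\n");
 *	}
 */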
335 /**
336  * trace_seq_puts - trace sequence printing of simple string
337  * @s: trace sequence descriptor
338  * @str: simple string to record
339  *
340  * The tracer may use either the sequence operations or its own
341  * copy to user routines. This function records a simple string
342  * into a special buffer (@s) for later retrieval by a sequencer
343  * or other mechanism.
344  */
345 static int
346 trace_seq_puts(struct trace_seq *s, const char *str)
347 {
348         int len = strlen(str);
349
350         if (len > ((PAGE_SIZE - 1) - s->len))
351                 return 0;
352
353         memcpy(s->buffer + s->len, str, len);
354         s->len += len;
355
356         return len;
357 }
358
359 static int
360 trace_seq_putc(struct trace_seq *s, unsigned char c)
361 {
362         if (s->len >= (PAGE_SIZE - 1))
363                 return 0;
364
365         s->buffer[s->len++] = c;
366
367         return 1;
368 }
369
370 static int
371 trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
372 {
373         if (len > ((PAGE_SIZE - 1) - s->len))
374                 return 0;
375
376         memcpy(s->buffer + s->len, mem, len);
377         s->len += len;
378
379         return len;
380 }
381
382 #define HEX_CHARS 17
383 static const char hex2asc[] = "0123456789abcdef";
384
385 static int
386 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
387 {
388         unsigned char hex[HEX_CHARS];
389         unsigned char *data = mem;
390         unsigned char byte;
391         int i, j;
392
393         BUG_ON(len >= HEX_CHARS);
394
395 #ifdef __BIG_ENDIAN
396         for (i = 0, j = 0; i < len; i++) {
397 #else
398         for (i = len-1, j = 0; i >= 0; i--) {
399 #endif
400                 byte = data[i];
401
402                 hex[j++] = hex2asc[byte >> 4];   /* high nibble first */
403                 hex[j++] = hex2asc[byte & 0x0f]; /* then low nibble */
404         }
405         hex[j++] = ' ';
406
407         return trace_seq_putmem(s, hex, j);
408 }
409
410 static void
411 trace_seq_reset(struct trace_seq *s)
412 {
413         s->len = 0;
414         s->readpos = 0;
415 }
416
417 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
418 {
419         int len;
420         int ret;
421
422         if (s->len <= s->readpos)
423                 return -EBUSY;
424
425         len = s->len - s->readpos;
426         if (cnt > len)
427                 cnt = len;
428         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
429         if (ret)
430                 return -EFAULT;
431
432         s->readpos += cnt;
433         return cnt;
434 }
435
436 static void
437 trace_print_seq(struct seq_file *m, struct trace_seq *s)
438 {
439         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
440
441         s->buffer[len] = 0;
442         seq_puts(m, s->buffer);
443
444         trace_seq_reset(s);
445 }
446
447 /*
448  * Flip the trace buffers between two trace descriptors.
449  * This is usually done between the global_trace and
450  * the max_tr, to record a snapshot of the current trace.
451  *
452  * The ftrace_max_lock must be held.
453  */
454 static void
455 flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
456 {
457         struct list_head flip_pages;
458
459         INIT_LIST_HEAD(&flip_pages);
460
461         memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
462                 sizeof(struct trace_array_cpu) -
463                 offsetof(struct trace_array_cpu, trace_head_idx));
464
465         check_pages(tr1);
466         check_pages(tr2);
467         list_splice_init(&tr1->trace_pages, &flip_pages);
468         list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
469         list_splice_init(&flip_pages, &tr2->trace_pages);
470         BUG_ON(!list_empty(&flip_pages));
471         check_pages(tr1);
472         check_pages(tr2);
473 }
474
475 /**
476  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
477  * @tr: tracer
478  * @tsk: the task with the latency
479  * @cpu: The cpu that initiated the trace.
480  *
481  * Flip the buffers between the @tr and the max_tr and record information
482  * about which task was the cause of this latency.
483  */
484 void
485 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
486 {
487         struct trace_array_cpu *data;
488         int i;
489
490         WARN_ON_ONCE(!irqs_disabled());
491         __raw_spin_lock(&ftrace_max_lock);
492         /* clear out all the previous traces */
493         for_each_tracing_cpu(i) {
494                 data = tr->data[i];
495                 flip_trace(max_tr.data[i], data);
496                 tracing_reset(data);
497         }
498
499         __update_max_tr(tr, tsk, cpu);
500         __raw_spin_unlock(&ftrace_max_lock);
501 }
502
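/*
 * Illustrative sketch (not from this file) of how a latency tracer is
 * expected to use this: when a new worst-case latency is observed,
 * record it and snapshot the live buffers into max_tr.
 *
 *	delta = ftrace_now(cpu) - t_start;
 *	if (delta > tracing_max_latency) {
 *		tracing_max_latency = delta;
 *		update_max_tr(tr, current, cpu);
 *	}
 */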
503 /**
504  * update_max_tr_single - only copy one trace over, and reset the rest
505  * @tr: tracer
506  * @tsk: task with the latency
507  * @cpu: the cpu of the buffer to copy.
508  *
509  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
510  */
511 void
512 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
513 {
514         struct trace_array_cpu *data = tr->data[cpu];
515         int i;
516
517         WARN_ON_ONCE(!irqs_disabled());
518         __raw_spin_lock(&ftrace_max_lock);
519         for_each_tracing_cpu(i)
520                 tracing_reset(max_tr.data[i]);
521
522         flip_trace(max_tr.data[cpu], data);
523         tracing_reset(data);
524
525         __update_max_tr(tr, tsk, cpu);
526         __raw_spin_unlock(&ftrace_max_lock);
527 }
528
529 /**
530  * register_tracer - register a tracer with the ftrace system.
531  * @type: the plugin for the tracer
532  *
533  * Register a new plugin tracer.
534  */
535 int register_tracer(struct tracer *type)
536 {
537         struct tracer *t;
538         int len;
539         int ret = 0;
540
541         if (!type->name) {
542                 pr_info("Tracer must have a name\n");
543                 return -1;
544         }
545
546         mutex_lock(&trace_types_lock);
547         for (t = trace_types; t; t = t->next) {
548                 if (strcmp(type->name, t->name) == 0) {
549                         /* already found */
550                         pr_info("Tracer %s already registered\n",
551                                 type->name);
552                         ret = -1;
553                         goto out;
554                 }
555         }
556
557 #ifdef CONFIG_FTRACE_STARTUP_TEST
558         if (type->selftest) {
559                 struct tracer *saved_tracer = current_trace;
560                 struct trace_array_cpu *data;
561                 struct trace_array *tr = &global_trace;
562                 int saved_ctrl = tr->ctrl;
563                 int i;
564                 /*
565                  * Run a selftest on this tracer.
566                  * Here we reset the trace buffer, and set the current
567                  * tracer to be this tracer. The tracer can then run some
568                  * internal tracing to verify that everything is in order.
569                  * If we fail, we do not register this tracer.
570                  */
571                 for_each_tracing_cpu(i) {
572                         data = tr->data[i];
573                         if (!head_page(data))
574                                 continue;
575                         tracing_reset(data);
576                 }
577                 current_trace = type;
578                 tr->ctrl = 0;
579                 /* the test is responsible for initializing and enabling */
580                 pr_info("Testing tracer %s: ", type->name);
581                 ret = type->selftest(type, tr);
582                 /* the test is responsible for resetting too */
583                 current_trace = saved_tracer;
584                 tr->ctrl = saved_ctrl;
585                 if (ret) {
586                         printk(KERN_CONT "FAILED!\n");
587                         goto out;
588                 }
589                 /* Only reset on passing, to avoid touching corrupted buffers */
590                 for_each_tracing_cpu(i) {
591                         data = tr->data[i];
592                         if (!head_page(data))
593                                 continue;
594                         tracing_reset(data);
595                 }
596                 printk(KERN_CONT "PASSED\n");
597         }
598 #endif
599
600         type->next = trace_types;
601         trace_types = type;
602         len = strlen(type->name);
603         if (len > max_tracer_type_len)
604                 max_tracer_type_len = len;
605
606  out:
607         mutex_unlock(&trace_types_lock);
608
609         return ret;
610 }
611
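/*
 * Minimal registration sketch (hypothetical tracer name and callbacks;
 * the struct tracer fields come from trace.h):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	register_tracer(&example_tracer);
 */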
612 void unregister_tracer(struct tracer *type)
613 {
614         struct tracer **t;
615         int len;
616
617         mutex_lock(&trace_types_lock);
618         for (t = &trace_types; *t; t = &(*t)->next) {
619                 if (*t == type)
620                         goto found;
621         }
622         pr_info("Tracer %s not registered\n", type->name);
623         goto out;
624
625  found:
626         *t = (*t)->next;
627         if (strlen(type->name) != max_tracer_type_len)
628                 goto out;
629
630         max_tracer_type_len = 0;
631         for (t = &trace_types; *t; t = &(*t)->next) {
632                 len = strlen((*t)->name);
633                 if (len > max_tracer_type_len)
634                         max_tracer_type_len = len;
635         }
636  out:
637         mutex_unlock(&trace_types_lock);
638 }
639
640 void tracing_reset(struct trace_array_cpu *data)
641 {
642         data->trace_idx = 0;
643         data->overrun = 0;
644         data->trace_head = data->trace_tail = head_page(data);
645         data->trace_head_idx = 0;
646         data->trace_tail_idx = 0;
647 }
648
649 #define SAVED_CMDLINES 128
650 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
651 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
652 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
653 static int cmdline_idx;
654 static DEFINE_SPINLOCK(trace_cmdline_lock);
655
656 /* temporarily disable recording */
657 atomic_t trace_record_cmdline_disabled __read_mostly;
658
659 static void trace_init_cmdlines(void)
660 {
661         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
662         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
663         cmdline_idx = 0;
664 }
665
666 void trace_stop_cmdline_recording(void);
667
668 static void trace_save_cmdline(struct task_struct *tsk)
669 {
670         unsigned map;
671         unsigned idx;
672
673         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
674                 return;
675
676         /*
677          * It's not the end of the world if we don't get
678          * the lock, but we also don't want to spin
679          * nor do we want to disable interrupts,
680          * so if we miss here, then better luck next time.
681          */
682         if (!spin_trylock(&trace_cmdline_lock))
683                 return;
684
685         idx = map_pid_to_cmdline[tsk->pid];
686         if (idx >= SAVED_CMDLINES) {
687                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
688
689                 map = map_cmdline_to_pid[idx];
690                 if (map <= PID_MAX_DEFAULT)
691                         map_pid_to_cmdline[map] = (unsigned)-1;
692
693                 map_pid_to_cmdline[tsk->pid] = idx;
694
695                 cmdline_idx = idx;
696         }
697
698         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
699
700         spin_unlock(&trace_cmdline_lock);
701 }
702
703 static char *trace_find_cmdline(int pid)
704 {
705         char *cmdline = "<...>";
706         unsigned map;
707
708         if (!pid)
709                 return "<idle>";
710
711         if (pid > PID_MAX_DEFAULT)
712                 goto out;
713
714         map = map_pid_to_cmdline[pid];
715         if (map >= SAVED_CMDLINES)
716                 goto out;
717
718         cmdline = saved_cmdlines[map];
719
720  out:
721         return cmdline;
722 }
723
724 void tracing_record_cmdline(struct task_struct *tsk)
725 {
726         if (atomic_read(&trace_record_cmdline_disabled))
727                 return;
728
729         trace_save_cmdline(tsk);
730 }
731
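/*
 * Example (illustrative): once trace_save_cmdline() has seen PID 1234
 * running "bash", trace_find_cmdline(1234) returns "bash" and the output
 * routines print the task as "bash-1234"; PIDs that fell out of the
 * cache come back as "<...>".
 */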
732 static inline struct list_head *
733 trace_next_list(struct trace_array_cpu *data, struct list_head *next)
734 {
735         /*
736          * Round-robin - but skip the head (which is not a real page):
737          */
738         next = next->next;
739         if (unlikely(next == &data->trace_pages))
740                 next = next->next;
741         BUG_ON(next == &data->trace_pages);
742
743         return next;
744 }
745
746 static inline void *
747 trace_next_page(struct trace_array_cpu *data, void *addr)
748 {
749         struct list_head *next;
750         struct page *page;
751
752         page = virt_to_page(addr);
753
754         next = trace_next_list(data, &page->lru);
755         page = list_entry(next, struct page, lru);
756
757         return page_address(page);
758 }
759
760 static inline struct trace_entry *
761 tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
762 {
763         unsigned long idx, idx_next;
764         struct trace_entry *entry;
765
766         data->trace_idx++;
767         idx = data->trace_head_idx;
768         idx_next = idx + 1;
769
770         BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
771
772         entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
773
774         if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
775                 data->trace_head = trace_next_page(data, data->trace_head);
776                 idx_next = 0;
777         }
778
779         if (data->trace_head == data->trace_tail &&
780             idx_next == data->trace_tail_idx) {
781                 /* overrun */
782                 data->overrun++;
783                 data->trace_tail_idx++;
784                 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
785                         data->trace_tail =
786                                 trace_next_page(data, data->trace_tail);
787                         data->trace_tail_idx = 0;
788                 }
789         }
790
791         data->trace_head_idx = idx_next;
792
793         return entry;
794 }
795
796 static inline void
797 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
798 {
799         struct task_struct *tsk = current;
800         unsigned long pc;
801
802         pc = preempt_count();
803
804         entry->preempt_count    = pc & 0xff;
805         entry->pid              = (tsk) ? tsk->pid : 0;
806         entry->t                = ftrace_now(raw_smp_processor_id());
807         entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
808                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
809                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
810                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
811 }
812
813 void
814 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
815                unsigned long ip, unsigned long parent_ip, unsigned long flags)
816 {
817         struct trace_entry *entry;
818         unsigned long irq_flags;
819
820         raw_local_irq_save(irq_flags);
821         __raw_spin_lock(&data->lock);
822         entry                   = tracing_get_trace_entry(tr, data);
823         tracing_generic_entry_update(entry, flags);
824         entry->type             = TRACE_FN;
825         entry->fn.ip            = ip;
826         entry->fn.parent_ip     = parent_ip;
827         __raw_spin_unlock(&data->lock);
828         raw_local_irq_restore(irq_flags);
829 }
830
831 void
832 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
833        unsigned long ip, unsigned long parent_ip, unsigned long flags)
834 {
835         if (likely(!atomic_read(&data->disabled)))
836                 trace_function(tr, data, ip, parent_ip, flags);
837 }
838
839 void __trace_stack(struct trace_array *tr,
840                    struct trace_array_cpu *data,
841                    unsigned long flags,
842                    int skip)
843 {
844         struct trace_entry *entry;
845         struct stack_trace trace;
846
847         if (!(trace_flags & TRACE_ITER_STACKTRACE))
848                 return;
849
850         entry                   = tracing_get_trace_entry(tr, data);
851         tracing_generic_entry_update(entry, flags);
852         entry->type             = TRACE_STACK;
853
854         memset(&entry->stack, 0, sizeof(entry->stack));
855
856         trace.nr_entries        = 0;
857         trace.max_entries       = FTRACE_STACK_ENTRIES;
858         trace.skip              = skip;
859         trace.entries           = entry->stack.caller;
860
861         save_stack_trace(&trace);
862 }
863
864 void
865 __trace_special(void *__tr, void *__data,
866                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
867 {
868         struct trace_array_cpu *data = __data;
869         struct trace_array *tr = __tr;
870         struct trace_entry *entry;
871         unsigned long irq_flags;
872
873         raw_local_irq_save(irq_flags);
874         __raw_spin_lock(&data->lock);
875         entry                   = tracing_get_trace_entry(tr, data);
876         tracing_generic_entry_update(entry, 0);
877         entry->type             = TRACE_SPECIAL;
878         entry->special.arg1     = arg1;
879         entry->special.arg2     = arg2;
880         entry->special.arg3     = arg3;
881         __trace_stack(tr, data, irq_flags, 4);
882         __raw_spin_unlock(&data->lock);
883         raw_local_irq_restore(irq_flags);
884
885         trace_wake_up();
886 }
887
888 void
889 tracing_sched_switch_trace(struct trace_array *tr,
890                            struct trace_array_cpu *data,
891                            struct task_struct *prev,
892                            struct task_struct *next,
893                            unsigned long flags)
894 {
895         struct trace_entry *entry;
896         unsigned long irq_flags;
897
898         raw_local_irq_save(irq_flags);
899         __raw_spin_lock(&data->lock);
900         entry                   = tracing_get_trace_entry(tr, data);
901         tracing_generic_entry_update(entry, flags);
902         entry->type             = TRACE_CTX;
903         entry->ctx.prev_pid     = prev->pid;
904         entry->ctx.prev_prio    = prev->prio;
905         entry->ctx.prev_state   = prev->state;
906         entry->ctx.next_pid     = next->pid;
907         entry->ctx.next_prio    = next->prio;
908         entry->ctx.next_state   = next->state;
909         __trace_stack(tr, data, flags, 5);
910         __raw_spin_unlock(&data->lock);
911         raw_local_irq_restore(irq_flags);
912 }
913
914 void
915 tracing_sched_wakeup_trace(struct trace_array *tr,
916                            struct trace_array_cpu *data,
917                            struct task_struct *wakee,
918                            struct task_struct *curr,
919                            unsigned long flags)
920 {
921         struct trace_entry *entry;
922         unsigned long irq_flags;
923
924         raw_local_irq_save(irq_flags);
925         __raw_spin_lock(&data->lock);
926         entry                   = tracing_get_trace_entry(tr, data);
927         tracing_generic_entry_update(entry, flags);
928         entry->type             = TRACE_WAKE;
929         entry->ctx.prev_pid     = curr->pid;
930         entry->ctx.prev_prio    = curr->prio;
931         entry->ctx.prev_state   = curr->state;
932         entry->ctx.next_pid     = wakee->pid;
933         entry->ctx.next_prio    = wakee->prio;
934         entry->ctx.next_state   = wakee->state;
935         __trace_stack(tr, data, flags, 6);
936         __raw_spin_unlock(&data->lock);
937         raw_local_irq_restore(irq_flags);
938
939         trace_wake_up();
940 }
941
942 void
943 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
944 {
945         struct trace_array *tr = &global_trace;
946         struct trace_array_cpu *data;
947         unsigned long flags;
948         long disabled;
949         int cpu;
950
951         if (tracing_disabled || current_trace == &no_tracer || !tr->ctrl)
952                 return;
953
954         local_irq_save(flags);
955         cpu = raw_smp_processor_id();
956         data = tr->data[cpu];
957         disabled = atomic_inc_return(&data->disabled);
958
959         if (likely(disabled == 1))
960                 __trace_special(tr, data, arg1, arg2, arg3);
961
962         atomic_dec(&data->disabled);
963         local_irq_restore(flags);
964 }
965
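/*
 * Usage sketch (illustrative): ftrace_special() can be sprinkled into
 * code being debugged to drop ad-hoc markers into the trace; they show
 * up via the TRACE_SPECIAL formatters as "# arg1 arg2 arg3", e.g.
 *
 *	ftrace_special(__LINE__, smp_processor_id(), 0);
 */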
966 #ifdef CONFIG_FTRACE
967 static void
968 function_trace_call(unsigned long ip, unsigned long parent_ip)
969 {
970         struct trace_array *tr = &global_trace;
971         struct trace_array_cpu *data;
972         unsigned long flags;
973         long disabled;
974         int cpu;
975
976         if (unlikely(!tracer_enabled))
977                 return;
978
979         local_irq_save(flags);
980         cpu = raw_smp_processor_id();
981         data = tr->data[cpu];
982         disabled = atomic_inc_return(&data->disabled);
983
984         if (likely(disabled == 1))
985                 trace_function(tr, data, ip, parent_ip, flags);
986
987         atomic_dec(&data->disabled);
988         local_irq_restore(flags);
989 }
990
991 static struct ftrace_ops trace_ops __read_mostly =
992 {
993         .func = function_trace_call,
994 };
995
996 void tracing_start_function_trace(void)
997 {
998         register_ftrace_function(&trace_ops);
999 }
1000
1001 void tracing_stop_function_trace(void)
1002 {
1003         unregister_ftrace_function(&trace_ops);
1004 }
1005 #endif
1006
1007 enum trace_file_type {
1008         TRACE_FILE_LAT_FMT      = 1,
1009 };
1010
1011 static struct trace_entry *
1012 trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1013                 struct trace_iterator *iter, int cpu)
1014 {
1015         struct page *page;
1016         struct trace_entry *array;
1017
1018         if (iter->next_idx[cpu] >= tr->entries ||
1019             iter->next_idx[cpu] >= data->trace_idx ||
1020             (data->trace_head == data->trace_tail &&
1021              data->trace_head_idx == data->trace_tail_idx))
1022                 return NULL;
1023
1024         if (!iter->next_page[cpu]) {
1025                 /* Initialize the iterator for this cpu's trace buffer */
1026                 WARN_ON(!data->trace_tail);
1027                 page = virt_to_page(data->trace_tail);
1028                 iter->next_page[cpu] = &page->lru;
1029                 iter->next_page_idx[cpu] = data->trace_tail_idx;
1030         }
1031
1032         page = list_entry(iter->next_page[cpu], struct page, lru);
1033         BUG_ON(&data->trace_pages == &page->lru);
1034
1035         array = page_address(page);
1036
1037         WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
1038         return &array[iter->next_page_idx[cpu]];
1039 }
1040
1041 static struct trace_entry *
1042 find_next_entry(struct trace_iterator *iter, int *ent_cpu)
1043 {
1044         struct trace_array *tr = iter->tr;
1045         struct trace_entry *ent, *next = NULL;
1046         int next_cpu = -1;
1047         int cpu;
1048
1049         for_each_tracing_cpu(cpu) {
1050                 if (!head_page(tr->data[cpu]))
1051                         continue;
1052                 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1053                 /*
1054                  * Pick the entry with the smallest timestamp:
1055                  */
1056                 if (ent && (!next || ent->t < next->t)) {
1057                         next = ent;
1058                         next_cpu = cpu;
1059                 }
1060         }
1061
1062         if (ent_cpu)
1063                 *ent_cpu = next_cpu;
1064
1065         return next;
1066 }
1067
1068 static void trace_iterator_increment(struct trace_iterator *iter)
1069 {
1070         iter->idx++;
1071         iter->next_idx[iter->cpu]++;
1072         iter->next_page_idx[iter->cpu]++;
1073
1074         if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
1075                 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1076
1077                 iter->next_page_idx[iter->cpu] = 0;
1078                 iter->next_page[iter->cpu] =
1079                         trace_next_list(data, iter->next_page[iter->cpu]);
1080         }
1081 }
1082
1083 static void trace_consume(struct trace_iterator *iter)
1084 {
1085         struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1086
1087         data->trace_tail_idx++;
1088         if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
1089                 data->trace_tail = trace_next_page(data, data->trace_tail);
1090                 data->trace_tail_idx = 0;
1091         }
1092
1093         /* If we emptied the buffer, reset the index */
1094         if (data->trace_head == data->trace_tail &&
1095             data->trace_head_idx == data->trace_tail_idx)
1096                 data->trace_idx = 0;
1097 }
1098
1099 static void *find_next_entry_inc(struct trace_iterator *iter)
1100 {
1101         struct trace_entry *next;
1102         int next_cpu = -1;
1103
1104         next = find_next_entry(iter, &next_cpu);
1105
1106         iter->prev_ent = iter->ent;
1107         iter->prev_cpu = iter->cpu;
1108
1109         iter->ent = next;
1110         iter->cpu = next_cpu;
1111
1112         if (next)
1113                 trace_iterator_increment(iter);
1114
1115         return next ? iter : NULL;
1116 }
1117
1118 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1119 {
1120         struct trace_iterator *iter = m->private;
1121         void *last_ent = iter->ent;
1122         int i = (int)*pos;
1123         void *ent;
1124
1125         (*pos)++;
1126
1127         /* can't go backwards */
1128         if (iter->idx > i)
1129                 return NULL;
1130
1131         if (iter->idx < 0)
1132                 ent = find_next_entry_inc(iter);
1133         else
1134                 ent = iter;
1135
1136         while (ent && iter->idx < i)
1137                 ent = find_next_entry_inc(iter);
1138
1139         iter->pos = *pos;
1140
1141         if (last_ent && !ent)
1142                 seq_puts(m, "\n\nvim:ft=help\n");
1143
1144         return ent;
1145 }
1146
1147 static void *s_start(struct seq_file *m, loff_t *pos)
1148 {
1149         struct trace_iterator *iter = m->private;
1150         void *p = NULL;
1151         loff_t l = 0;
1152         int i;
1153
1154         mutex_lock(&trace_types_lock);
1155
1156         if (!current_trace || current_trace != iter->trace) {
1157                 mutex_unlock(&trace_types_lock);
1158                 return NULL;
1159         }
1160
1161         atomic_inc(&trace_record_cmdline_disabled);
1162
1163         /* let the tracer grab locks here if needed */
1164         if (current_trace->start)
1165                 current_trace->start(iter);
1166
1167         if (*pos != iter->pos) {
1168                 iter->ent = NULL;
1169                 iter->cpu = 0;
1170                 iter->idx = -1;
1171                 iter->prev_ent = NULL;
1172                 iter->prev_cpu = -1;
1173
1174                 for_each_tracing_cpu(i) {
1175                         iter->next_idx[i] = 0;
1176                         iter->next_page[i] = NULL;
1177                 }
1178
1179                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1180                         ;
1181
1182         } else {
1183                 l = *pos - 1;
1184                 p = s_next(m, p, &l);
1185         }
1186
1187         return p;
1188 }
1189
1190 static void s_stop(struct seq_file *m, void *p)
1191 {
1192         struct trace_iterator *iter = m->private;
1193
1194         atomic_dec(&trace_record_cmdline_disabled);
1195
1196         /* let the tracer release locks here if needed */
1197         if (current_trace && current_trace == iter->trace && iter->trace->stop)
1198                 iter->trace->stop(iter);
1199
1200         mutex_unlock(&trace_types_lock);
1201 }
1202
1203 #define KRETPROBE_MSG "[unknown/kretprobe'd]"
1204
1205 #ifdef CONFIG_KRETPROBES
1206 static inline int kretprobed(unsigned long addr)
1207 {
1208         return addr == (unsigned long)kretprobe_trampoline;
1209 }
1210 #else
1211 static inline int kretprobed(unsigned long addr)
1212 {
1213         return 0;
1214 }
1215 #endif /* CONFIG_KRETPROBES */
1216
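/*
 * Effect on the output (the point of the kretprobed() check): when a
 * kretprobe is active on a function, the recorded parent address is the
 * kretprobe trampoline rather than the real caller.  Instead of
 * resolving the trampoline to a misleading symbol, the formatters below
 * substitute KRETPROBE_MSG, e.g. (function name illustrative):
 *
 *	do_sys_open <-[unknown/kretprobe'd]
 */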
1217 static int
1218 seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1219 {
1220 #ifdef CONFIG_KALLSYMS
1221         char str[KSYM_SYMBOL_LEN];
1222
1223         kallsyms_lookup(address, NULL, NULL, NULL, str);
1224
1225         return trace_seq_printf(s, fmt, str);
1226 #endif
1227         return 1;
1228 }
1229
1230 static int
1231 seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1232                      unsigned long address)
1233 {
1234 #ifdef CONFIG_KALLSYMS
1235         char str[KSYM_SYMBOL_LEN];
1236
1237         sprint_symbol(str, address);
1238         return trace_seq_printf(s, fmt, str);
1239 #endif
1240         return 1;
1241 }
1242
1243 #ifndef CONFIG_64BIT
1244 # define IP_FMT "%08lx"
1245 #else
1246 # define IP_FMT "%016lx"
1247 #endif
1248
1249 static int
1250 seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1251 {
1252         int ret;
1253
1254         if (!ip)
1255                 return trace_seq_printf(s, "0");
1256
1257         if (sym_flags & TRACE_ITER_SYM_OFFSET)
1258                 ret = seq_print_sym_offset(s, "%s", ip);
1259         else
1260                 ret = seq_print_sym_short(s, "%s", ip);
1261
1262         if (!ret)
1263                 return 0;
1264
1265         if (sym_flags & TRACE_ITER_SYM_ADDR)
1266                 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1267         return ret;
1268 }
1269
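/*
 * Illustrative output of seq_print_ip_sym() depending on the symbol
 * flags (symbol name, offset and address are made up):
 *
 *	default:		schedule
 *	TRACE_ITER_SYM_OFFSET:	schedule+0x5a/0x6a0
 *	TRACE_ITER_SYM_ADDR:	schedule <ffffffff8022f2d6>
 */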
1270 static void print_lat_help_header(struct seq_file *m)
1271 {
1272         seq_puts(m, "#                _------=> CPU#            \n");
1273         seq_puts(m, "#               / _-----=> irqs-off        \n");
1274         seq_puts(m, "#              | / _----=> need-resched    \n");
1275         seq_puts(m, "#              || / _---=> hardirq/softirq \n");
1276         seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
1277         seq_puts(m, "#              |||| /                      \n");
1278         seq_puts(m, "#              |||||     delay             \n");
1279         seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
1280         seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
1281 }
1282
1283 static void print_func_help_header(struct seq_file *m)
1284 {
1285         seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
1286         seq_puts(m, "#              | |      |          |         |\n");
1287 }
1288
1289
1290 static void
1291 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1292 {
1293         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1294         struct trace_array *tr = iter->tr;
1295         struct trace_array_cpu *data = tr->data[tr->cpu];
1296         struct tracer *type = current_trace;
1297         unsigned long total   = 0;
1298         unsigned long entries = 0;
1299         int cpu;
1300         const char *name = "preemption";
1301
1302         if (type)
1303                 name = type->name;
1304
1305         for_each_tracing_cpu(cpu) {
1306                 if (head_page(tr->data[cpu])) {
1307                         total += tr->data[cpu]->trace_idx;
1308                         if (tr->data[cpu]->trace_idx > tr->entries)
1309                                 entries += tr->entries;
1310                         else
1311                                 entries += tr->data[cpu]->trace_idx;
1312                 }
1313         }
1314
1315         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1316                    name, UTS_RELEASE);
1317         seq_puts(m, "-----------------------------------"
1318                  "---------------------------------\n");
1319         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1320                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1321                    nsecs_to_usecs(data->saved_latency),
1322                    entries,
1323                    total,
1324                    tr->cpu,
1325 #if defined(CONFIG_PREEMPT_NONE)
1326                    "server",
1327 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1328                    "desktop",
1329 #elif defined(CONFIG_PREEMPT_DESKTOP)
1330                    "preempt",
1331 #else
1332                    "unknown",
1333 #endif
1334                    /* These are reserved for later use */
1335                    0, 0, 0, 0);
1336 #ifdef CONFIG_SMP
1337         seq_printf(m, " #P:%d)\n", num_online_cpus());
1338 #else
1339         seq_puts(m, ")\n");
1340 #endif
1341         seq_puts(m, "    -----------------\n");
1342         seq_printf(m, "    | task: %.16s-%d "
1343                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1344                    data->comm, data->pid, data->uid, data->nice,
1345                    data->policy, data->rt_priority);
1346         seq_puts(m, "    -----------------\n");
1347
1348         if (data->critical_start) {
1349                 seq_puts(m, " => started at: ");
1350                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1351                 trace_print_seq(m, &iter->seq);
1352                 seq_puts(m, "\n => ended at:   ");
1353                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1354                 trace_print_seq(m, &iter->seq);
1355                 seq_puts(m, "\n");
1356         }
1357
1358         seq_puts(m, "\n");
1359 }
1360
1361 static void
1362 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1363 {
1364         int hardirq, softirq;
1365         char *comm;
1366
1367         comm = trace_find_cmdline(entry->pid);
1368
1369         trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1370         trace_seq_printf(s, "%d", cpu);
1371         trace_seq_printf(s, "%c%c",
1372                         (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1373                         ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1374
1375         hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1376         softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1377         if (hardirq && softirq) {
1378                 trace_seq_putc(s, 'H');
1379         } else {
1380                 if (hardirq) {
1381                         trace_seq_putc(s, 'h');
1382                 } else {
1383                         if (softirq)
1384                                 trace_seq_putc(s, 's');
1385                         else
1386                                 trace_seq_putc(s, '.');
1387                 }
1388         }
1389
1390         if (entry->preempt_count)
1391                 trace_seq_printf(s, "%x", entry->preempt_count);
1392         else
1393                 trace_seq_puts(s, ".");
1394 }
1395
1396 unsigned long preempt_mark_thresh = 100;
1397
1398 static void
1399 lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1400                     unsigned long rel_usecs)
1401 {
1402         trace_seq_printf(s, " %4lldus", abs_usecs);
1403         if (rel_usecs > preempt_mark_thresh)
1404                 trace_seq_puts(s, "!: ");
1405         else if (rel_usecs > 1)
1406                 trace_seq_puts(s, "+: ");
1407         else
1408                 trace_seq_puts(s, " : ");
1409 }
1410
1411 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1412
1413 static int
1414 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1415 {
1416         struct trace_seq *s = &iter->seq;
1417         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1418         struct trace_entry *next_entry = find_next_entry(iter, NULL);
1419         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1420         struct trace_entry *entry = iter->ent;
1421         unsigned long abs_usecs;
1422         unsigned long rel_usecs;
1423         char *comm;
1424         int S, T;
1425         int i;
1426         unsigned state;
1427
1428         if (!next_entry)
1429                 next_entry = entry;
1430         rel_usecs = ns2usecs(next_entry->t - entry->t);
1431         abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1432
1433         if (verbose) {
1434                 comm = trace_find_cmdline(entry->pid);
1435                 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1436                                  " %ld.%03ldms (+%ld.%03ldms): ",
1437                                  comm,
1438                                  entry->pid, cpu, entry->flags,
1439                                  entry->preempt_count, trace_idx,
1440                                  ns2usecs(entry->t),
1441                                  abs_usecs/1000,
1442                                  abs_usecs % 1000, rel_usecs/1000,
1443                                  rel_usecs % 1000);
1444         } else {
1445                 lat_print_generic(s, entry, cpu);
1446                 lat_print_timestamp(s, abs_usecs, rel_usecs);
1447         }
1448         switch (entry->type) {
1449         case TRACE_FN:
1450                 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1451                 trace_seq_puts(s, " (");
1452                 if (kretprobed(entry->fn.parent_ip))
1453                         trace_seq_puts(s, KRETPROBE_MSG);
1454                 else
1455                         seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1456                 trace_seq_puts(s, ")\n");
1457                 break;
1458         case TRACE_CTX:
1459         case TRACE_WAKE:
1460                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1461                         state_to_char[entry->ctx.next_state] : 'X';
1462
1463                 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
1464                 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1465                 comm = trace_find_cmdline(entry->ctx.next_pid);
1466                 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1467                                  entry->ctx.prev_pid,
1468                                  entry->ctx.prev_prio,
1469                                  S, entry->type == TRACE_CTX ? "==>" : "  +",
1470                                  entry->ctx.next_pid,
1471                                  entry->ctx.next_prio,
1472                                  T, comm);
1473                 break;
1474         case TRACE_SPECIAL:
1475                 trace_seq_printf(s, "# %ld %ld %ld\n",
1476                                  entry->special.arg1,
1477                                  entry->special.arg2,
1478                                  entry->special.arg3);
1479                 break;
1480         case TRACE_STACK:
1481                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1482                         if (i)
1483                                 trace_seq_puts(s, " <= ");
1484                         seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1485                 }
1486                 trace_seq_puts(s, "\n");
1487                 break;
1488         default:
1489                 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1490         }
1491         return 1;
1492 }
1493
1494 static int print_trace_fmt(struct trace_iterator *iter)
1495 {
1496         struct trace_seq *s = &iter->seq;
1497         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1498         struct trace_entry *entry;
1499         unsigned long usec_rem;
1500         unsigned long long t;
1501         unsigned long secs;
1502         char *comm;
1503         int ret;
1504         int S, T;
1505         int i;
1506
1507         entry = iter->ent;
1508
1509         comm = trace_find_cmdline(iter->ent->pid);
1510
1511         t = ns2usecs(entry->t);
1512         usec_rem = do_div(t, 1000000ULL);
1513         secs = (unsigned long)t;
1514
1515         ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1516         if (!ret)
1517                 return 0;
1518         ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1519         if (!ret)
1520                 return 0;
1521         ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1522         if (!ret)
1523                 return 0;
1524
1525         switch (entry->type) {
1526         case TRACE_FN:
1527                 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1528                 if (!ret)
1529                         return 0;
1530                 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1531                                                 entry->fn.parent_ip) {
1532                         ret = trace_seq_printf(s, " <-");
1533                         if (!ret)
1534                                 return 0;
1535                         if (kretprobed(entry->fn.parent_ip))
1536                                 ret = trace_seq_puts(s, KRETPROBE_MSG);
1537                         else
1538                                 ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1539                                                        sym_flags);
1540                         if (!ret)
1541                                 return 0;
1542                 }
1543                 ret = trace_seq_printf(s, "\n");
1544                 if (!ret)
1545                         return 0;
1546                 break;
1547         case TRACE_CTX:
1548         case TRACE_WAKE:
1549                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1550                         state_to_char[entry->ctx.prev_state] : 'X';
1551                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1552                         state_to_char[entry->ctx.next_state] : 'X';
1553                 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1554                                        entry->ctx.prev_pid,
1555                                        entry->ctx.prev_prio,
1556                                        S,
1557                                        entry->type == TRACE_CTX ? "==>" : "  +",
1558                                        entry->ctx.next_pid,
1559                                        entry->ctx.next_prio,
1560                                        T);
1561                 if (!ret)
1562                         return 0;
1563                 break;
1564         case TRACE_SPECIAL:
1565                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1566                                  entry->special.arg1,
1567                                  entry->special.arg2,
1568                                  entry->special.arg3);
1569                 if (!ret)
1570                         return 0;
1571                 break;
1572         case TRACE_STACK:
1573                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1574                         if (i) {
1575                                 ret = trace_seq_puts(s, " <= ");
1576                                 if (!ret)
1577                                         return 0;
1578                         }
1579                         ret = seq_print_ip_sym(s, entry->stack.caller[i],
1580                                                sym_flags);
1581                         if (!ret)
1582                                 return 0;
1583                 }
1584                 ret = trace_seq_puts(s, "\n");
1585                 if (!ret)
1586                         return 0;
1587                 break;
1588         }
1589         return 1;
1590 }
1591
1592 static int print_raw_fmt(struct trace_iterator *iter)
1593 {
1594         struct trace_seq *s = &iter->seq;
1595         struct trace_entry *entry;
1596         int ret;
1597         int S, T;
1598
1599         entry = iter->ent;
1600
1601         ret = trace_seq_printf(s, "%d %d %llu ",
1602                 entry->pid, iter->cpu, entry->t);
1603         if (!ret)
1604                 return 0;
1605
1606         switch (entry->type) {
1607         case TRACE_FN:
1608                 ret = trace_seq_printf(s, "%x %x\n",
1609                                         entry->fn.ip, entry->fn.parent_ip);
1610                 if (!ret)
1611                         return 0;
1612                 break;
1613         case TRACE_CTX:
1614         case TRACE_WAKE:
1615                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1616                         state_to_char[entry->ctx.prev_state] : 'X';
1617                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1618                         state_to_char[entry->ctx.next_state] : 'X';
1619                 if (entry->type == TRACE_WAKE)
1620                         S = '+';
1621                 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1622                                        entry->ctx.prev_pid,
1623                                        entry->ctx.prev_prio,
1624                                        S,
1625                                        entry->ctx.next_pid,
1626                                        entry->ctx.next_prio,
1627                                        T);
1628                 if (!ret)
1629                         return 0;
1630                 break;
1631         case TRACE_SPECIAL:
1632         case TRACE_STACK:
1633                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1634                                  entry->special.arg1,
1635                                  entry->special.arg2,
1636                                  entry->special.arg3);
1637                 if (!ret)
1638                         return 0;
1639                 break;
1640         }
1641         return 1;
1642 }
1643
1644 #define SEQ_PUT_FIELD_RET(s, x)                         \
1645 do {                                                    \
1646         if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
1647                 return 0;                               \
1648 } while (0)
1649
1650 #define SEQ_PUT_HEX_FIELD_RET(s, x)                     \
1651 do {                                                    \
1652         if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
1653                 return 0;                               \
1654 } while (0)
1655
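/*
 * Minimal sketch of the pattern the *_RET macros enforce, using a
 * hypothetical helper that is not part of this file: the enclosing
 * formatter returns 0 the moment the seq buffer overflows, and only
 * reports success once every field has been written.
 */
static int __maybe_unused print_pid_hex_sketch(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;

	/* hypothetical example only: bail out with 0 if the buffer is full */
	SEQ_PUT_HEX_FIELD_RET(s, iter->ent->pid);
	SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);

	return 1;	/* every field fit */
}
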
1656 static int print_hex_fmt(struct trace_iterator *iter)
1657 {
1658         struct trace_seq *s = &iter->seq;
1659         unsigned char newline = '\n';
1660         struct trace_entry *entry;
1661         int S, T;
1662
1663         entry = iter->ent;
1664
1665         SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1666         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1667         SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1668
1669         switch (entry->type) {
1670         case TRACE_FN:
1671                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1672                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1673                 break;
1674         case TRACE_CTX:
1675         case TRACE_WAKE:
1676                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1677                         state_to_char[entry->ctx.prev_state] : 'X';
1678                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1679                         state_to_char[entry->ctx.next_state] : 'X';
1680                 if (entry->type == TRACE_WAKE)
1681                         S = '+';
1682                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1683                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1684                 SEQ_PUT_HEX_FIELD_RET(s, S);
1685                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1686                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1688                 SEQ_PUT_HEX_FIELD_RET(s, T);
1689                 break;
1690         case TRACE_SPECIAL:
1691         case TRACE_STACK:
1692                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1693                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1694                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1695                 break;
1696         }
1697         SEQ_PUT_FIELD_RET(s, newline);
1698
1699         return 1;
1700 }
1701
1702 static int print_bin_fmt(struct trace_iterator *iter)
1703 {
1704         struct trace_seq *s = &iter->seq;
1705         struct trace_entry *entry;
1706
1707         entry = iter->ent;
1708
1709         SEQ_PUT_FIELD_RET(s, entry->pid);
1710         SEQ_PUT_FIELD_RET(s, entry->cpu);
1711         SEQ_PUT_FIELD_RET(s, entry->t);
1712
1713         switch (entry->type) {
1714         case TRACE_FN:
1715                 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1716                 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1717                 break;
1718         case TRACE_CTX:
1719                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1720                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1721                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1722                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1723                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1724                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1725                 break;
1726         case TRACE_SPECIAL:
1727         case TRACE_STACK:
1728                 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1729                 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1730                 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1731                 break;
1732         }
1733         return 1;
1734 }
1735
1736 static int trace_empty(struct trace_iterator *iter)
1737 {
1738         struct trace_array_cpu *data;
1739         int cpu;
1740
1741         for_each_tracing_cpu(cpu) {
1742                 data = iter->tr->data[cpu];
1743
1744                 if (head_page(data) && data->trace_idx &&
1745                     (data->trace_tail != data->trace_head ||
1746                      data->trace_tail_idx != data->trace_head_idx))
1747                         return 0;
1748         }
1749         return 1;
1750 }
1751
1752 static int print_trace_line(struct trace_iterator *iter)
1753 {
1754         if (iter->trace && iter->trace->print_line)
1755                 return iter->trace->print_line(iter);
1756
1757         if (trace_flags & TRACE_ITER_BIN)
1758                 return print_bin_fmt(iter);
1759
1760         if (trace_flags & TRACE_ITER_HEX)
1761                 return print_hex_fmt(iter);
1762
1763         if (trace_flags & TRACE_ITER_RAW)
1764                 return print_raw_fmt(iter);
1765
1766         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1767                 return print_lat_fmt(iter, iter->idx, iter->cpu);
1768
1769         return print_trace_fmt(iter);
1770 }
1771
1772 static int s_show(struct seq_file *m, void *v)
1773 {
1774         struct trace_iterator *iter = v;
1775
1776         if (iter->ent == NULL) {
1777                 if (iter->tr) {
1778                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1779                         seq_puts(m, "#\n");
1780                 }
1781                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1782                         /* print nothing if the buffers are empty */
1783                         if (trace_empty(iter))
1784                                 return 0;
1785                         print_trace_header(m, iter);
1786                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1787                                 print_lat_help_header(m);
1788                 } else {
1789                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1790                                 print_func_help_header(m);
1791                 }
1792         } else {
1793                 print_trace_line(iter);
1794                 trace_print_seq(m, &iter->seq);
1795         }
1796
1797         return 0;
1798 }
1799
1800 static struct seq_operations tracer_seq_ops = {
1801         .start          = s_start,
1802         .next           = s_next,
1803         .stop           = s_stop,
1804         .show           = s_show,
1805 };
1806
1807 static struct trace_iterator *
1808 __tracing_open(struct inode *inode, struct file *file, int *ret)
1809 {
1810         struct trace_iterator *iter;
1811
1812         if (tracing_disabled) {
1813                 *ret = -ENODEV;
1814                 return NULL;
1815         }
1816
1817         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1818         if (!iter) {
1819                 *ret = -ENOMEM;
1820                 goto out;
1821         }
1822
1823         mutex_lock(&trace_types_lock);
1824         if (current_trace && current_trace->print_max)
1825                 iter->tr = &max_tr;
1826         else
1827                 iter->tr = inode->i_private;
1828         iter->trace = current_trace;
1829         iter->pos = -1;
1830
1831         /* TODO stop tracer */
1832         *ret = seq_open(file, &tracer_seq_ops);
1833         if (!*ret) {
1834                 struct seq_file *m = file->private_data;
1835                 m->private = iter;
1836
1837                 /* stop the trace while dumping */
1838                 if (iter->tr->ctrl)
1839                         tracer_enabled = 0;
1840
1841                 if (iter->trace && iter->trace->open)
1842                         iter->trace->open(iter);
1843         } else {
1844                 kfree(iter);
1845                 iter = NULL;
1846         }
1847         mutex_unlock(&trace_types_lock);
1848
1849  out:
1850         return iter;
1851 }
1852
1853 int tracing_open_generic(struct inode *inode, struct file *filp)
1854 {
1855         if (tracing_disabled)
1856                 return -ENODEV;
1857
1858         filp->private_data = inode->i_private;
1859         return 0;
1860 }
1861
1862 int tracing_release(struct inode *inode, struct file *file)
1863 {
1864         struct seq_file *m = (struct seq_file *)file->private_data;
1865         struct trace_iterator *iter = m->private;
1866
1867         mutex_lock(&trace_types_lock);
1868         if (iter->trace && iter->trace->close)
1869                 iter->trace->close(iter);
1870
1871         /* reenable tracing if it was previously enabled */
1872         if (iter->tr->ctrl)
1873                 tracer_enabled = 1;
1874         mutex_unlock(&trace_types_lock);
1875
1876         seq_release(inode, file);
1877         kfree(iter);
1878         return 0;
1879 }
1880
1881 static int tracing_open(struct inode *inode, struct file *file)
1882 {
1883         int ret;
1884
1885         __tracing_open(inode, file, &ret);
1886
1887         return ret;
1888 }
1889
1890 static int tracing_lt_open(struct inode *inode, struct file *file)
1891 {
1892         struct trace_iterator *iter;
1893         int ret;
1894
1895         iter = __tracing_open(inode, file, &ret);
1896
1897         if (!ret)
1898                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1899
1900         return ret;
1901 }
1902
1903
1904 static void *
1905 t_next(struct seq_file *m, void *v, loff_t *pos)
1906 {
1907         struct tracer *t = m->private;
1908
1909         (*pos)++;
1910
1911         if (t)
1912                 t = t->next;
1913
1914         m->private = t;
1915
1916         return t;
1917 }
1918
1919 static void *t_start(struct seq_file *m, loff_t *pos)
1920 {
1921         struct tracer *t = m->private;
1922         loff_t l = 0;
1923
1924         mutex_lock(&trace_types_lock);
1925         for (; t && l < *pos; t = t_next(m, t, &l))
1926                 ;
1927
1928         return t;
1929 }
1930
1931 static void t_stop(struct seq_file *m, void *p)
1932 {
1933         mutex_unlock(&trace_types_lock);
1934 }
1935
1936 static int t_show(struct seq_file *m, void *v)
1937 {
1938         struct tracer *t = v;
1939
1940         if (!t)
1941                 return 0;
1942
1943         seq_printf(m, "%s", t->name);
1944         if (t->next)
1945                 seq_putc(m, ' ');
1946         else
1947                 seq_putc(m, '\n');
1948
1949         return 0;
1950 }
1951
1952 static struct seq_operations show_traces_seq_ops = {
1953         .start          = t_start,
1954         .next           = t_next,
1955         .stop           = t_stop,
1956         .show           = t_show,
1957 };
1958
1959 static int show_traces_open(struct inode *inode, struct file *file)
1960 {
1961         int ret;
1962
1963         if (tracing_disabled)
1964                 return -ENODEV;
1965
1966         ret = seq_open(file, &show_traces_seq_ops);
1967         if (!ret) {
1968                 struct seq_file *m = file->private_data;
1969                 m->private = trace_types;
1970         }
1971
1972         return ret;
1973 }
1974
1975 static struct file_operations tracing_fops = {
1976         .open           = tracing_open,
1977         .read           = seq_read,
1978         .llseek         = seq_lseek,
1979         .release        = tracing_release,
1980 };
1981
1982 static struct file_operations tracing_lt_fops = {
1983         .open           = tracing_lt_open,
1984         .read           = seq_read,
1985         .llseek         = seq_lseek,
1986         .release        = tracing_release,
1987 };
1988
1989 static struct file_operations show_traces_fops = {
1990         .open           = show_traces_open,
1991         .read           = seq_read,
1992         .release        = seq_release,
1993 };
1994
1995 /*
1996  * Only trace on a CPU if the bitmask is set:
1997  */
1998 static cpumask_t tracing_cpumask = CPU_MASK_ALL;
1999
2000 /*
2001  * When tracing/tracing_cpu_mask is modified then this holds
2002  * the new bitmask we are about to install:
2003  */
2004 static cpumask_t tracing_cpumask_new;
2005
2006 /*
2007  * The tracer itself will not take this lock, but still we want
2008  * to provide a consistent cpumask to user-space:
2009  */
2010 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2011
2012 /*
2013  * Temporary storage for the character representation of the
2014  * CPU bitmask (and one more byte for the newline):
2015  */
2016 static char mask_str[NR_CPUS + 1];
2017
2018 static ssize_t
2019 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2020                      size_t count, loff_t *ppos)
2021 {
2022         int len;
2023
2024         mutex_lock(&tracing_cpumask_update_lock);
2025
2026         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2027         if (count - len < 2) {
2028                 count = -EINVAL;
2029                 goto out_err;
2030         }
2031         len += sprintf(mask_str + len, "\n");
2032         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2033
2034 out_err:
2035         mutex_unlock(&tracing_cpumask_update_lock);
2036
2037         return count;
2038 }
2039
2040 static ssize_t
2041 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2042                       size_t count, loff_t *ppos)
2043 {
2044         int err, cpu;
2045
2046         mutex_lock(&tracing_cpumask_update_lock);
2047         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2048         if (err)
2049                 goto err_unlock;
2050
2051         raw_local_irq_disable();
2052         __raw_spin_lock(&ftrace_max_lock);
2053         for_each_tracing_cpu(cpu) {
2054                 /*
2055                  * Increase/decrease the disabled counter if we are
2056                  * about to flip a bit in the cpumask:
2057                  */
2058                 if (cpu_isset(cpu, tracing_cpumask) &&
2059                                 !cpu_isset(cpu, tracing_cpumask_new)) {
2060                         atomic_inc(&global_trace.data[cpu]->disabled);
2061                 }
2062                 if (!cpu_isset(cpu, tracing_cpumask) &&
2063                                 cpu_isset(cpu, tracing_cpumask_new)) {
2064                         atomic_dec(&global_trace.data[cpu]->disabled);
2065                 }
2066         }
2067         __raw_spin_unlock(&ftrace_max_lock);
2068         raw_local_irq_enable();
2069
2070         tracing_cpumask = tracing_cpumask_new;
2071
2072         mutex_unlock(&tracing_cpumask_update_lock);
2073
2074         return count;
2075
2076 err_unlock:
2077         mutex_unlock(&tracing_cpumask_update_lock);
2078
2079         return err;
2080 }
2081
2082 static struct file_operations tracing_cpumask_fops = {
2083         .open           = tracing_open_generic,
2084         .read           = tracing_cpumask_read,
2085         .write          = tracing_cpumask_write,
2086 };
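
/*
 * Example interaction (a sketch; the /debug mount point follows the
 * mini-HOWTO readme further down and may differ on a given system):
 *
 *   # echo 3 > /debug/tracing/tracing_cpumask
 *
 * The value is parsed as a hex cpumask, so "3" keeps tracing enabled
 * only on CPUs 0 and 1; tracing_cpumask_write() above bumps the
 * disabled counter of every CPU whose bit is being cleared.
 */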
2087
2088 static ssize_t
2089 tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2090                        size_t cnt, loff_t *ppos)
2091 {
2092         char *buf;
2093         int r = 0;
2094         int len = 0;
2095         int i;
2096
2097         /* calculate the maximum size */
2098         for (i = 0; trace_options[i]; i++) {
2099                 len += strlen(trace_options[i]);
2100                 len += 3; /* "no" and space */
2101         }
2102
2103         /* +2 for \n and \0 */
2104         buf = kmalloc(len + 2, GFP_KERNEL);
2105         if (!buf)
2106                 return -ENOMEM;
2107
2108         for (i = 0; trace_options[i]; i++) {
2109                 if (trace_flags & (1 << i))
2110                         r += sprintf(buf + r, "%s ", trace_options[i]);
2111                 else
2112                         r += sprintf(buf + r, "no%s ", trace_options[i]);
2113         }
2114
2115         r += sprintf(buf + r, "\n");
2116         WARN_ON(r >= len + 2);
2117
2118         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2119
2120         kfree(buf);
2121
2122         return r;
2123 }
2124
2125 static ssize_t
2126 tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2127                         size_t cnt, loff_t *ppos)
2128 {
2129         char buf[64];
2130         char *cmp = buf;
2131         int neg = 0;
2132         int i;
2133
2134         if (cnt >= sizeof(buf))
2135                 return -EINVAL;
2136
2137         if (copy_from_user(&buf, ubuf, cnt))
2138                 return -EFAULT;
2139
2140         buf[cnt] = 0;
2141
2142         if (strncmp(buf, "no", 2) == 0) {
2143                 neg = 1;
2144                 cmp += 2;
2145         }
2146
2147         for (i = 0; trace_options[i]; i++) {
2148                 int len = strlen(trace_options[i]);
2149
2150                 if (strncmp(cmp, trace_options[i], len) == 0) {
2151                         if (neg)
2152                                 trace_flags &= ~(1 << i);
2153                         else
2154                                 trace_flags |= (1 << i);
2155                         break;
2156                 }
2157         }
2158         /*
2159          * If no option could be set, return an error:
2160          */
2161         if (!trace_options[i])
2162                 return -EINVAL;
2163
2164         filp->f_pos += cnt;
2165
2166         return cnt;
2167 }
2168
2169 static struct file_operations tracing_iter_fops = {
2170         .open           = tracing_open_generic,
2171         .read           = tracing_iter_ctrl_read,
2172         .write          = tracing_iter_ctrl_write,
2173 };
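
/*
 * Example interaction for iter_ctrl (a sketch, same /debug mount point
 * as in the readme below): writing an option name sets the matching
 * bit in trace_flags, and a "no" prefix clears it again.
 *
 *   # echo sym-offset > /debug/tracing/iter_ctrl
 *   # echo nosym-offset > /debug/tracing/iter_ctrl
 */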
2174
2175 static const char readme_msg[] =
2176         "tracing mini-HOWTO:\n\n"
2177         "# mkdir /debug\n"
2178         "# mount -t debugfs nodev /debug\n\n"
2179         "# cat /debug/tracing/available_tracers\n"
2180         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
2181         "# cat /debug/tracing/current_tracer\n"
2182         "none\n"
2183         "# echo sched_switch > /debug/tracing/current_tracer\n"
2184         "# cat /debug/tracing/current_tracer\n"
2185         "sched_switch\n"
2186         "# cat /debug/tracing/iter_ctrl\n"
2187         "noprint-parent nosym-offset nosym-addr noverbose\n"
2188         "# echo print-parent > /debug/tracing/iter_ctrl\n"
2189         "# echo 1 > /debug/tracing/tracing_enabled\n"
2190         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2191         "# echo 0 > /debug/tracing/tracing_enabled\n"
2192 ;
2193
2194 static ssize_t
2195 tracing_readme_read(struct file *filp, char __user *ubuf,
2196                        size_t cnt, loff_t *ppos)
2197 {
2198         return simple_read_from_buffer(ubuf, cnt, ppos,
2199                                         readme_msg, strlen(readme_msg));
2200 }
2201
2202 static struct file_operations tracing_readme_fops = {
2203         .open           = tracing_open_generic,
2204         .read           = tracing_readme_read,
2205 };
2206
2207 static ssize_t
2208 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2209                   size_t cnt, loff_t *ppos)
2210 {
2211         struct trace_array *tr = filp->private_data;
2212         char buf[64];
2213         int r;
2214
2215         r = sprintf(buf, "%ld\n", tr->ctrl);
2216         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2217 }
2218
2219 static ssize_t
2220 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2221                    size_t cnt, loff_t *ppos)
2222 {
2223         struct trace_array *tr = filp->private_data;
2224         char buf[64];
2225         long val;
2226         int ret;
2227
2228         if (cnt >= sizeof(buf))
2229                 return -EINVAL;
2230
2231         if (copy_from_user(&buf, ubuf, cnt))
2232                 return -EFAULT;
2233
2234         buf[cnt] = 0;
2235
2236         ret = strict_strtoul(buf, 10, &val);
2237         if (ret < 0)
2238                 return ret;
2239
2240         val = !!val;
2241
2242         mutex_lock(&trace_types_lock);
2243         if (tr->ctrl ^ val) {
2244                 if (val)
2245                         tracer_enabled = 1;
2246                 else
2247                         tracer_enabled = 0;
2248
2249                 tr->ctrl = val;
2250
2251                 if (current_trace && current_trace->ctrl_update)
2252                         current_trace->ctrl_update(tr);
2253         }
2254         mutex_unlock(&trace_types_lock);
2255
2256         filp->f_pos += cnt;
2257
2258         return cnt;
2259 }
2260
2261 static ssize_t
2262 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2263                        size_t cnt, loff_t *ppos)
2264 {
2265         char buf[max_tracer_type_len+2];
2266         int r;
2267
2268         mutex_lock(&trace_types_lock);
2269         if (current_trace)
2270                 r = sprintf(buf, "%s\n", current_trace->name);
2271         else
2272                 r = sprintf(buf, "\n");
2273         mutex_unlock(&trace_types_lock);
2274
2275         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2276 }
2277
2278 static ssize_t
2279 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2280                         size_t cnt, loff_t *ppos)
2281 {
2282         struct trace_array *tr = &global_trace;
2283         struct tracer *t;
2284         char buf[max_tracer_type_len+1];
2285         int i;
2286
2287         if (cnt > max_tracer_type_len)
2288                 cnt = max_tracer_type_len;
2289
2290         if (copy_from_user(&buf, ubuf, cnt))
2291                 return -EFAULT;
2292
2293         buf[cnt] = 0;
2294
2295         /* strip trailing whitespace. */
2296         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2297                 buf[i] = 0;
2298
2299         mutex_lock(&trace_types_lock);
2300         for (t = trace_types; t; t = t->next) {
2301                 if (strcmp(t->name, buf) == 0)
2302                         break;
2303         }
2304         if (!t || t == current_trace)
2305                 goto out;
2306
2307         if (current_trace && current_trace->reset)
2308                 current_trace->reset(tr);
2309
2310         current_trace = t;
2311         if (t->init)
2312                 t->init(tr);
2313
2314  out:
2315         mutex_unlock(&trace_types_lock);
2316
2317         filp->f_pos += cnt;
2318
2319         return cnt;
2320 }
2321
2322 static ssize_t
2323 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2324                      size_t cnt, loff_t *ppos)
2325 {
2326         unsigned long *ptr = filp->private_data;
2327         char buf[64];
2328         int r;
2329
2330         r = snprintf(buf, sizeof(buf), "%ld\n",
2331                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2332         if (r > sizeof(buf))
2333                 r = sizeof(buf);
2334         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2335 }
2336
2337 static ssize_t
2338 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2339                       size_t cnt, loff_t *ppos)
2340 {
2341         long *ptr = filp->private_data;
2342         char buf[64];
2343         long val;
2344         int ret;
2345
2346         if (cnt >= sizeof(buf))
2347                 return -EINVAL;
2348
2349         if (copy_from_user(&buf, ubuf, cnt))
2350                 return -EFAULT;
2351
2352         buf[cnt] = 0;
2353
2354         ret = strict_strtoul(buf, 10, &val);
2355         if (ret < 0)
2356                 return ret;
2357
2358         *ptr = val * 1000;
2359
2360         return cnt;
2361 }
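
/*
 * Worked example of the unit handling above (a sketch): writing "200"
 * to tracing_max_latency stores 200 * 1000 = 200000 ns, and
 * tracing_max_lat_read() reports it back as 200 usecs through
 * nsecs_to_usecs(); a value of (unsigned long)-1 reads back as "-1".
 */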
2362
2363 static atomic_t tracing_reader;
2364
2365 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2366 {
2367         struct trace_iterator *iter;
2368
2369         if (tracing_disabled)
2370                 return -ENODEV;
2371
2372         /* We only allow one reader of the pipe */
2373         if (atomic_inc_return(&tracing_reader) != 1) {
2374                 atomic_dec(&tracing_reader);
2375                 return -EBUSY;
2376         }
2377
2378         /* create a buffer to store the information to pass to userspace */
2379         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2380         if (!iter)
2381                 return -ENOMEM;
2382
2383         mutex_lock(&trace_types_lock);
2384         iter->tr = &global_trace;
2385         iter->trace = current_trace;
2386         filp->private_data = iter;
2387
2388         if (iter->trace->pipe_open)
2389                 iter->trace->pipe_open(iter);
2390         mutex_unlock(&trace_types_lock);
2391
2392         return 0;
2393 }
2394
2395 static int tracing_release_pipe(struct inode *inode, struct file *file)
2396 {
2397         struct trace_iterator *iter = file->private_data;
2398
2399         kfree(iter);
2400         atomic_dec(&tracing_reader);
2401
2402         return 0;
2403 }
2404
2405 static unsigned int
2406 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2407 {
2408         struct trace_iterator *iter = filp->private_data;
2409
2410         if (trace_flags & TRACE_ITER_BLOCK) {
2411                 /*
2412                  * Always select as readable when in blocking mode
2413                  */
2414                 return POLLIN | POLLRDNORM;
2415         } else {
2416                 if (!trace_empty(iter))
2417                         return POLLIN | POLLRDNORM;
2418                 poll_wait(filp, &trace_wait, poll_table);
2419                 if (!trace_empty(iter))
2420                         return POLLIN | POLLRDNORM;
2421
2422                 return 0;
2423         }
2424 }
2425
2426 /*
2427  * Consumer reader.
2428  */
2429 static ssize_t
2430 tracing_read_pipe(struct file *filp, char __user *ubuf,
2431                   size_t cnt, loff_t *ppos)
2432 {
2433         struct trace_iterator *iter = filp->private_data;
2434         struct trace_array_cpu *data;
2435         static cpumask_t mask;
2436         unsigned long flags;
2437 #ifdef CONFIG_FTRACE
2438         int ftrace_save;
2439 #endif
2440         int cpu;
2441         ssize_t sret;
2442
2443         /* return any leftover data */
2444         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2445         if (sret != -EBUSY)
2446                 return sret;
2447         sret = 0;
2448
2449         trace_seq_reset(&iter->seq);
2450
2451         mutex_lock(&trace_types_lock);
2452         if (iter->trace->read) {
2453                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2454                 if (sret)
2455                         goto out;
2456         }
2457
2458         while (trace_empty(iter)) {
2459
2460                 if ((filp->f_flags & O_NONBLOCK)) {
2461                         sret = -EAGAIN;
2462                         goto out;
2463                 }
2464
2465                 /*
2466                  * This is a makeshift waitqueue. We do not use a real
2467                  * wait queue because:
2468                  *  1) we only ever have one waiter
2469                  *  2) the tracer traces all functions, and we do not want
2470                  *     the overhead of calling wake_up and friends
2471                  *     (and of tracing them too)
2472                  *     Either way, this is a very primitive wakeup.
2473                  */
2474                 set_current_state(TASK_INTERRUPTIBLE);
2475                 iter->tr->waiter = current;
2476
2477                 mutex_unlock(&trace_types_lock);
2478
2479                 /* sleep for 100 msecs, and try again. */
2480                 schedule_timeout(HZ/10);
2481
2482                 mutex_lock(&trace_types_lock);
2483
2484                 iter->tr->waiter = NULL;
2485
2486                 if (signal_pending(current)) {
2487                         sret = -EINTR;
2488                         goto out;
2489                 }
2490
2491                 if (iter->trace != current_trace)
2492                         goto out;
2493
2494                 /*
2495                  * We stop blocking only once tracing is disabled and we
2496                  * have read something; we still block if tracing is
2497                  * disabled but nothing has been read yet. This lets a user
2498                  * cat this file and then enable tracing. Once something has
2499                  * been read, we give an EOF when tracing is disabled again.
2500                  *
2501                  * iter->pos will be 0 if we haven't read anything.
2502                  */
2503                 if (!tracer_enabled && iter->pos)
2504                         break;
2505
2506                 continue;
2507         }
2508
2509         /* stop when tracing is finished */
2510         if (trace_empty(iter))
2511                 goto out;
2512
2513         if (cnt >= PAGE_SIZE)
2514                 cnt = PAGE_SIZE - 1;
2515
2516         /* reset all but tr, trace, and overruns */
2517         memset(&iter->seq, 0,
2518                sizeof(struct trace_iterator) -
2519                offsetof(struct trace_iterator, seq));
2520         iter->pos = -1;
2521
2522         /*
2523          * We need to stop all tracing on all CPUs to read
2524          * the next buffer. This is a bit expensive, but it is
2525          * not done often. We fill in all that we can read,
2526          * and then release the locks again.
2527          */
2528
2529         cpus_clear(mask);
2530         local_irq_save(flags);
2531 #ifdef CONFIG_FTRACE
2532         ftrace_save = ftrace_enabled;
2533         ftrace_enabled = 0;
2534 #endif
2535         smp_wmb();
2536         for_each_tracing_cpu(cpu) {
2537                 data = iter->tr->data[cpu];
2538
2539                 if (!head_page(data) || !data->trace_idx)
2540                         continue;
2541
2542                 atomic_inc(&data->disabled);
2543                 cpu_set(cpu, mask);
2544         }
2545
2546         for_each_cpu_mask(cpu, mask) {
2547                 data = iter->tr->data[cpu];
2548                 __raw_spin_lock(&data->lock);
2549
2550                 if (data->overrun > iter->last_overrun[cpu])
2551                         iter->overrun[cpu] +=
2552                                 data->overrun - iter->last_overrun[cpu];
2553                 iter->last_overrun[cpu] = data->overrun;
2554         }
2555
2556         while (find_next_entry_inc(iter) != NULL) {
2557                 int ret;
2558                 int len = iter->seq.len;
2559
2560                 ret = print_trace_line(iter);
2561                 if (!ret) {
2562                         /* don't print partial lines */
2563                         iter->seq.len = len;
2564                         break;
2565                 }
2566
2567                 trace_consume(iter);
2568
2569                 if (iter->seq.len >= cnt)
2570                         break;
2571         }
2572
2573         for_each_cpu_mask(cpu, mask) {
2574                 data = iter->tr->data[cpu];
2575                 __raw_spin_unlock(&data->lock);
2576         }
2577
2578         for_each_cpu_mask(cpu, mask) {
2579                 data = iter->tr->data[cpu];
2580                 atomic_dec(&data->disabled);
2581         }
2582 #ifdef CONFIG_FTRACE
2583         ftrace_enabled = ftrace_save;
2584 #endif
2585         local_irq_restore(flags);
2586
2587         /* Now copy what we have to the user */
2588         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2589         if (iter->seq.readpos >= iter->seq.len)
2590                 trace_seq_reset(&iter->seq);
2591         if (sret == -EBUSY)
2592                 sret = 0;
2593
2594 out:
2595         mutex_unlock(&trace_types_lock);
2596
2597         return sret;
2598 }
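
/*
 * Typical use of the pipe interface (a sketch): a single reader, e.g.
 *
 *   # cat /debug/tracing/trace_pipe
 *
 * blocks until entries are available and consumes them as they are
 * printed, so the same data is not reported twice; tracing_open_pipe()
 * above rejects a second concurrent reader with -EBUSY.
 */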
2599
2600 static ssize_t
2601 tracing_entries_read(struct file *filp, char __user *ubuf,
2602                      size_t cnt, loff_t *ppos)
2603 {
2604         struct trace_array *tr = filp->private_data;
2605         char buf[64];
2606         int r;
2607
2608         r = sprintf(buf, "%lu\n", tr->entries);
2609         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2610 }
2611
2612 static ssize_t
2613 tracing_entries_write(struct file *filp, const char __user *ubuf,
2614                       size_t cnt, loff_t *ppos)
2615 {
2616         unsigned long val;
2617         char buf[64];
2618         int i, ret;
2619
2620         if (cnt >= sizeof(buf))
2621                 return -EINVAL;
2622
2623         if (copy_from_user(&buf, ubuf, cnt))
2624                 return -EFAULT;
2625
2626         buf[cnt] = 0;
2627
2628         ret = strict_strtoul(buf, 10, &val);
2629         if (ret < 0)
2630                 return ret;
2631
2632         /* must have at least 1 entry */
2633         if (!val)
2634                 return -EINVAL;
2635
2636         mutex_lock(&trace_types_lock);
2637
2638         if (current_trace != &no_tracer) {
2639                 cnt = -EBUSY;
2640                 pr_info("ftrace: set current_tracer to none"
2641                         " before modifying buffer size\n");
2642                 goto out;
2643         }
2644
2645         if (val > global_trace.entries) {
2646                 long pages_requested;
2647                 unsigned long freeable_pages;
2648
2649                 /* make sure we have enough memory before mapping */
2650                 pages_requested =
2651                         (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2652
2653                 /* account for each buffer (and max_tr) */
2654                 pages_requested *= tracing_nr_buffers * 2;
2655
2656                 /* Check for overflow */
2657                 if (pages_requested < 0) {
2658                         cnt = -ENOMEM;
2659                         goto out;
2660                 }
2661
2662                 freeable_pages = determine_dirtyable_memory();
2663
2664                 /* we only allow a request of up to 1/4 of usable memory */
2665                 if (pages_requested >
2666                     ((freeable_pages + tracing_pages_allocated) / 4)) {
2667                         cnt = -ENOMEM;
2668                         goto out;
2669                 }
2670
2671                 while (global_trace.entries < val) {
2672                         if (trace_alloc_page()) {
2673                                 cnt = -ENOMEM;
2674                                 goto out;
2675                         }
2676                         /* double check that we don't go over the known pages */
2677                         if (tracing_pages_allocated > pages_requested)
2678                                 break;
2679                 }
2680
2681         } else {
2682                 /* shrink down to val entries, rounded up to a whole page of entries */
2683                 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2684                         trace_free_page();
2685         }
2686
2687         /* check integrity */
2688         for_each_tracing_cpu(i)
2689                 check_pages(global_trace.data[i]);
2690
2691         filp->f_pos += cnt;
2692
2693         /* If check pages failed, return ENOMEM */
2694         if (tracing_disabled)
2695                 cnt = -ENOMEM;
2696  out:
2697         max_tr.entries = global_trace.entries;
2698         mutex_unlock(&trace_types_lock);
2699
2700         return cnt;
2701 }
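
/*
 * Worked reading of the sizing check above (a sketch, with a
 * hypothetical 4-CPU box and 1000 entries per page): a request for
 * 65536 entries ("echo 65536 > trace_entries") needs
 * ceil(65536 / 1000) = 66 pages per buffer, times 4 CPUs, times 2 for
 * max_tr, i.e. 528 pages in total; the write is refused with -ENOMEM
 * if that exceeds a quarter of the freeable memory plus the pages
 * already allocated for tracing.
 */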
2702
2703 static struct file_operations tracing_max_lat_fops = {
2704         .open           = tracing_open_generic,
2705         .read           = tracing_max_lat_read,
2706         .write          = tracing_max_lat_write,
2707 };
2708
2709 static struct file_operations tracing_ctrl_fops = {
2710         .open           = tracing_open_generic,
2711         .read           = tracing_ctrl_read,
2712         .write          = tracing_ctrl_write,
2713 };
2714
2715 static struct file_operations set_tracer_fops = {
2716         .open           = tracing_open_generic,
2717         .read           = tracing_set_trace_read,
2718         .write          = tracing_set_trace_write,
2719 };
2720
2721 static struct file_operations tracing_pipe_fops = {
2722         .open           = tracing_open_pipe,
2723         .poll           = tracing_poll_pipe,
2724         .read           = tracing_read_pipe,
2725         .release        = tracing_release_pipe,
2726 };
2727
2728 static struct file_operations tracing_entries_fops = {
2729         .open           = tracing_open_generic,
2730         .read           = tracing_entries_read,
2731         .write          = tracing_entries_write,
2732 };
2733
2734 #ifdef CONFIG_DYNAMIC_FTRACE
2735
2736 static ssize_t
2737 tracing_read_long(struct file *filp, char __user *ubuf,
2738                   size_t cnt, loff_t *ppos)
2739 {
2740         unsigned long *p = filp->private_data;
2741         char buf[64];
2742         int r;
2743
2744         r = sprintf(buf, "%ld\n", *p);
2745
2746         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2747 }
2748
2749 static struct file_operations tracing_read_long_fops = {
2750         .open           = tracing_open_generic,
2751         .read           = tracing_read_long,
2752 };
2753 #endif
2754
2755 static struct dentry *d_tracer;
2756
2757 struct dentry *tracing_init_dentry(void)
2758 {
2759         static int once;
2760
2761         if (d_tracer)
2762                 return d_tracer;
2763
2764         d_tracer = debugfs_create_dir("tracing", NULL);
2765
2766         if (!d_tracer && !once) {
2767                 once = 1;
2768                 pr_warning("Could not create debugfs directory 'tracing'\n");
2769                 return NULL;
2770         }
2771
2772         return d_tracer;
2773 }
2774
2775 #ifdef CONFIG_FTRACE_SELFTEST
2776 /* Let selftest have access to static functions in this file */
2777 #include "trace_selftest.c"
2778 #endif
2779
2780 static __init void tracer_init_debugfs(void)
2781 {
2782         struct dentry *d_tracer;
2783         struct dentry *entry;
2784
2785         d_tracer = tracing_init_dentry();
2786
2787         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2788                                     &global_trace, &tracing_ctrl_fops);
2789         if (!entry)
2790                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2791
2792         entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2793                                     NULL, &tracing_iter_fops);
2794         if (!entry)
2795                 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2796
2797         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2798                                     NULL, &tracing_cpumask_fops);
2799         if (!entry)
2800                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2801
2802         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2803                                     &global_trace, &tracing_lt_fops);
2804         if (!entry)
2805                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2806
2807         entry = debugfs_create_file("trace", 0444, d_tracer,
2808                                     &global_trace, &tracing_fops);
2809         if (!entry)
2810                 pr_warning("Could not create debugfs 'trace' entry\n");
2811
2812         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2813                                     &global_trace, &show_traces_fops);
2814         if (!entry)
2815                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2816
2817         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2818                                     &global_trace, &set_tracer_fops);
2819         if (!entry)
2820                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2821
2822         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2823                                     &tracing_max_latency,
2824                                     &tracing_max_lat_fops);
2825         if (!entry)
2826                 pr_warning("Could not create debugfs "
2827                            "'tracing_max_latency' entry\n");
2828
2829         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2830                                     &tracing_thresh, &tracing_max_lat_fops);
2831         if (!entry)
2832                 pr_warning("Could not create debugfs "
2833                            "'tracing_thresh' entry\n");
2834         entry = debugfs_create_file("README", 0644, d_tracer,
2835                                     NULL, &tracing_readme_fops);
2836         if (!entry)
2837                 pr_warning("Could not create debugfs 'README' entry\n");
2838
2839         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2840                                     NULL, &tracing_pipe_fops);
2841         if (!entry)
2842                 pr_warning("Could not create debugfs "
2843                            "'trace_pipe' entry\n");
2844
2845         entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2846                                     &global_trace, &tracing_entries_fops);
2847         if (!entry)
2848                 pr_warning("Could not create debugfs "
2849                            "'trace_entries' entry\n");
2850
2851 #ifdef CONFIG_DYNAMIC_FTRACE
2852         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2853                                     &ftrace_update_tot_cnt,
2854                                     &tracing_read_long_fops);
2855         if (!entry)
2856                 pr_warning("Could not create debugfs "
2857                            "'dyn_ftrace_total_info' entry\n");
2858 #endif
2859 }
2860
2861 static int trace_alloc_page(void)
2862 {
2863         struct trace_array_cpu *data;
2864         struct page *page, *tmp;
2865         LIST_HEAD(pages);
2866         void *array;
2867         unsigned pages_allocated = 0;
2868         int i;
2869
2870         /* first allocate a page for each CPU */
2871         for_each_tracing_cpu(i) {
2872                 array = (void *)__get_free_page(GFP_KERNEL);
2873                 if (array == NULL) {
2874                         printk(KERN_ERR "tracer: failed to allocate page "
2875                                "for trace buffer!\n");
2876                         goto free_pages;
2877                 }
2878
2879                 pages_allocated++;
2880                 page = virt_to_page(array);
2881                 list_add(&page->lru, &pages);
2882
2883 /* Only allocate if we are actually using the max trace */
2884 #ifdef CONFIG_TRACER_MAX_TRACE
2885                 array = (void *)__get_free_page(GFP_KERNEL);
2886                 if (array == NULL) {
2887                         printk(KERN_ERR "tracer: failed to allocate page "
2888                                "for trace buffer!\n");
2889                         goto free_pages;
2890                 }
2891                 pages_allocated++;
2892                 page = virt_to_page(array);
2893                 list_add(&page->lru, &pages);
2894 #endif
2895         }
2896
2897         /* Now that we have successfully allocated a page per CPU, add them */
2898         for_each_tracing_cpu(i) {
2899                 data = global_trace.data[i];
2900                 page = list_entry(pages.next, struct page, lru);
2901                 list_del_init(&page->lru);
2902                 list_add_tail(&page->lru, &data->trace_pages);
2903                 ClearPageLRU(page);
2904
2905 #ifdef CONFIG_TRACER_MAX_TRACE
2906                 data = max_tr.data[i];
2907                 page = list_entry(pages.next, struct page, lru);
2908                 list_del_init(&page->lru);
2909                 list_add_tail(&page->lru, &data->trace_pages);
2910                 SetPageLRU(page);
2911 #endif
2912         }
2913         tracing_pages_allocated += pages_allocated;
2914         global_trace.entries += ENTRIES_PER_PAGE;
2915
2916         return 0;
2917
2918  free_pages:
2919         list_for_each_entry_safe(page, tmp, &pages, lru) {
2920                 list_del_init(&page->lru);
2921                 __free_page(page);
2922         }
2923         return -ENOMEM;
2924 }
2925
2926 static int trace_free_page(void)
2927 {
2928         struct trace_array_cpu *data;
2929         struct page *page;
2930         struct list_head *p;
2931         int i;
2932         int ret = 0;
2933
2934         /* free one page from each buffer */
2935         for_each_tracing_cpu(i) {
2936                 data = global_trace.data[i];
2937                 p = data->trace_pages.next;
2938                 if (p == &data->trace_pages) {
2939                         /* should never happen */
2940                         WARN_ON(1);
2941                         tracing_disabled = 1;
2942                         ret = -1;
2943                         break;
2944                 }
2945                 page = list_entry(p, struct page, lru);
2946                 ClearPageLRU(page);
2947                 list_del(&page->lru);
2948                 tracing_pages_allocated--;
2949                 tracing_pages_allocated--;
2950                 __free_page(page);
2951
2952                 tracing_reset(data);
2953
2954 #ifdef CONFIG_TRACER_MAX_TRACE
2955                 data = max_tr.data[i];
2956                 p = data->trace_pages.next;
2957                 if (p == &data->trace_pages) {
2958                         /* should never happen */
2959                         WARN_ON(1);
2960                         tracing_disabled = 1;
2961                         ret = -1;
2962                         break;
2963                 }
2964                 page = list_entry(p, struct page, lru);
2965                 ClearPageLRU(page);
2966                 list_del(&page->lru);
2967                 __free_page(page);
2968
2969                 tracing_reset(data);
2970 #endif
2971         }
2972         global_trace.entries -= ENTRIES_PER_PAGE;
2973
2974         return ret;
2975 }
2976
2977 __init static int tracer_alloc_buffers(void)
2978 {
2979         struct trace_array_cpu *data;
2980         void *array;
2981         struct page *page;
2982         int pages = 0;
2983         int ret = -ENOMEM;
2984         int i;
2985
2986         /* TODO: make the number of buffers hot-pluggable with CPUs */
2987         tracing_nr_buffers = num_possible_cpus();
2988         tracing_buffer_mask = cpu_possible_map;
2989
2990         /* Allocate the first page for all buffers */
2991         for_each_tracing_cpu(i) {
2992                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
2993                 max_tr.data[i] = &per_cpu(max_data, i);
2994
2995                 array = (void *)__get_free_page(GFP_KERNEL);
2996                 if (array == NULL) {
2997                         printk(KERN_ERR "tracer: failed to allocate page "
2998                                "for trace buffer!\n");
2999                         goto free_buffers;
3000                 }
3001
3002                 /* add the page backing this array to the CPU's page list */
3003                 INIT_LIST_HEAD(&data->trace_pages);
3004                 page = virt_to_page(array);
3005                 list_add(&page->lru, &data->trace_pages);
3006                 /* use the LRU flag to differentiate the two buffers */
3007                 ClearPageLRU(page);
3008
3009                 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3010                 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3011
3012 /* Only allocate if we are actually using the max trace */
3013 #ifdef CONFIG_TRACER_MAX_TRACE
3014                 array = (void *)__get_free_page(GFP_KERNEL);
3015                 if (array == NULL) {
3016                         printk(KERN_ERR "tracer: failed to allocate page "
3017                                "for trace buffer!\n");
3018                         goto free_buffers;
3019                 }
3020
3021                 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
3022                 page = virt_to_page(array);
3023                 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3024                 SetPageLRU(page);
3025 #endif
3026         }
3027
3028         /*
3029          * Since we allocate by orders of pages, we may be able to
3030          * round up a bit.
3031          */
3032         global_trace.entries = ENTRIES_PER_PAGE;
3033         pages++;
3034
3035         while (global_trace.entries < trace_nr_entries) {
3036                 if (trace_alloc_page())
3037                         break;
3038                 pages++;
3039         }
3040         max_tr.entries = global_trace.entries;
3041
3042         pr_info("tracer: %d pages allocated for %ld",
3043                 pages, trace_nr_entries);
3044         pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE);
3045         pr_info("   actual entries %ld\n", global_trace.entries);
3046
3047         tracer_init_debugfs();
3048
3049         trace_init_cmdlines();
3050
3051         register_tracer(&no_tracer);
3052         current_trace = &no_tracer;
3053
3054         /* All seems OK, enable tracing */
3055         global_trace.ctrl = tracer_enabled;
3056         tracing_disabled = 0;
3057
3058         return 0;
3059
3060  free_buffers:
3061         for (i-- ; i >= 0; i--) {
3062                 struct page *page, *tmp;
3063                 struct trace_array_cpu *data = global_trace.data[i];
3064
3065                 if (data) {
3066                         list_for_each_entry_safe(page, tmp,
3067                                                  &data->trace_pages, lru) {
3068                                 list_del_init(&page->lru);
3069                                 __free_page(page);
3070                         }
3071                 }
3072
3073 #ifdef CONFIG_TRACER_MAX_TRACE
3074                 data = max_tr.data[i];
3075                 if (data) {
3076                         list_for_each_entry_safe(page, tmp,
3077                                                  &data->trace_pages, lru) {
3078                                 list_del_init(&page->lru);
3079                                 __free_page(page);
3080                         }
3081                 }
3082 #endif
3083         }
3084         return ret;
3085 }
3086 fs_initcall(tracer_alloc_buffers);