tracing: add binary buffer files for use with splice
[linux-2.6] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/ring_buffer.h>
15 #include <linux/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/ctype.h>
34 #include <linux/init.h>
35 #include <linux/poll.h>
36 #include <linux/gfp.h>
37 #include <linux/fs.h>
38
39 #include "trace.h"
40 #include "trace_output.h"
41
42 #define TRACE_BUFFER_FLAGS      (RB_FL_OVERWRITE)
43
44 unsigned long __read_mostly     tracing_max_latency;
45 unsigned long __read_mostly     tracing_thresh;
46
47 /*
48  * We need to change this state when a selftest is running.
49  * A selftest will peek into the ring buffer to count the
50  * entries inserted during the selftest, although concurrent
51  * insertions into the ring buffer, such as ftrace_printk, could occur
52  * at the same time, giving false positive or negative results.
53  */
54 static bool __read_mostly tracing_selftest_running;
55
56 /*
57  * If a tracer is running, we do not want to run SELFTEST.
58  */
59 static bool __read_mostly tracing_selftest_disabled;
60
61 /* For tracers that don't implement custom flags */
62 static struct tracer_opt dummy_tracer_opt[] = {
63         { }
64 };
65
66 static struct tracer_flags dummy_tracer_flags = {
67         .val = 0,
68         .opts = dummy_tracer_opt
69 };
70
71 static int dummy_set_flag(u32 old_flags, u32 bit, int set)
72 {
73         return 0;
74 }
75
76 /*
77  * Kill all tracing for good (never come back).
78  * It is initialized to 1 and is cleared to zero only if the
79  * initialization of the tracer is successful. Nothing else ever
80  * sets it back to zero.
81  */
82 static int tracing_disabled = 1;
83
84 static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled);
85
86 static inline void ftrace_disable_cpu(void)
87 {
88         preempt_disable();
89         local_inc(&__get_cpu_var(ftrace_cpu_disabled));
90 }
91
92 static inline void ftrace_enable_cpu(void)
93 {
94         local_dec(&__get_cpu_var(ftrace_cpu_disabled));
95         preempt_enable();
96 }
97
98 static cpumask_var_t __read_mostly      tracing_buffer_mask;
99
100 /* Define which cpu buffers are currently read in trace_pipe */
101 static cpumask_var_t                    tracing_reader_cpumask;
102
103 #define for_each_tracing_cpu(cpu)       \
104         for_each_cpu(cpu, tracing_buffer_mask)
105
106 /*
107  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
108  *
109  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
110  * is set, then ftrace_dump is called. This will output the contents
111  * of the ftrace buffers to the console.  This is very useful for
112  * capturing traces that lead to crashes and outputting them to a
113  * serial console.
114  *
115  * It is off by default, but you can enable it either by specifying
116  * "ftrace_dump_on_oops" on the kernel command line, or by setting
117  * /proc/sys/kernel/ftrace_dump_on_oops to true.
118  */
119 int ftrace_dump_on_oops;
120
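/*
 * Example (editor's note, not part of the original file): boot with
 * "ftrace_dump_on_oops" on the kernel command line, or enable it at
 * run time with:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */
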
121 static int tracing_set_tracer(const char *buf);
122
123 #define BOOTUP_TRACER_SIZE              100
124 static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata;
125 static char *default_bootup_tracer;
126
127 static int __init set_ftrace(char *str)
128 {
129         strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE);
130         default_bootup_tracer = bootup_tracer_buf;
131         return 1;
132 }
133 __setup("ftrace=", set_ftrace);
134
135 static int __init set_ftrace_dump_on_oops(char *str)
136 {
137         ftrace_dump_on_oops = 1;
138         return 1;
139 }
140 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
141
142 long
143 ns2usecs(cycle_t nsec)
144 {
145         nsec += 500;
146         do_div(nsec, 1000);
147         return nsec;
148 }
149
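/*
 * Editor's note (not part of the original file): ns2usecs() rounds to
 * the nearest microsecond, e.g. ns2usecs(1499) == 1 and
 * ns2usecs(1500) == 2, since 500 is added before dividing by 1000.
 */
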
150 cycle_t ftrace_now(int cpu)
151 {
152         u64 ts = ring_buffer_time_stamp(cpu);
153         ring_buffer_normalize_time_stamp(cpu, &ts);
154         return ts;
155 }
156
157 /*
158  * The global_trace is the descriptor that holds the tracing
159  * buffers for the live tracing. For each CPU, it contains
160  * a linked list of pages that will store trace entries. The
161  * page descriptor of the pages in memory is used to hold
162  * the linked list by linking the lru item in the page descriptor
163  * to each of the pages in the buffer per CPU.
164  *
165  * For each active CPU there is a data field that holds the
166  * pages for the buffer for that CPU. Each CPU has the same number
167  * of pages allocated for its buffer.
168  */
169 static struct trace_array       global_trace;
170
171 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
172
173 /*
174  * The max_tr is used to snapshot the global_trace when a maximum
175  * latency is reached. Some tracers will use this to store a maximum
176  * trace while it continues examining live traces.
177  *
178  * The buffers for the max_tr are set up the same as the global_trace.
179  * When a snapshot is taken, the linked list of the max_tr is swapped
180  * with the linked list of the global_trace and the buffers are reset for
181  * the global_trace so the tracing can continue.
182  */
183 static struct trace_array       max_tr;
184
185 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
186
187 /* tracer_enabled is used to toggle activation of a tracer */
188 static int                      tracer_enabled = 1;
189
190 /**
191  * tracing_is_enabled - return tracer_enabled status
192  *
193  * This function is used by other tracers to know the status
194  * of the tracer_enabled flag.  Tracers may use this function
195  * to know if they should enable their features when starting
196  * up. See the irqsoff tracer for an example (start_irqsoff_tracer).
197  */
198 int tracing_is_enabled(void)
199 {
200         return tracer_enabled;
201 }
202
203 /*
204  * trace_buf_size is the size in bytes that is allocated
205  * for a buffer. Note, the number of bytes is always rounded
206  * to page size.
207  *
208  * This number is purposely set to a low value of 16384 entries.
209  * If a dump on oops happens, it is much appreciated not to have
210  * to wait for all that output. Anyway, this is configurable at
211  * both boot time and run time.
212  */
213 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
214
215 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
216
217 /* trace_types holds a link list of available tracers. */
218 static struct tracer            *trace_types __read_mostly;
219
220 /* current_trace points to the tracer that is currently active */
221 static struct tracer            *current_trace __read_mostly;
222
223 /*
224  * max_tracer_type_len is used to simplify the allocation of
225  * buffers to read userspace tracer names. We keep track of
226  * the longest tracer name registered.
227  */
228 static int                      max_tracer_type_len;
229
230 /*
231  * trace_types_lock is used to protect the trace_types list.
232  * This lock is also used to keep user access serialized.
233  * Accesses from userspace will grab this lock while userspace
234  * activities happen inside the kernel.
235  */
236 static DEFINE_MUTEX(trace_types_lock);
237
238 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
239 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
240
241 /* trace_flags holds trace_options default values */
242 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
243         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO;
244
245 /**
246  * trace_wake_up - wake up tasks waiting for trace input
247  *
248  * Simply wakes up any task that is blocked on the trace_wait
249  * queue. This is used with trace_poll for tasks polling the trace.
250  */
251 void trace_wake_up(void)
252 {
253         /*
254          * The runqueue_is_locked() can fail, but this is the best we
255          * have for now:
256          */
257         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
258                 wake_up(&trace_wait);
259 }
260
261 static int __init set_buf_size(char *str)
262 {
263         unsigned long buf_size;
264         int ret;
265
266         if (!str)
267                 return 0;
268         ret = strict_strtoul(str, 0, &buf_size);
269         /* nr_entries can not be zero */
270         if (ret < 0 || buf_size == 0)
271                 return 0;
272         trace_buf_size = buf_size;
273         return 1;
274 }
275 __setup("trace_buf_size=", set_buf_size);
276
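/*
 * Example (editor's note, not part of the original file): the buffer
 * size can be set in bytes on the kernel command line, e.g.
 * "trace_buf_size=1048576"; the ring buffer rounds it to page size
 * as described above.
 */
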
277 unsigned long nsecs_to_usecs(unsigned long nsecs)
278 {
279         return nsecs / 1000;
280 }
281
282 /* These must match the bit positions in trace_iterator_flags */
283 static const char *trace_options[] = {
284         "print-parent",
285         "sym-offset",
286         "sym-addr",
287         "verbose",
288         "raw",
289         "hex",
290         "bin",
291         "block",
292         "stacktrace",
293         "sched-tree",
294         "ftrace_printk",
295         "ftrace_preempt",
296         "branch",
297         "annotate",
298         "userstacktrace",
299         "sym-userobj",
300         "printk-msg-only",
301         "context-info",
302         NULL
303 };
304
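/*
 * Editor's note (not part of the original file): each name above maps to
 * the trace_iterator_flags bit of the same index, so writing e.g.
 * "stacktrace" or "nostacktrace" to the trace_options debugfs file sets
 * or clears TRACE_ITER_STACKTRACE (handled further down in this file).
 */
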
305 /*
306  * ftrace_max_lock is used to protect the swapping of buffers
307  * when taking a max snapshot. The buffers themselves are
308  * protected by per_cpu spinlocks. But the action of the swap
309  * needs its own lock.
310  *
311  * This is defined as a raw_spinlock_t in order to help
312  * with performance when lockdep debugging is enabled.
313  */
314 static raw_spinlock_t ftrace_max_lock =
315         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
316
317 /*
318  * Copy the new maximum trace into the separate maximum-trace
319  * structure. (this way the maximum trace is permanently saved,
320  * for later retrieval via /debugfs/tracing/latency_trace)
321  */
322 static void
323 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
324 {
325         struct trace_array_cpu *data = tr->data[cpu];
326
327         max_tr.cpu = cpu;
328         max_tr.time_start = data->preempt_timestamp;
329
330         data = max_tr.data[cpu];
331         data->saved_latency = tracing_max_latency;
332
333         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
334         data->pid = tsk->pid;
335         data->uid = task_uid(tsk);
336         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
337         data->policy = tsk->policy;
338         data->rt_priority = tsk->rt_priority;
339
340         /* record this task's comm */
341         tracing_record_cmdline(tsk);
342 }
343
344 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
345 {
346         int len;
347         int ret;
348
349         if (s->len <= s->readpos)
350                 return -EBUSY;
351
352         len = s->len - s->readpos;
353         if (cnt > len)
354                 cnt = len;
355         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
356         if (ret)
357                 return -EFAULT;
358
359         s->readpos += cnt;
360         return cnt;
361 }
362
363 ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
364 {
365         int len;
366         void *ret;
367
368         if (s->len <= s->readpos)
369                 return -EBUSY;
370
371         len = s->len - s->readpos;
372         if (cnt > len)
373                 cnt = len;
374         ret = memcpy(buf, s->buffer + s->readpos, cnt);
375         if (!ret)
376                 return -EFAULT;
377
378         s->readpos += cnt;
379         return cnt;
380 }
381
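/*
 * Editor's sketch (not part of the original file): a typical caller of
 * trace_seq_to_buffer() copies the pending seq data into a page that can
 * then be handed to splice. "copy_seq_into_page", "iter" and "page" are
 * hypothetical here; the splice support added by this patch follows the
 * same pattern later in this file.
 */
#if 0
static ssize_t copy_seq_into_page(struct trace_iterator *iter,
				  struct page *page)
{
	ssize_t ret;

	ret = trace_seq_to_buffer(&iter->seq, page_address(page), PAGE_SIZE);
	if (ret == -EBUSY)
		return 0;	/* nothing buffered yet */

	trace_seq_init(&iter->seq);
	return ret;
}
#endif
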
382 static void
383 trace_print_seq(struct seq_file *m, struct trace_seq *s)
384 {
385         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
386
387         s->buffer[len] = 0;
388         seq_puts(m, s->buffer);
389
390         trace_seq_init(s);
391 }
392
393 /**
394  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
395  * @tr: tracer
396  * @tsk: the task with the latency
397  * @cpu: The cpu that initiated the trace.
398  *
399  * Flip the buffers between the @tr and the max_tr and record information
400  * about which task was the cause of this latency.
401  */
402 void
403 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
404 {
405         struct ring_buffer *buf = tr->buffer;
406
407         WARN_ON_ONCE(!irqs_disabled());
408         __raw_spin_lock(&ftrace_max_lock);
409
410         tr->buffer = max_tr.buffer;
411         max_tr.buffer = buf;
412
413         ftrace_disable_cpu();
414         ring_buffer_reset(tr->buffer);
415         ftrace_enable_cpu();
416
417         __update_max_tr(tr, tsk, cpu);
418         __raw_spin_unlock(&ftrace_max_lock);
419 }
420
421 /**
422  * update_max_tr_single - only copy one trace over, and reset the rest
423  * @tr: tracer
424  * @tsk: task with the latency
425  * @cpu: the cpu of the buffer to copy.
426  *
427  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
428  */
429 void
430 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
431 {
432         int ret;
433
434         WARN_ON_ONCE(!irqs_disabled());
435         __raw_spin_lock(&ftrace_max_lock);
436
437         ftrace_disable_cpu();
438
439         ring_buffer_reset(max_tr.buffer);
440         ret = ring_buffer_swap_cpu(max_tr.buffer, tr->buffer, cpu);
441
442         ftrace_enable_cpu();
443
444         WARN_ON_ONCE(ret && ret != -EAGAIN);
445
446         __update_max_tr(tr, tsk, cpu);
447         __raw_spin_unlock(&ftrace_max_lock);
448 }
449
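/*
 * Editor's note (not part of the original file): latency tracers such as
 * irqsoff typically use these along the lines of
 *
 *	if (delta > tracing_max_latency) {
 *		tracing_max_latency = delta;
 *		update_max_tr_single(tr, current, cpu);
 *	}
 *
 * so that max_tr always holds the worst-case trace seen so far.
 */
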
450 /**
451  * register_tracer - register a tracer with the ftrace system.
452  * @type: the plugin for the tracer
453  *
454  * Register a new plugin tracer.
455  */
456 int register_tracer(struct tracer *type)
457 __releases(kernel_lock)
458 __acquires(kernel_lock)
459 {
460         struct tracer *t;
461         int len;
462         int ret = 0;
463
464         if (!type->name) {
465                 pr_info("Tracer must have a name\n");
466                 return -1;
467         }
468
469         /*
470          * When this gets called we hold the BKL which means that
471          * preemption is disabled. Various trace selftests however
472          * need to disable and enable preemption for successful tests.
473          * So we drop the BKL here and grab it after the tests again.
474          */
475         unlock_kernel();
476         mutex_lock(&trace_types_lock);
477
478         tracing_selftest_running = true;
479
480         for (t = trace_types; t; t = t->next) {
481                 if (strcmp(type->name, t->name) == 0) {
482                         /* already found */
483                         pr_info("Trace %s already registered\n",
484                                 type->name);
485                         ret = -1;
486                         goto out;
487                 }
488         }
489
490         if (!type->set_flag)
491                 type->set_flag = &dummy_set_flag;
492         if (!type->flags)
493                 type->flags = &dummy_tracer_flags;
494         else
495                 if (!type->flags->opts)
496                         type->flags->opts = dummy_tracer_opt;
497         if (!type->wait_pipe)
498                 type->wait_pipe = default_wait_pipe;
499
500
501 #ifdef CONFIG_FTRACE_STARTUP_TEST
502         if (type->selftest && !tracing_selftest_disabled) {
503                 struct tracer *saved_tracer = current_trace;
504                 struct trace_array *tr = &global_trace;
505                 int i;
506
507                 /*
508                  * Run a selftest on this tracer.
509                  * Here we reset the trace buffer, and set the current
510                  * tracer to be this tracer. The tracer can then run some
511                  * internal tracing to verify that everything is in order.
512                  * If we fail, we do not register this tracer.
513                  */
514                 for_each_tracing_cpu(i)
515                         tracing_reset(tr, i);
516
517                 current_trace = type;
518                 /* the test is responsible for initializing and enabling */
519                 pr_info("Testing tracer %s: ", type->name);
520                 ret = type->selftest(type, tr);
521                 /* the test is responsible for resetting too */
522                 current_trace = saved_tracer;
523                 if (ret) {
524                         printk(KERN_CONT "FAILED!\n");
525                         goto out;
526                 }
527                 /* Only reset on passing, to avoid touching corrupted buffers */
528                 for_each_tracing_cpu(i)
529                         tracing_reset(tr, i);
530
531                 printk(KERN_CONT "PASSED\n");
532         }
533 #endif
534
535         type->next = trace_types;
536         trace_types = type;
537         len = strlen(type->name);
538         if (len > max_tracer_type_len)
539                 max_tracer_type_len = len;
540
541  out:
542         tracing_selftest_running = false;
543         mutex_unlock(&trace_types_lock);
544
545         if (ret || !default_bootup_tracer)
546                 goto out_unlock;
547
548         if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE))
549                 goto out_unlock;
550
551         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
552         /* Do we want this tracer to start on bootup? */
553         tracing_set_tracer(type->name);
554         default_bootup_tracer = NULL;
555         /* disable other selftests, since this will break them. */
556         tracing_selftest_disabled = 1;
557 #ifdef CONFIG_FTRACE_STARTUP_TEST
558         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
559                type->name);
560 #endif
561
562  out_unlock:
563         lock_kernel();
564         return ret;
565 }
566
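/*
 * Editor's sketch (not part of the original file): a minimal tracer
 * plugin registration. Only .name is mandatory (checked above); the
 * "nop_like" names are made up for illustration.
 */
#if 0
static int nop_like_init(struct trace_array *tr)
{
	return 0;
}

static struct tracer nop_like_tracer __read_mostly = {
	.name	= "nop_like",
	.init	= nop_like_init,
};

static int __init init_nop_like_trace(void)
{
	return register_tracer(&nop_like_tracer);
}
device_initcall(init_nop_like_trace);
#endif
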
567 void unregister_tracer(struct tracer *type)
568 {
569         struct tracer **t;
570         int len;
571
572         mutex_lock(&trace_types_lock);
573         for (t = &trace_types; *t; t = &(*t)->next) {
574                 if (*t == type)
575                         goto found;
576         }
577         pr_info("Trace %s not registered\n", type->name);
578         goto out;
579
580  found:
581         *t = (*t)->next;
582
583         if (type == current_trace && tracer_enabled) {
584                 tracer_enabled = 0;
585                 tracing_stop();
586                 if (current_trace->stop)
587                         current_trace->stop(&global_trace);
588                 current_trace = &nop_trace;
589         }
590
591         if (strlen(type->name) != max_tracer_type_len)
592                 goto out;
593
594         max_tracer_type_len = 0;
595         for (t = &trace_types; *t; t = &(*t)->next) {
596                 len = strlen((*t)->name);
597                 if (len > max_tracer_type_len)
598                         max_tracer_type_len = len;
599         }
600  out:
601         mutex_unlock(&trace_types_lock);
602 }
603
604 void tracing_reset(struct trace_array *tr, int cpu)
605 {
606         ftrace_disable_cpu();
607         ring_buffer_reset_cpu(tr->buffer, cpu);
608         ftrace_enable_cpu();
609 }
610
611 void tracing_reset_online_cpus(struct trace_array *tr)
612 {
613         int cpu;
614
615         tr->time_start = ftrace_now(tr->cpu);
616
617         for_each_online_cpu(cpu)
618                 tracing_reset(tr, cpu);
619 }
620
621 #define SAVED_CMDLINES 128
622 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
623 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
624 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
625 static int cmdline_idx;
626 static DEFINE_SPINLOCK(trace_cmdline_lock);
627
628 /* temporarily disable recording */
629 static atomic_t trace_record_cmdline_disabled __read_mostly;
630
631 static void trace_init_cmdlines(void)
632 {
633         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
634         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
635         cmdline_idx = 0;
636 }
637
638 static int trace_stop_count;
639 static DEFINE_SPINLOCK(tracing_start_lock);
640
641 /**
642  * ftrace_off_permanent - disable all ftrace code permanently
643  *
644  * This should only be called when a serious anomaly has
645  * been detected.  This will turn off the function tracing,
646  * ring buffers, and other tracing utilities. It takes no
647  * locks and can be called from any context.
648  */
649 void ftrace_off_permanent(void)
650 {
651         tracing_disabled = 1;
652         ftrace_stop();
653         tracing_off_permanent();
654 }
655
656 /**
657  * tracing_start - quick start of the tracer
658  *
659  * If tracing is enabled but was stopped by tracing_stop,
660  * this will start the tracer back up.
661  */
662 void tracing_start(void)
663 {
664         struct ring_buffer *buffer;
665         unsigned long flags;
666
667         if (tracing_disabled)
668                 return;
669
670         spin_lock_irqsave(&tracing_start_lock, flags);
671         if (--trace_stop_count) {
672                 if (trace_stop_count < 0) {
673                         /* Someone screwed up their debugging */
674                         WARN_ON_ONCE(1);
675                         trace_stop_count = 0;
676                 }
677                 goto out;
678         }
679
680
681         buffer = global_trace.buffer;
682         if (buffer)
683                 ring_buffer_record_enable(buffer);
684
685         buffer = max_tr.buffer;
686         if (buffer)
687                 ring_buffer_record_enable(buffer);
688
689         ftrace_start();
690  out:
691         spin_unlock_irqrestore(&tracing_start_lock, flags);
692 }
693
694 /**
695  * tracing_stop - quick stop of the tracer
696  *
697  * Lightweight way to stop tracing. Use in conjunction with
698  * tracing_start.
699  */
700 void tracing_stop(void)
701 {
702         struct ring_buffer *buffer;
703         unsigned long flags;
704
705         ftrace_stop();
706         spin_lock_irqsave(&tracing_start_lock, flags);
707         if (trace_stop_count++)
708                 goto out;
709
710         buffer = global_trace.buffer;
711         if (buffer)
712                 ring_buffer_record_disable(buffer);
713
714         buffer = max_tr.buffer;
715         if (buffer)
716                 ring_buffer_record_disable(buffer);
717
718  out:
719         spin_unlock_irqrestore(&tracing_start_lock, flags);
720 }
721
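/*
 * Editor's note (not part of the original file): tracing_stop() and
 * tracing_start() nest via trace_stop_count, so two tracing_stop() calls
 * must be matched by two tracing_start() calls before the ring buffers
 * record again.
 */
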
722 void trace_stop_cmdline_recording(void);
723
724 static void trace_save_cmdline(struct task_struct *tsk)
725 {
726         unsigned map;
727         unsigned idx;
728
729         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
730                 return;
731
732         /*
733          * It's not the end of the world if we don't get
734          * the lock, but we also don't want to spin
735          * nor do we want to disable interrupts,
736          * so if we miss here, then better luck next time.
737          */
738         if (!spin_trylock(&trace_cmdline_lock))
739                 return;
740
741         idx = map_pid_to_cmdline[tsk->pid];
742         if (idx >= SAVED_CMDLINES) {
743                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
744
745                 map = map_cmdline_to_pid[idx];
746                 if (map <= PID_MAX_DEFAULT)
747                         map_pid_to_cmdline[map] = (unsigned)-1;
748
749                 map_pid_to_cmdline[tsk->pid] = idx;
750
751                 cmdline_idx = idx;
752         }
753
754         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
755
756         spin_unlock(&trace_cmdline_lock);
757 }
758
759 char *trace_find_cmdline(int pid)
760 {
761         char *cmdline = "<...>";
762         unsigned map;
763
764         if (!pid)
765                 return "<idle>";
766
767         if (pid > PID_MAX_DEFAULT)
768                 goto out;
769
770         map = map_pid_to_cmdline[pid];
771         if (map >= SAVED_CMDLINES)
772                 goto out;
773
774         cmdline = saved_cmdlines[map];
775
776  out:
777         return cmdline;
778 }
779
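/*
 * Editor's note (not part of the original file): the cmdline cache holds
 * only SAVED_CMDLINES (128) entries, so trace_find_cmdline(0) returns
 * "<idle>" and a pid whose entry has been recycled shows up as "<...>".
 */
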
780 void tracing_record_cmdline(struct task_struct *tsk)
781 {
782         if (atomic_read(&trace_record_cmdline_disabled))
783                 return;
784
785         trace_save_cmdline(tsk);
786 }
787
788 void
789 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
790                              int pc)
791 {
792         struct task_struct *tsk = current;
793
794         entry->preempt_count            = pc & 0xff;
795         entry->pid                      = (tsk) ? tsk->pid : 0;
796         entry->tgid                     = (tsk) ? tsk->tgid : 0;
797         entry->flags =
798 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
799                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
800 #else
801                 TRACE_FLAG_IRQS_NOSUPPORT |
802 #endif
803                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
804                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
805                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
806 }
807
808 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
809                                                     unsigned char type,
810                                                     unsigned long len,
811                                                     unsigned long flags, int pc)
812 {
813         struct ring_buffer_event *event;
814
815         event = ring_buffer_lock_reserve(tr->buffer, len);
816         if (event != NULL) {
817                 struct trace_entry *ent = ring_buffer_event_data(event);
818
819                 tracing_generic_entry_update(ent, flags, pc);
820                 ent->type = type;
821         }
822
823         return event;
824 }
825 static void ftrace_trace_stack(struct trace_array *tr,
826                                unsigned long flags, int skip, int pc);
827 static void ftrace_trace_userstack(struct trace_array *tr,
828                                    unsigned long flags, int pc);
829
830 void trace_buffer_unlock_commit(struct trace_array *tr,
831                                 struct ring_buffer_event *event,
832                                 unsigned long flags, int pc)
833 {
834         ring_buffer_unlock_commit(tr->buffer, event);
835
836         ftrace_trace_stack(tr, flags, 6, pc);
837         ftrace_trace_userstack(tr, flags, pc);
838         trace_wake_up();
839 }
840
841 struct ring_buffer_event *
842 trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
843                                   unsigned long flags, int pc)
844 {
845         return trace_buffer_lock_reserve(&global_trace,
846                                          type, len, flags, pc);
847 }
848
849 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
850                                         unsigned long flags, int pc)
851 {
852         return trace_buffer_unlock_commit(&global_trace, event, flags, pc);
853 }
854
855 void
856 trace_function(struct trace_array *tr,
857                unsigned long ip, unsigned long parent_ip, unsigned long flags,
858                int pc)
859 {
860         struct ring_buffer_event *event;
861         struct ftrace_entry *entry;
862
863         /* If we are reading the ring buffer, don't trace */
864         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
865                 return;
866
867         event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry),
868                                           flags, pc);
869         if (!event)
870                 return;
871         entry   = ring_buffer_event_data(event);
872         entry->ip                       = ip;
873         entry->parent_ip                = parent_ip;
874         ring_buffer_unlock_commit(tr->buffer, event);
875 }
876
877 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
878 static void __trace_graph_entry(struct trace_array *tr,
879                                 struct ftrace_graph_ent *trace,
880                                 unsigned long flags,
881                                 int pc)
882 {
883         struct ring_buffer_event *event;
884         struct ftrace_graph_ent_entry *entry;
885
886         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
887                 return;
888
889         event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT,
890                                           sizeof(*entry), flags, pc);
891         if (!event)
892                 return;
893         entry   = ring_buffer_event_data(event);
894         entry->graph_ent                        = *trace;
895         ring_buffer_unlock_commit(global_trace.buffer, event);
896 }
897
898 static void __trace_graph_return(struct trace_array *tr,
899                                 struct ftrace_graph_ret *trace,
900                                 unsigned long flags,
901                                 int pc)
902 {
903         struct ring_buffer_event *event;
904         struct ftrace_graph_ret_entry *entry;
905
906         if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled))))
907                 return;
908
909         event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET,
910                                           sizeof(*entry), flags, pc);
911         if (!event)
912                 return;
913         entry   = ring_buffer_event_data(event);
914         entry->ret                              = *trace;
915         ring_buffer_unlock_commit(global_trace.buffer, event);
916 }
917 #endif
918
919 void
920 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
921        unsigned long ip, unsigned long parent_ip, unsigned long flags,
922        int pc)
923 {
924         if (likely(!atomic_read(&data->disabled)))
925                 trace_function(tr, ip, parent_ip, flags, pc);
926 }
927
928 static void __ftrace_trace_stack(struct trace_array *tr,
929                                  unsigned long flags,
930                                  int skip, int pc)
931 {
932 #ifdef CONFIG_STACKTRACE
933         struct ring_buffer_event *event;
934         struct stack_entry *entry;
935         struct stack_trace trace;
936
937         event = trace_buffer_lock_reserve(tr, TRACE_STACK,
938                                           sizeof(*entry), flags, pc);
939         if (!event)
940                 return;
941         entry   = ring_buffer_event_data(event);
942         memset(&entry->caller, 0, sizeof(entry->caller));
943
944         trace.nr_entries        = 0;
945         trace.max_entries       = FTRACE_STACK_ENTRIES;
946         trace.skip              = skip;
947         trace.entries           = entry->caller;
948
949         save_stack_trace(&trace);
950         ring_buffer_unlock_commit(tr->buffer, event);
951 #endif
952 }
953
954 static void ftrace_trace_stack(struct trace_array *tr,
955                                unsigned long flags,
956                                int skip, int pc)
957 {
958         if (!(trace_flags & TRACE_ITER_STACKTRACE))
959                 return;
960
961         __ftrace_trace_stack(tr, flags, skip, pc);
962 }
963
964 void __trace_stack(struct trace_array *tr,
965                    unsigned long flags,
966                    int skip, int pc)
967 {
968         __ftrace_trace_stack(tr, flags, skip, pc);
969 }
970
971 static void ftrace_trace_userstack(struct trace_array *tr,
972                                    unsigned long flags, int pc)
973 {
974 #ifdef CONFIG_STACKTRACE
975         struct ring_buffer_event *event;
976         struct userstack_entry *entry;
977         struct stack_trace trace;
978
979         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
980                 return;
981
982         event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK,
983                                           sizeof(*entry), flags, pc);
984         if (!event)
985                 return;
986         entry   = ring_buffer_event_data(event);
987
988         memset(&entry->caller, 0, sizeof(entry->caller));
989
990         trace.nr_entries        = 0;
991         trace.max_entries       = FTRACE_STACK_ENTRIES;
992         trace.skip              = 0;
993         trace.entries           = entry->caller;
994
995         save_stack_trace_user(&trace);
996         ring_buffer_unlock_commit(tr->buffer, event);
997 #endif
998 }
999
1000 #ifdef UNUSED
1001 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1002 {
1003         ftrace_trace_userstack(tr, flags, preempt_count());
1004 }
1005 #endif /* UNUSED */
1006
1007 static void
1008 ftrace_trace_special(void *__tr,
1009                      unsigned long arg1, unsigned long arg2, unsigned long arg3,
1010                      int pc)
1011 {
1012         struct ring_buffer_event *event;
1013         struct trace_array *tr = __tr;
1014         struct special_entry *entry;
1015
1016         event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL,
1017                                           sizeof(*entry), 0, pc);
1018         if (!event)
1019                 return;
1020         entry   = ring_buffer_event_data(event);
1021         entry->arg1                     = arg1;
1022         entry->arg2                     = arg2;
1023         entry->arg3                     = arg3;
1024         trace_buffer_unlock_commit(tr, event, 0, pc);
1025 }
1026
1027 void
1028 __trace_special(void *__tr, void *__data,
1029                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
1030 {
1031         ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
1032 }
1033
1034 void
1035 tracing_sched_switch_trace(struct trace_array *tr,
1036                            struct task_struct *prev,
1037                            struct task_struct *next,
1038                            unsigned long flags, int pc)
1039 {
1040         struct ring_buffer_event *event;
1041         struct ctx_switch_entry *entry;
1042
1043         event = trace_buffer_lock_reserve(tr, TRACE_CTX,
1044                                           sizeof(*entry), flags, pc);
1045         if (!event)
1046                 return;
1047         entry   = ring_buffer_event_data(event);
1048         entry->prev_pid                 = prev->pid;
1049         entry->prev_prio                = prev->prio;
1050         entry->prev_state               = prev->state;
1051         entry->next_pid                 = next->pid;
1052         entry->next_prio                = next->prio;
1053         entry->next_state               = next->state;
1054         entry->next_cpu = task_cpu(next);
1055         trace_buffer_unlock_commit(tr, event, flags, pc);
1056 }
1057
1058 void
1059 tracing_sched_wakeup_trace(struct trace_array *tr,
1060                            struct task_struct *wakee,
1061                            struct task_struct *curr,
1062                            unsigned long flags, int pc)
1063 {
1064         struct ring_buffer_event *event;
1065         struct ctx_switch_entry *entry;
1066
1067         event = trace_buffer_lock_reserve(tr, TRACE_WAKE,
1068                                           sizeof(*entry), flags, pc);
1069         if (!event)
1070                 return;
1071         entry   = ring_buffer_event_data(event);
1072         entry->prev_pid                 = curr->pid;
1073         entry->prev_prio                = curr->prio;
1074         entry->prev_state               = curr->state;
1075         entry->next_pid                 = wakee->pid;
1076         entry->next_prio                = wakee->prio;
1077         entry->next_state               = wakee->state;
1078         entry->next_cpu                 = task_cpu(wakee);
1079
1080         ring_buffer_unlock_commit(tr->buffer, event);
1081         ftrace_trace_stack(tr, flags, 6, pc);
1082         ftrace_trace_userstack(tr, flags, pc);
1083 }
1084
1085 void
1086 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
1087 {
1088         struct trace_array *tr = &global_trace;
1089         struct trace_array_cpu *data;
1090         unsigned long flags;
1091         int cpu;
1092         int pc;
1093
1094         if (tracing_disabled)
1095                 return;
1096
1097         pc = preempt_count();
1098         local_irq_save(flags);
1099         cpu = raw_smp_processor_id();
1100         data = tr->data[cpu];
1101
1102         if (likely(atomic_inc_return(&data->disabled) == 1))
1103                 ftrace_trace_special(tr, arg1, arg2, arg3, pc);
1104
1105         atomic_dec(&data->disabled);
1106         local_irq_restore(flags);
1107 }
1108
1109 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
1110 int trace_graph_entry(struct ftrace_graph_ent *trace)
1111 {
1112         struct trace_array *tr = &global_trace;
1113         struct trace_array_cpu *data;
1114         unsigned long flags;
1115         long disabled;
1116         int cpu;
1117         int pc;
1118
1119         if (!ftrace_trace_task(current))
1120                 return 0;
1121
1122         if (!ftrace_graph_addr(trace->func))
1123                 return 0;
1124
1125         local_irq_save(flags);
1126         cpu = raw_smp_processor_id();
1127         data = tr->data[cpu];
1128         disabled = atomic_inc_return(&data->disabled);
1129         if (likely(disabled == 1)) {
1130                 pc = preempt_count();
1131                 __trace_graph_entry(tr, trace, flags, pc);
1132         }
1133         /* Only do the atomic if it is not already set */
1134         if (!test_tsk_trace_graph(current))
1135                 set_tsk_trace_graph(current);
1136         atomic_dec(&data->disabled);
1137         local_irq_restore(flags);
1138
1139         return 1;
1140 }
1141
1142 void trace_graph_return(struct ftrace_graph_ret *trace)
1143 {
1144         struct trace_array *tr = &global_trace;
1145         struct trace_array_cpu *data;
1146         unsigned long flags;
1147         long disabled;
1148         int cpu;
1149         int pc;
1150
1151         local_irq_save(flags);
1152         cpu = raw_smp_processor_id();
1153         data = tr->data[cpu];
1154         disabled = atomic_inc_return(&data->disabled);
1155         if (likely(disabled == 1)) {
1156                 pc = preempt_count();
1157                 __trace_graph_return(tr, trace, flags, pc);
1158         }
1159         if (!trace->depth)
1160                 clear_tsk_trace_graph(current);
1161         atomic_dec(&data->disabled);
1162         local_irq_restore(flags);
1163 }
1164 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
1165
1166 enum trace_file_type {
1167         TRACE_FILE_LAT_FMT      = 1,
1168         TRACE_FILE_ANNOTATE     = 2,
1169 };
1170
1171 static void trace_iterator_increment(struct trace_iterator *iter)
1172 {
1173         /* Don't allow ftrace to trace into the ring buffers */
1174         ftrace_disable_cpu();
1175
1176         iter->idx++;
1177         if (iter->buffer_iter[iter->cpu])
1178                 ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
1179
1180         ftrace_enable_cpu();
1181 }
1182
1183 static struct trace_entry *
1184 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
1185 {
1186         struct ring_buffer_event *event;
1187         struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
1188
1189         /* Don't allow ftrace to trace into the ring buffers */
1190         ftrace_disable_cpu();
1191
1192         if (buf_iter)
1193                 event = ring_buffer_iter_peek(buf_iter, ts);
1194         else
1195                 event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
1196
1197         ftrace_enable_cpu();
1198
1199         return event ? ring_buffer_event_data(event) : NULL;
1200 }
1201
1202 static struct trace_entry *
1203 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
1204 {
1205         struct ring_buffer *buffer = iter->tr->buffer;
1206         struct trace_entry *ent, *next = NULL;
1207         int cpu_file = iter->cpu_file;
1208         u64 next_ts = 0, ts;
1209         int next_cpu = -1;
1210         int cpu;
1211
1212         /*
1213          * If we are in a per_cpu trace file, don't bother iterating over
1214          * all cpus; peek at the requested cpu directly.
1215          */
1216         if (cpu_file > TRACE_PIPE_ALL_CPU) {
1217                 if (ring_buffer_empty_cpu(buffer, cpu_file))
1218                         return NULL;
1219                 ent = peek_next_entry(iter, cpu_file, ent_ts);
1220                 if (ent_cpu)
1221                         *ent_cpu = cpu_file;
1222
1223                 return ent;
1224         }
1225
1226         for_each_tracing_cpu(cpu) {
1227
1228                 if (ring_buffer_empty_cpu(buffer, cpu))
1229                         continue;
1230
1231                 ent = peek_next_entry(iter, cpu, &ts);
1232
1233                 /*
1234                  * Pick the entry with the smallest timestamp:
1235                  */
1236                 if (ent && (!next || ts < next_ts)) {
1237                         next = ent;
1238                         next_cpu = cpu;
1239                         next_ts = ts;
1240                 }
1241         }
1242
1243         if (ent_cpu)
1244                 *ent_cpu = next_cpu;
1245
1246         if (ent_ts)
1247                 *ent_ts = next_ts;
1248
1249         return next;
1250 }
1251
1252 /* Find the next real entry, without updating the iterator itself */
1253 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
1254                                           int *ent_cpu, u64 *ent_ts)
1255 {
1256         return __find_next_entry(iter, ent_cpu, ent_ts);
1257 }
1258
1259 /* Find the next real entry, and increment the iterator to the next entry */
1260 static void *find_next_entry_inc(struct trace_iterator *iter)
1261 {
1262         iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
1263
1264         if (iter->ent)
1265                 trace_iterator_increment(iter);
1266
1267         return iter->ent ? iter : NULL;
1268 }
1269
1270 static void trace_consume(struct trace_iterator *iter)
1271 {
1272         /* Don't allow ftrace to trace into the ring buffers */
1273         ftrace_disable_cpu();
1274         ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
1275         ftrace_enable_cpu();
1276 }
1277
1278 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1279 {
1280         struct trace_iterator *iter = m->private;
1281         int i = (int)*pos;
1282         void *ent;
1283
1284         (*pos)++;
1285
1286         /* can't go backwards */
1287         if (iter->idx > i)
1288                 return NULL;
1289
1290         if (iter->idx < 0)
1291                 ent = find_next_entry_inc(iter);
1292         else
1293                 ent = iter;
1294
1295         while (ent && iter->idx < i)
1296                 ent = find_next_entry_inc(iter);
1297
1298         iter->pos = *pos;
1299
1300         return ent;
1301 }
1302
1303 /*
1304  * No locking is necessary here. The worst thing that can
1305  * happen is losing events consumed at the same time
1306  * by a trace_pipe reader.
1307  * Other than that, we don't risk crashing the ring buffer
1308  * because it serializes the readers.
1309  *
1310  * The current tracer is copied to avoid a global locking
1311  * all around.
1312  */
1313 static void *s_start(struct seq_file *m, loff_t *pos)
1314 {
1315         struct trace_iterator *iter = m->private;
1316         static struct tracer *old_tracer;
1317         int cpu_file = iter->cpu_file;
1318         void *p = NULL;
1319         loff_t l = 0;
1320         int cpu;
1321
1322         /* copy the tracer to avoid using a global lock all around */
1323         mutex_lock(&trace_types_lock);
1324         if (unlikely(old_tracer != current_trace && current_trace)) {
1325                 old_tracer = current_trace;
1326                 *iter->trace = *current_trace;
1327         }
1328         mutex_unlock(&trace_types_lock);
1329
1330         atomic_inc(&trace_record_cmdline_disabled);
1331
1332         if (*pos != iter->pos) {
1333                 iter->ent = NULL;
1334                 iter->cpu = 0;
1335                 iter->idx = -1;
1336
1337                 ftrace_disable_cpu();
1338
1339                 if (cpu_file == TRACE_PIPE_ALL_CPU) {
1340                         for_each_tracing_cpu(cpu)
1341                                 ring_buffer_iter_reset(iter->buffer_iter[cpu]);
1342                 } else
1343                         ring_buffer_iter_reset(iter->buffer_iter[cpu_file]);
1344
1345
1346                 ftrace_enable_cpu();
1347
1348                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1349                         ;
1350
1351         } else {
1352                 l = *pos - 1;
1353                 p = s_next(m, p, &l);
1354         }
1355
1356         return p;
1357 }
1358
1359 static void s_stop(struct seq_file *m, void *p)
1360 {
1361         atomic_dec(&trace_record_cmdline_disabled);
1362 }
1363
1364 static void print_lat_help_header(struct seq_file *m)
1365 {
1366         seq_puts(m, "#                  _------=> CPU#            \n");
1367         seq_puts(m, "#                 / _-----=> irqs-off        \n");
1368         seq_puts(m, "#                | / _----=> need-resched    \n");
1369         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1370         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1371         seq_puts(m, "#                |||| /                      \n");
1372         seq_puts(m, "#                |||||     delay             \n");
1373         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
1374         seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
1375 }
1376
1377 static void print_func_help_header(struct seq_file *m)
1378 {
1379         seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
1380         seq_puts(m, "#              | |       |          |         |\n");
1381 }
1382
1383
1384 static void
1385 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1386 {
1387         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1388         struct trace_array *tr = iter->tr;
1389         struct trace_array_cpu *data = tr->data[tr->cpu];
1390         struct tracer *type = current_trace;
1391         unsigned long total;
1392         unsigned long entries;
1393         const char *name = "preemption";
1394
1395         if (type)
1396                 name = type->name;
1397
1398         entries = ring_buffer_entries(iter->tr->buffer);
1399         total = entries +
1400                 ring_buffer_overruns(iter->tr->buffer);
1401
1402         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1403                    name, UTS_RELEASE);
1404         seq_puts(m, "-----------------------------------"
1405                  "---------------------------------\n");
1406         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1407                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1408                    nsecs_to_usecs(data->saved_latency),
1409                    entries,
1410                    total,
1411                    tr->cpu,
1412 #if defined(CONFIG_PREEMPT_NONE)
1413                    "server",
1414 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1415                    "desktop",
1416 #elif defined(CONFIG_PREEMPT)
1417                    "preempt",
1418 #else
1419                    "unknown",
1420 #endif
1421                    /* These are reserved for later use */
1422                    0, 0, 0, 0);
1423 #ifdef CONFIG_SMP
1424         seq_printf(m, " #P:%d)\n", num_online_cpus());
1425 #else
1426         seq_puts(m, ")\n");
1427 #endif
1428         seq_puts(m, "    -----------------\n");
1429         seq_printf(m, "    | task: %.16s-%d "
1430                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1431                    data->comm, data->pid, data->uid, data->nice,
1432                    data->policy, data->rt_priority);
1433         seq_puts(m, "    -----------------\n");
1434
1435         if (data->critical_start) {
1436                 seq_puts(m, " => started at: ");
1437                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1438                 trace_print_seq(m, &iter->seq);
1439                 seq_puts(m, "\n => ended at:   ");
1440                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1441                 trace_print_seq(m, &iter->seq);
1442                 seq_puts(m, "\n");
1443         }
1444
1445         seq_puts(m, "\n");
1446 }
1447
1448 static void test_cpu_buff_start(struct trace_iterator *iter)
1449 {
1450         struct trace_seq *s = &iter->seq;
1451
1452         if (!(trace_flags & TRACE_ITER_ANNOTATE))
1453                 return;
1454
1455         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
1456                 return;
1457
1458         if (cpumask_test_cpu(iter->cpu, iter->started))
1459                 return;
1460
1461         cpumask_set_cpu(iter->cpu, iter->started);
1462         trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
1463 }
1464
1465 static enum print_line_t print_lat_fmt(struct trace_iterator *iter)
1466 {
1467         struct trace_seq *s = &iter->seq;
1468         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1469         struct trace_event *event;
1470         struct trace_entry *entry = iter->ent;
1471
1472         test_cpu_buff_start(iter);
1473
1474         event = ftrace_find_event(entry->type);
1475
1476         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1477                 if (!trace_print_lat_context(iter))
1478                         goto partial;
1479         }
1480
1481         if (event)
1482                 return event->latency_trace(iter, sym_flags);
1483
1484         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1485                 goto partial;
1486
1487         return TRACE_TYPE_HANDLED;
1488 partial:
1489         return TRACE_TYPE_PARTIAL_LINE;
1490 }
1491
1492 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
1493 {
1494         struct trace_seq *s = &iter->seq;
1495         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1496         struct trace_entry *entry;
1497         struct trace_event *event;
1498
1499         entry = iter->ent;
1500
1501         test_cpu_buff_start(iter);
1502
1503         event = ftrace_find_event(entry->type);
1504
1505         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1506                 if (!trace_print_context(iter))
1507                         goto partial;
1508         }
1509
1510         if (event)
1511                 return event->trace(iter, sym_flags);
1512
1513         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
1514                 goto partial;
1515
1516         return TRACE_TYPE_HANDLED;
1517 partial:
1518         return TRACE_TYPE_PARTIAL_LINE;
1519 }
1520
1521 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
1522 {
1523         struct trace_seq *s = &iter->seq;
1524         struct trace_entry *entry;
1525         struct trace_event *event;
1526
1527         entry = iter->ent;
1528
1529         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1530                 if (!trace_seq_printf(s, "%d %d %llu ",
1531                                       entry->pid, iter->cpu, iter->ts))
1532                         goto partial;
1533         }
1534
1535         event = ftrace_find_event(entry->type);
1536         if (event)
1537                 return event->raw(iter, 0);
1538
1539         if (!trace_seq_printf(s, "%d ?\n", entry->type))
1540                 goto partial;
1541
1542         return TRACE_TYPE_HANDLED;
1543 partial:
1544         return TRACE_TYPE_PARTIAL_LINE;
1545 }
1546
1547 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
1548 {
1549         struct trace_seq *s = &iter->seq;
1550         unsigned char newline = '\n';
1551         struct trace_entry *entry;
1552         struct trace_event *event;
1553
1554         entry = iter->ent;
1555
1556         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1557                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1558                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1559                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
1560         }
1561
1562         event = ftrace_find_event(entry->type);
1563         if (event) {
1564                 enum print_line_t ret = event->hex(iter, 0);
1565                 if (ret != TRACE_TYPE_HANDLED)
1566                         return ret;
1567         }
1568
1569         SEQ_PUT_FIELD_RET(s, newline);
1570
1571         return TRACE_TYPE_HANDLED;
1572 }
1573
1574 static enum print_line_t print_printk_msg_only(struct trace_iterator *iter)
1575 {
1576         struct trace_seq *s = &iter->seq;
1577         struct trace_entry *entry = iter->ent;
1578         struct print_entry *field;
1579         int ret;
1580
1581         trace_assign_type(field, entry);
1582
1583         ret = trace_seq_printf(s, "%s", field->buf);
1584         if (!ret)
1585                 return TRACE_TYPE_PARTIAL_LINE;
1586
1587         return TRACE_TYPE_HANDLED;
1588 }
1589
1590 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1591 {
1592         struct trace_seq *s = &iter->seq;
1593         struct trace_entry *entry;
1594         struct trace_event *event;
1595
1596         entry = iter->ent;
1597
1598         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
1599                 SEQ_PUT_FIELD_RET(s, entry->pid);
1600                 SEQ_PUT_FIELD_RET(s, iter->cpu);
1601                 SEQ_PUT_FIELD_RET(s, iter->ts);
1602         }
1603
1604         event = ftrace_find_event(entry->type);
1605         return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
1606 }
1607
1608 static int trace_empty(struct trace_iterator *iter)
1609 {
1610         int cpu;
1611
1612         for_each_tracing_cpu(cpu) {
1613                 if (iter->buffer_iter[cpu]) {
1614                         if (!ring_buffer_iter_empty(iter->buffer_iter[cpu]))
1615                                 return 0;
1616                 } else {
1617                         if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu))
1618                                 return 0;
1619                 }
1620         }
1621
1622         return 1;
1623 }
1624
1625 static enum print_line_t print_trace_line(struct trace_iterator *iter)
1626 {
1627         enum print_line_t ret;
1628
1629         if (iter->trace && iter->trace->print_line) {
1630                 ret = iter->trace->print_line(iter);
1631                 if (ret != TRACE_TYPE_UNHANDLED)
1632                         return ret;
1633         }
1634
1635         if (iter->ent->type == TRACE_PRINT &&
1636                         trace_flags & TRACE_ITER_PRINTK &&
1637                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
1638                 return print_printk_msg_only(iter);
1639
1640         if (trace_flags & TRACE_ITER_BIN)
1641                 return print_bin_fmt(iter);
1642
1643         if (trace_flags & TRACE_ITER_HEX)
1644                 return print_hex_fmt(iter);
1645
1646         if (trace_flags & TRACE_ITER_RAW)
1647                 return print_raw_fmt(iter);
1648
1649         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1650                 return print_lat_fmt(iter);
1651
1652         return print_trace_fmt(iter);
1653 }
1654
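/*
 * Editor's note (not part of the original file): the checks above give a
 * fixed precedence to the output formats: the tracer's own print_line
 * hook, then printk-msg-only, bin, hex, raw, the latency format, and
 * finally the default trace format.
 */
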
1655 static int s_show(struct seq_file *m, void *v)
1656 {
1657         struct trace_iterator *iter = v;
1658
1659         if (iter->ent == NULL) {
1660                 if (iter->tr) {
1661                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1662                         seq_puts(m, "#\n");
1663                 }
1664                 if (iter->trace && iter->trace->print_header)
1665                         iter->trace->print_header(m);
1666                 else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1667                         /* print nothing if the buffers are empty */
1668                         if (trace_empty(iter))
1669                                 return 0;
1670                         print_trace_header(m, iter);
1671                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1672                                 print_lat_help_header(m);
1673                 } else {
1674                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1675                                 print_func_help_header(m);
1676                 }
1677         } else {
1678                 print_trace_line(iter);
1679                 trace_print_seq(m, &iter->seq);
1680         }
1681
1682         return 0;
1683 }
1684
1685 static struct seq_operations tracer_seq_ops = {
1686         .start          = s_start,
1687         .next           = s_next,
1688         .stop           = s_stop,
1689         .show           = s_show,
1690 };
1691
1692 static struct trace_iterator *
1693 __tracing_open(struct inode *inode, struct file *file)
1694 {
1695         long cpu_file = (long) inode->i_private;
1696         void *fail_ret = ERR_PTR(-ENOMEM);
1697         struct trace_iterator *iter;
1698         struct seq_file *m;
1699         int cpu, ret;
1700
1701         if (tracing_disabled)
1702                 return ERR_PTR(-ENODEV);
1703
1704         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1705         if (!iter)
1706                 return ERR_PTR(-ENOMEM);
1707
1708         /*
1709          * We make a copy of the current tracer to avoid concurrent
1710          * changes to it while we are reading.
1711          */
1712         mutex_lock(&trace_types_lock);
1713         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
1714         if (!iter->trace)
1715                 goto fail;
1716
1717         if (current_trace)
1718                 *iter->trace = *current_trace;
1719
1720         if (current_trace && current_trace->print_max)
1721                 iter->tr = &max_tr;
1722         else
1723                 iter->tr = &global_trace;
1724         iter->pos = -1;
1725         mutex_init(&iter->mutex);
1726         iter->cpu_file = cpu_file;
1727
1728         /* Notify the tracer early, before we stop tracing. */
1729         if (iter->trace && iter->trace->open)
1730                 iter->trace->open(iter);
1731
1732         /* Annotate start of buffers if we had overruns */
1733         if (ring_buffer_overruns(iter->tr->buffer))
1734                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
1735
1736         if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
1737                 for_each_tracing_cpu(cpu) {
1738
1739                         iter->buffer_iter[cpu] =
1740                                 ring_buffer_read_start(iter->tr->buffer, cpu);
1741
1742                         if (!iter->buffer_iter[cpu])
1743                                 goto fail_buffer;
1744                 }
1745         } else {
1746                 cpu = iter->cpu_file;
1747                 iter->buffer_iter[cpu] =
1748                                 ring_buffer_read_start(iter->tr->buffer, cpu);
1749
1750                 if (!iter->buffer_iter[cpu])
1751                         goto fail;
1752         }
1753
1754         /* TODO stop tracer */
1755         ret = seq_open(file, &tracer_seq_ops);
1756         if (ret < 0) {
1757                 fail_ret = ERR_PTR(ret);
1758                 goto fail_buffer;
1759         }
1760
1761         m = file->private_data;
1762         m->private = iter;
1763
1764         /* stop the trace while dumping */
1765         tracing_stop();
1766
1767         mutex_unlock(&trace_types_lock);
1768
1769         return iter;
1770
1771  fail_buffer:
1772         for_each_tracing_cpu(cpu) {
1773                 if (iter->buffer_iter[cpu])
1774                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
1775         }
1776  fail:
1777         mutex_unlock(&trace_types_lock);
1778         kfree(iter->trace);
1779         kfree(iter);
1780
1781         return fail_ret;
1782 }
1783
1784 int tracing_open_generic(struct inode *inode, struct file *filp)
1785 {
1786         if (tracing_disabled)
1787                 return -ENODEV;
1788
1789         filp->private_data = inode->i_private;
1790         return 0;
1791 }
1792
1793 static int tracing_release(struct inode *inode, struct file *file)
1794 {
1795         struct seq_file *m = (struct seq_file *)file->private_data;
1796         struct trace_iterator *iter = m->private;
1797         int cpu;
1798
1799         mutex_lock(&trace_types_lock);
1800         for_each_tracing_cpu(cpu) {
1801                 if (iter->buffer_iter[cpu])
1802                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
1803         }
1804
1805         if (iter->trace && iter->trace->close)
1806                 iter->trace->close(iter);
1807
1808         /* re-enable tracing if it was previously enabled */
1809         tracing_start();
1810         mutex_unlock(&trace_types_lock);
1811
1812         seq_release(inode, file);
1813         mutex_destroy(&iter->mutex);
1814         kfree(iter->trace);
1815         kfree(iter);
1816         return 0;
1817 }
1818
1819 static int tracing_open(struct inode *inode, struct file *file)
1820 {
1821         struct trace_iterator *iter;
1822         int ret = 0;
1823
1824         iter = __tracing_open(inode, file);
1825         if (IS_ERR(iter))
1826                 ret = PTR_ERR(iter);
1827
1828         return ret;
1829 }
1830
1831 static int tracing_lt_open(struct inode *inode, struct file *file)
1832 {
1833         struct trace_iterator *iter;
1834         int ret = 0;
1835
1836         iter = __tracing_open(inode, file);
1837
1838         if (IS_ERR(iter))
1839                 ret = PTR_ERR(iter);
1840         else
1841                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1842
1843         return ret;
1844 }
1845
1846
1847 static void *
1848 t_next(struct seq_file *m, void *v, loff_t *pos)
1849 {
1850         struct tracer *t = m->private;
1851
1852         (*pos)++;
1853
1854         if (t)
1855                 t = t->next;
1856
1857         m->private = t;
1858
1859         return t;
1860 }
1861
1862 static void *t_start(struct seq_file *m, loff_t *pos)
1863 {
1864         struct tracer *t = m->private;
1865         loff_t l = 0;
1866
1867         mutex_lock(&trace_types_lock);
1868         for (; t && l < *pos; t = t_next(m, t, &l))
1869                 ;
1870
1871         return t;
1872 }
1873
1874 static void t_stop(struct seq_file *m, void *p)
1875 {
1876         mutex_unlock(&trace_types_lock);
1877 }
1878
1879 static int t_show(struct seq_file *m, void *v)
1880 {
1881         struct tracer *t = v;
1882
1883         if (!t)
1884                 return 0;
1885
1886         seq_printf(m, "%s", t->name);
1887         if (t->next)
1888                 seq_putc(m, ' ');
1889         else
1890                 seq_putc(m, '\n');
1891
1892         return 0;
1893 }
1894
1895 static struct seq_operations show_traces_seq_ops = {
1896         .start          = t_start,
1897         .next           = t_next,
1898         .stop           = t_stop,
1899         .show           = t_show,
1900 };
1901
1902 static int show_traces_open(struct inode *inode, struct file *file)
1903 {
1904         int ret;
1905
1906         if (tracing_disabled)
1907                 return -ENODEV;
1908
1909         ret = seq_open(file, &show_traces_seq_ops);
1910         if (!ret) {
1911                 struct seq_file *m = file->private_data;
1912                 m->private = trace_types;
1913         }
1914
1915         return ret;
1916 }
1917
1918 static struct file_operations tracing_fops = {
1919         .open           = tracing_open,
1920         .read           = seq_read,
1921         .llseek         = seq_lseek,
1922         .release        = tracing_release,
1923 };
1924
1925 static struct file_operations tracing_lt_fops = {
1926         .open           = tracing_lt_open,
1927         .read           = seq_read,
1928         .llseek         = seq_lseek,
1929         .release        = tracing_release,
1930 };
1931
1932 static struct file_operations show_traces_fops = {
1933         .open           = show_traces_open,
1934         .read           = seq_read,
1935         .release        = seq_release,
1936 };
1937
1938 /*
1939  * Only trace on a CPU if the bitmask is set:
1940  */
1941 static cpumask_var_t tracing_cpumask;
1942
1943 /*
1944  * The tracer itself will not take this lock, but still we want
1945  * to provide a consistent cpumask to user-space:
1946  */
1947 static DEFINE_MUTEX(tracing_cpumask_update_lock);
1948
1949 /*
1950  * Temporary storage for the character representation of the
1951  * CPU bitmask (and one more byte for the newline):
1952  */
1953 static char mask_str[NR_CPUS + 1];
1954
1955 static ssize_t
1956 tracing_cpumask_read(struct file *filp, char __user *ubuf,
1957                      size_t count, loff_t *ppos)
1958 {
1959         int len;
1960
1961         mutex_lock(&tracing_cpumask_update_lock);
1962
1963         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
1964         if (count - len < 2) {
1965                 count = -EINVAL;
1966                 goto out_err;
1967         }
1968         len += sprintf(mask_str + len, "\n");
1969         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
1970
1971 out_err:
1972         mutex_unlock(&tracing_cpumask_update_lock);
1973
1974         return count;
1975 }
1976
1977 static ssize_t
1978 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
1979                       size_t count, loff_t *ppos)
1980 {
1981         int err, cpu;
1982         cpumask_var_t tracing_cpumask_new;
1983
1984         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
1985                 return -ENOMEM;
1986
1987         mutex_lock(&tracing_cpumask_update_lock);
1988         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
1989         if (err)
1990                 goto err_unlock;
1991
1992         local_irq_disable();
1993         __raw_spin_lock(&ftrace_max_lock);
1994         for_each_tracing_cpu(cpu) {
1995                 /*
1996                  * Increase/decrease the disabled counter if we are
1997                  * about to flip a bit in the cpumask:
1998                  */
1999                 if (cpumask_test_cpu(cpu, tracing_cpumask) &&
2000                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2001                         atomic_inc(&global_trace.data[cpu]->disabled);
2002                 }
2003                 if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
2004                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
2005                         atomic_dec(&global_trace.data[cpu]->disabled);
2006                 }
2007         }
2008         __raw_spin_unlock(&ftrace_max_lock);
2009         local_irq_enable();
2010
2011         cpumask_copy(tracing_cpumask, tracing_cpumask_new);
2012
2013         mutex_unlock(&tracing_cpumask_update_lock);
2014         free_cpumask_var(tracing_cpumask_new);
2015
2016         return count;
2017
2018 err_unlock:
2019         mutex_unlock(&tracing_cpumask_update_lock);
2020         free_cpumask_var(tracing_cpumask_new);
2021
2022         return err;
2023 }
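
/*
 * For illustration: the mask written here is parsed by cpumask_parse_user(),
 * i.e. the usual hex cpumask format.  Writing "3", for instance, keeps
 * tracing enabled on CPUs 0 and 1 only; every CPU whose bit gets cleared has
 * its ->disabled counter bumped above, and every CPU whose bit gets set has
 * it dropped again.
 */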
2024
2025 static struct file_operations tracing_cpumask_fops = {
2026         .open           = tracing_open_generic,
2027         .read           = tracing_cpumask_read,
2028         .write          = tracing_cpumask_write,
2029 };
2030
2031 static ssize_t
2032 tracing_trace_options_read(struct file *filp, char __user *ubuf,
2033                        size_t cnt, loff_t *ppos)
2034 {
2035         struct tracer_opt *trace_opts;
2036         u32 tracer_flags;
2037         int len = 0;
2038         char *buf;
2039         int r = 0;
2040         int i;
2041
2042
2043         /* calculate max size */
2044         for (i = 0; trace_options[i]; i++) {
2045                 len += strlen(trace_options[i]);
2046                 len += 3; /* "no" and newline */
2047         }
2048
2049         mutex_lock(&trace_types_lock);
2050         tracer_flags = current_trace->flags->val;
2051         trace_opts = current_trace->flags->opts;
2052
2053         /*
2054          * Increase the size with the names of options specific
2055          * to the current tracer.
2056          */
2057         for (i = 0; trace_opts[i].name; i++) {
2058                 len += strlen(trace_opts[i].name);
2059                 len += 3; /* "no" and newline */
2060         }
2061
2062         /* +2 for \n and \0 */
2063         buf = kmalloc(len + 2, GFP_KERNEL);
2064         if (!buf) {
2065                 mutex_unlock(&trace_types_lock);
2066                 return -ENOMEM;
2067         }
2068
2069         for (i = 0; trace_options[i]; i++) {
2070                 if (trace_flags & (1 << i))
2071                         r += sprintf(buf + r, "%s\n", trace_options[i]);
2072                 else
2073                         r += sprintf(buf + r, "no%s\n", trace_options[i]);
2074         }
2075
2076         for (i = 0; trace_opts[i].name; i++) {
2077                 if (tracer_flags & trace_opts[i].bit)
2078                         r += sprintf(buf + r, "%s\n",
2079                                 trace_opts[i].name);
2080                 else
2081                         r += sprintf(buf + r, "no%s\n",
2082                                 trace_opts[i].name);
2083         }
2084         mutex_unlock(&trace_types_lock);
2085
2086         WARN_ON(r >= len + 2);
2087
2088         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2089
2090         kfree(buf);
2091         return r;
2092 }
2093
2094 /* Try to assign a tracer specific option */
2095 static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2096 {
2097         struct tracer_flags *trace_flags = trace->flags;
2098         struct tracer_opt *opts = NULL;
2099         int ret = 0, i = 0;
2100         int len;
2101
2102         for (i = 0; trace_flags->opts[i].name; i++) {
2103                 opts = &trace_flags->opts[i];
2104                 len = strlen(opts->name);
2105
2106                 if (strncmp(cmp, opts->name, len) == 0) {
2107                         ret = trace->set_flag(trace_flags->val,
2108                                 opts->bit, !neg);
2109                         break;
2110                 }
2111         }
2112         /* Not found */
2113         if (!trace_flags->opts[i].name)
2114                 return -EINVAL;
2115
2116         /* Refused to handle */
2117         if (ret)
2118                 return ret;
2119
2120         if (neg)
2121                 trace_flags->val &= ~opts->bit;
2122         else
2123                 trace_flags->val |= opts->bit;
2124
2125         return 0;
2126 }
2127
2128 static ssize_t
2129 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2130                         size_t cnt, loff_t *ppos)
2131 {
2132         char buf[64];
2133         char *cmp = buf;
2134         int neg = 0;
2135         int ret;
2136         int i;
2137
2138         if (cnt >= sizeof(buf))
2139                 return -EINVAL;
2140
2141         if (copy_from_user(&buf, ubuf, cnt))
2142                 return -EFAULT;
2143
2144         buf[cnt] = 0;
2145
2146         if (strncmp(buf, "no", 2) == 0) {
2147                 neg = 1;
2148                 cmp += 2;
2149         }
2150
2151         for (i = 0; trace_options[i]; i++) {
2152                 int len = strlen(trace_options[i]);
2153
2154                 if (strncmp(cmp, trace_options[i], len) == 0) {
2155                         if (neg)
2156                                 trace_flags &= ~(1 << i);
2157                         else
2158                                 trace_flags |= (1 << i);
2159                         break;
2160                 }
2161         }
2162
2163         /* If no option could be set, test the specific tracer options */
2164         if (!trace_options[i]) {
2165                 mutex_lock(&trace_types_lock);
2166                 ret = set_tracer_option(current_trace, cmp, neg);
2167                 mutex_unlock(&trace_types_lock);
2168                 if (ret)
2169                         return ret;
2170         }
2171
2172         filp->f_pos += cnt;
2173
2174         return cnt;
2175 }
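
/*
 * For illustration, a minimal userspace sketch of flipping a core flag
 * through the "no" prefix handled above, using the trace_options path from
 * the mini-HOWTO below (assumes debugfs is mounted at /debug):
 *
 *	int fd = open("/debug/tracing/trace_options", O_WRONLY);
 *
 *	write(fd, "noprint-parent", 14);	// clear the print-parent flag
 *	write(fd, "print-parent", 12);		// set it again
 *	close(fd);
 */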
2176
2177 static struct file_operations tracing_iter_fops = {
2178         .open           = tracing_open_generic,
2179         .read           = tracing_trace_options_read,
2180         .write          = tracing_trace_options_write,
2181 };
2182
2183 static const char readme_msg[] =
2184         "tracing mini-HOWTO:\n\n"
2185         "# mkdir /debug\n"
2186         "# mount -t debugfs nodev /debug\n\n"
2187         "# cat /debug/tracing/available_tracers\n"
2188         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
2189         "# cat /debug/tracing/current_tracer\n"
2190         "none\n"
2191         "# echo sched_switch > /debug/tracing/current_tracer\n"
2192         "# cat /debug/tracing/current_tracer\n"
2193         "sched_switch\n"
2194         "# cat /debug/tracing/trace_options\n"
2195         "noprint-parent nosym-offset nosym-addr noverbose\n"
2196         "# echo print-parent > /debug/tracing/trace_options\n"
2197         "# echo 1 > /debug/tracing/tracing_enabled\n"
2198         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2199         "# echo 0 > /debug/tracing/tracing_enabled\n"
2200 ;
2201
2202 static ssize_t
2203 tracing_readme_read(struct file *filp, char __user *ubuf,
2204                        size_t cnt, loff_t *ppos)
2205 {
2206         return simple_read_from_buffer(ubuf, cnt, ppos,
2207                                         readme_msg, strlen(readme_msg));
2208 }
2209
2210 static struct file_operations tracing_readme_fops = {
2211         .open           = tracing_open_generic,
2212         .read           = tracing_readme_read,
2213 };
2214
2215 static ssize_t
2216 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2217                   size_t cnt, loff_t *ppos)
2218 {
2219         char buf[64];
2220         int r;
2221
2222         r = sprintf(buf, "%u\n", tracer_enabled);
2223         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2224 }
2225
2226 static ssize_t
2227 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2228                    size_t cnt, loff_t *ppos)
2229 {
2230         struct trace_array *tr = filp->private_data;
2231         char buf[64];
2232         unsigned long val;
2233         int ret;
2234
2235         if (cnt >= sizeof(buf))
2236                 return -EINVAL;
2237
2238         if (copy_from_user(&buf, ubuf, cnt))
2239                 return -EFAULT;
2240
2241         buf[cnt] = 0;
2242
2243         ret = strict_strtoul(buf, 10, &val);
2244         if (ret < 0)
2245                 return ret;
2246
2247         val = !!val;
2248
2249         mutex_lock(&trace_types_lock);
2250         if (tracer_enabled ^ val) {
2251                 if (val) {
2252                         tracer_enabled = 1;
2253                         if (current_trace->start)
2254                                 current_trace->start(tr);
2255                         tracing_start();
2256                 } else {
2257                         tracer_enabled = 0;
2258                         tracing_stop();
2259                         if (current_trace->stop)
2260                                 current_trace->stop(tr);
2261                 }
2262         }
2263         mutex_unlock(&trace_types_lock);
2264
2265         filp->f_pos += cnt;
2266
2267         return cnt;
2268 }
2269
2270 static ssize_t
2271 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2272                        size_t cnt, loff_t *ppos)
2273 {
2274         char buf[max_tracer_type_len+2];
2275         int r;
2276
2277         mutex_lock(&trace_types_lock);
2278         if (current_trace)
2279                 r = sprintf(buf, "%s\n", current_trace->name);
2280         else
2281                 r = sprintf(buf, "\n");
2282         mutex_unlock(&trace_types_lock);
2283
2284         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2285 }
2286
2287 int tracer_init(struct tracer *t, struct trace_array *tr)
2288 {
2289         tracing_reset_online_cpus(tr);
2290         return t->init(tr);
2291 }
2292
2293 struct trace_option_dentry;
2294
2295 static struct trace_option_dentry *
2296 create_trace_option_files(struct tracer *tracer);
2297
2298 static void
2299 destroy_trace_option_files(struct trace_option_dentry *topts);
2300
2301 static int tracing_set_tracer(const char *buf)
2302 {
2303         static struct trace_option_dentry *topts;
2304         struct trace_array *tr = &global_trace;
2305         struct tracer *t;
2306         int ret = 0;
2307
2308         mutex_lock(&trace_types_lock);
2309         for (t = trace_types; t; t = t->next) {
2310                 if (strcmp(t->name, buf) == 0)
2311                         break;
2312         }
2313         if (!t) {
2314                 ret = -EINVAL;
2315                 goto out;
2316         }
2317         if (t == current_trace)
2318                 goto out;
2319
2320         trace_branch_disable();
2321         if (current_trace && current_trace->reset)
2322                 current_trace->reset(tr);
2323
2324         destroy_trace_option_files(topts);
2325
2326         current_trace = t;
2327
2328         topts = create_trace_option_files(current_trace);
2329
2330         if (t->init) {
2331                 ret = tracer_init(t, tr);
2332                 if (ret)
2333                         goto out;
2334         }
2335
2336         trace_branch_enable(tr);
2337  out:
2338         mutex_unlock(&trace_types_lock);
2339
2340         return ret;
2341 }
2342
2343 static ssize_t
2344 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2345                         size_t cnt, loff_t *ppos)
2346 {
2347         char buf[max_tracer_type_len+1];
2348         int i;
2349         size_t ret;
2350         int err;
2351
2352         ret = cnt;
2353
2354         if (cnt > max_tracer_type_len)
2355                 cnt = max_tracer_type_len;
2356
2357         if (copy_from_user(&buf, ubuf, cnt))
2358                 return -EFAULT;
2359
2360         buf[cnt] = 0;
2361
2362         /* strip ending whitespace. */
2363         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2364                 buf[i] = 0;
2365
2366         err = tracing_set_tracer(buf);
2367         if (err)
2368                 return err;
2369
2370         filp->f_pos += ret;
2371
2372         return ret;
2373 }
2374
2375 static ssize_t
2376 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2377                      size_t cnt, loff_t *ppos)
2378 {
2379         unsigned long *ptr = filp->private_data;
2380         char buf[64];
2381         int r;
2382
2383         r = snprintf(buf, sizeof(buf), "%ld\n",
2384                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2385         if (r > sizeof(buf))
2386                 r = sizeof(buf);
2387         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2388 }
2389
2390 static ssize_t
2391 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2392                       size_t cnt, loff_t *ppos)
2393 {
2394         unsigned long *ptr = filp->private_data;
2395         char buf[64];
2396         unsigned long val;
2397         int ret;
2398
2399         if (cnt >= sizeof(buf))
2400                 return -EINVAL;
2401
2402         if (copy_from_user(&buf, ubuf, cnt))
2403                 return -EFAULT;
2404
2405         buf[cnt] = 0;
2406
2407         ret = strict_strtoul(buf, 10, &val);
2408         if (ret < 0)
2409                 return ret;
2410
2411         *ptr = val * 1000;
2412
2413         return cnt;
2414 }
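
/*
 * Worked example: the value written here is taken in microseconds and
 * stored in nanoseconds (val * 1000), matching the read side above which
 * converts back with nsecs_to_usecs().  Writing "500" therefore sets the
 * latency value to 500 usecs, stored as 500000.
 */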
2415
2416 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2417 {
2418         long cpu_file = (long) inode->i_private;
2419         struct trace_iterator *iter;
2420         int ret = 0;
2421
2422         if (tracing_disabled)
2423                 return -ENODEV;
2424
2425         mutex_lock(&trace_types_lock);
2426
2427         /* We only allow one reader per cpu */
2428         if (cpu_file == TRACE_PIPE_ALL_CPU) {
2429                 if (!cpumask_empty(tracing_reader_cpumask)) {
2430                         ret = -EBUSY;
2431                         goto out;
2432                 }
2433                 cpumask_setall(tracing_reader_cpumask);
2434         } else {
2435                 if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask))
2436                         cpumask_set_cpu(cpu_file, tracing_reader_cpumask);
2437                 else {
2438                         ret = -EBUSY;
2439                         goto out;
2440                 }
2441         }
2442
2443         /* create a buffer to store the information to pass to userspace */
2444         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2445         if (!iter) {
2446                 ret = -ENOMEM;
2447                 goto out;
2448         }
2449
2450         /*
2451          * We make a copy of the current tracer to avoid concurrent
2452          * changes to it while we are reading.
2453          */
2454         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
2455         if (!iter->trace) {
2456                 ret = -ENOMEM;
2457                 goto fail;
2458         }
2459         if (current_trace)
2460                 *iter->trace = *current_trace;
2461
2462         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
2463                 ret = -ENOMEM;
2464                 goto fail;
2465         }
2466
2467         /* trace pipe does not show start of buffer */
2468         cpumask_setall(iter->started);
2469
2470         iter->cpu_file = cpu_file;
2471         iter->tr = &global_trace;
2472         mutex_init(&iter->mutex);
2473         filp->private_data = iter;
2474
2475         if (iter->trace->pipe_open)
2476                 iter->trace->pipe_open(iter);
2477
2478 out:
2479         mutex_unlock(&trace_types_lock);
2480         return ret;
2481
2482 fail:
2483         kfree(iter->trace);
2484         kfree(iter);
2485         mutex_unlock(&trace_types_lock);
2486         return ret;
2487 }
2488
2489 static int tracing_release_pipe(struct inode *inode, struct file *file)
2490 {
2491         struct trace_iterator *iter = file->private_data;
2492
2493         mutex_lock(&trace_types_lock);
2494
2495         if (iter->cpu_file == TRACE_PIPE_ALL_CPU)
2496                 cpumask_clear(tracing_reader_cpumask);
2497         else
2498                 cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask);
2499
2500         mutex_unlock(&trace_types_lock);
2501
2502         free_cpumask_var(iter->started);
2503         mutex_destroy(&iter->mutex);
2504         kfree(iter->trace);
2505         kfree(iter);
2506
2507         return 0;
2508 }
2509
2510 static unsigned int
2511 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2512 {
2513         struct trace_iterator *iter = filp->private_data;
2514
2515         if (trace_flags & TRACE_ITER_BLOCK) {
2516                 /*
2517                  * Always select as readable when in blocking mode
2518                  */
2519                 return POLLIN | POLLRDNORM;
2520         } else {
2521                 if (!trace_empty(iter))
2522                         return POLLIN | POLLRDNORM;
2523                 poll_wait(filp, &trace_wait, poll_table);
2524                 if (!trace_empty(iter))
2525                         return POLLIN | POLLRDNORM;
2526
2527                 return 0;
2528         }
2529 }
2530
2531
2532 void default_wait_pipe(struct trace_iterator *iter)
2533 {
2534         DEFINE_WAIT(wait);
2535
2536         prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE);
2537
2538         if (trace_empty(iter))
2539                 schedule();
2540
2541         finish_wait(&trace_wait, &wait);
2542 }
2543
2544 /*
2545  * This is a makeshift waitqueue.
2546  * A tracer might use this callback in some rare cases:
2547  *
2548  *  1) the current tracer might hold the runqueue lock when it wakes up
2549  *     a reader, hence a deadlock (sched, function, and function graph tracers)
2550  *  2) the function tracers trace all functions; we don't want
2551  *     the overhead of calling wake_up and friends
2552  *     (and of tracing them too)
2553  *
2554  *     Anyway, this is a really primitive wakeup.
2555  */
2556 void poll_wait_pipe(struct trace_iterator *iter)
2557 {
2558         set_current_state(TASK_INTERRUPTIBLE);
2559         /* sleep for 100 msecs, and try again. */
2560         schedule_timeout(HZ / 10);
2561 }
2562
2563 /* Must be called with trace_types_lock mutex held. */
2564 static int tracing_wait_pipe(struct file *filp)
2565 {
2566         struct trace_iterator *iter = filp->private_data;
2567
2568         while (trace_empty(iter)) {
2569
2570                 if ((filp->f_flags & O_NONBLOCK)) {
2571                         return -EAGAIN;
2572                 }
2573
2574                 mutex_unlock(&iter->mutex);
2575
2576                 iter->trace->wait_pipe(iter);
2577
2578                 mutex_lock(&iter->mutex);
2579
2580                 if (signal_pending(current))
2581                         return -EINTR;
2582
2583                 /*
2584                  * We block until we read something and tracing is disabled.
2585                  * We still block if tracing is disabled, but we have never
2586                  * read anything. This allows a user to cat this file, and
2587                  * then enable tracing. But after we have read something,
2588                  * we give an EOF when tracing is again disabled.
2589                  *
2590                  * iter->pos will be 0 if we haven't read anything.
2591                  */
2592                 if (!tracer_enabled && iter->pos)
2593                         break;
2594         }
2595
2596         return 1;
2597 }
2598
2599 /*
2600  * Consumer reader.
2601  */
2602 static ssize_t
2603 tracing_read_pipe(struct file *filp, char __user *ubuf,
2604                   size_t cnt, loff_t *ppos)
2605 {
2606         struct trace_iterator *iter = filp->private_data;
2607         static struct tracer *old_tracer;
2608         ssize_t sret;
2609
2610         /* return any leftover data */
2611         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2612         if (sret != -EBUSY)
2613                 return sret;
2614
2615         trace_seq_init(&iter->seq);
2616
2617         /* copy the tracer to avoid using a global lock all around */
2618         mutex_lock(&trace_types_lock);
2619         if (unlikely(old_tracer != current_trace && current_trace)) {
2620                 old_tracer = current_trace;
2621                 *iter->trace = *current_trace;
2622         }
2623         mutex_unlock(&trace_types_lock);
2624
2625         /*
2626          * Avoid more than one consumer on a single file descriptor.
2627          * This is just a matter of trace coherency; the ring buffer itself
2628          * is protected.
2629          */
2630         mutex_lock(&iter->mutex);
2631         if (iter->trace->read) {
2632                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2633                 if (sret)
2634                         goto out;
2635         }
2636
2637 waitagain:
2638         sret = tracing_wait_pipe(filp);
2639         if (sret <= 0)
2640                 goto out;
2641
2642         /* stop when tracing is finished */
2643         if (trace_empty(iter)) {
2644                 sret = 0;
2645                 goto out;
2646         }
2647
2648         if (cnt >= PAGE_SIZE)
2649                 cnt = PAGE_SIZE - 1;
2650
2651         /* reset all but tr, trace, and overruns */
2652         memset(&iter->seq, 0,
2653                sizeof(struct trace_iterator) -
2654                offsetof(struct trace_iterator, seq));
2655         iter->pos = -1;
2656
2657         while (find_next_entry_inc(iter) != NULL) {
2658                 enum print_line_t ret;
2659                 int len = iter->seq.len;
2660
2661                 ret = print_trace_line(iter);
2662                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2663                         /* don't print partial lines */
2664                         iter->seq.len = len;
2665                         break;
2666                 }
2667                 if (ret != TRACE_TYPE_NO_CONSUME)
2668                         trace_consume(iter);
2669
2670                 if (iter->seq.len >= cnt)
2671                         break;
2672         }
2673
2674         /* Now copy what we have to the user */
2675         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2676         if (iter->seq.readpos >= iter->seq.len)
2677                 trace_seq_init(&iter->seq);
2678
2679         /*
2680          * If there was nothing to send to the user, in spite of consuming trace
2681          * entries, go back to wait for more entries.
2682          */
2683         if (sret == -EBUSY)
2684                 goto waitagain;
2685
2686 out:
2687         mutex_unlock(&iter->mutex);
2688
2689         return sret;
2690 }
2691
2692 static void tracing_pipe_buf_release(struct pipe_inode_info *pipe,
2693                                      struct pipe_buffer *buf)
2694 {
2695         __free_page(buf->page);
2696 }
2697
2698 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
2699                                      unsigned int idx)
2700 {
2701         __free_page(spd->pages[idx]);
2702 }
2703
2704 static struct pipe_buf_operations tracing_pipe_buf_ops = {
2705         .can_merge              = 0,
2706         .map                    = generic_pipe_buf_map,
2707         .unmap                  = generic_pipe_buf_unmap,
2708         .confirm                = generic_pipe_buf_confirm,
2709         .release                = tracing_pipe_buf_release,
2710         .steal                  = generic_pipe_buf_steal,
2711         .get                    = generic_pipe_buf_get,
2712 };
2713
2714 static size_t
2715 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
2716 {
2717         size_t count;
2718         int ret;
2719
2720         /* Seq buffer is page-sized, exactly what we need. */
2721         for (;;) {
2722                 count = iter->seq.len;
2723                 ret = print_trace_line(iter);
2724                 count = iter->seq.len - count;
2725                 if (rem < count) {
2726                         rem = 0;
2727                         iter->seq.len -= count;
2728                         break;
2729                 }
2730                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
2731                         iter->seq.len -= count;
2732                         break;
2733                 }
2734
2735                 trace_consume(iter);
2736                 rem -= count;
2737                 if (!find_next_entry_inc(iter)) {
2738                         rem = 0;
2739                         iter->ent = NULL;
2740                         break;
2741                 }
2742         }
2743
2744         return rem;
2745 }
2746
2747 static ssize_t tracing_splice_read_pipe(struct file *filp,
2748                                         loff_t *ppos,
2749                                         struct pipe_inode_info *pipe,
2750                                         size_t len,
2751                                         unsigned int flags)
2752 {
2753         struct page *pages[PIPE_BUFFERS];
2754         struct partial_page partial[PIPE_BUFFERS];
2755         struct trace_iterator *iter = filp->private_data;
2756         struct splice_pipe_desc spd = {
2757                 .pages          = pages,
2758                 .partial        = partial,
2759                 .nr_pages       = 0, /* This gets updated below. */
2760                 .flags          = flags,
2761                 .ops            = &tracing_pipe_buf_ops,
2762                 .spd_release    = tracing_spd_release_pipe,
2763         };
2764         static struct tracer *old_tracer;
2765         ssize_t ret;
2766         size_t rem;
2767         unsigned int i;
2768
2769         /* copy the tracer to avoid using a global lock all around */
2770         mutex_lock(&trace_types_lock);
2771         if (unlikely(old_tracer != current_trace && current_trace)) {
2772                 old_tracer = current_trace;
2773                 *iter->trace = *current_trace;
2774         }
2775         mutex_unlock(&trace_types_lock);
2776
2777         mutex_lock(&iter->mutex);
2778
2779         if (iter->trace->splice_read) {
2780                 ret = iter->trace->splice_read(iter, filp,
2781                                                ppos, pipe, len, flags);
2782                 if (ret)
2783                         goto out_err;
2784         }
2785
2786         ret = tracing_wait_pipe(filp);
2787         if (ret <= 0)
2788                 goto out_err;
2789
2790         if (!iter->ent && !find_next_entry_inc(iter)) {
2791                 ret = -EFAULT;
2792                 goto out_err;
2793         }
2794
2795         /* Fill as many pages as possible. */
2796         for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
2797                 pages[i] = alloc_page(GFP_KERNEL);
2798                 if (!pages[i])
2799                         break;
2800
2801                 rem = tracing_fill_pipe_page(rem, iter);
2802
2803                 /* Copy the data into the page, so we can start over. */
2804                 ret = trace_seq_to_buffer(&iter->seq,
2805                                           page_address(pages[i]),
2806                                           iter->seq.len);
2807                 if (ret < 0) {
2808                         __free_page(pages[i]);
2809                         break;
2810                 }
2811                 partial[i].offset = 0;
2812                 partial[i].len = iter->seq.len;
2813
2814                 trace_seq_init(&iter->seq);
2815         }
2816
2817         mutex_unlock(&iter->mutex);
2818
2819         spd.nr_pages = i;
2820
2821         return splice_to_pipe(pipe, &spd);
2822
2823 out_err:
2824         mutex_unlock(&iter->mutex);
2825
2826         return ret;
2827 }
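
/*
 * For illustration, a minimal userspace sketch of pulling formatted trace
 * text through the splice path above, using the per-cpu trace_pipe file
 * created in tracing_init_debugfs_percpu() below (assumes debugfs is
 * mounted at /debug as in the mini-HOWTO earlier in this file):
 *
 *	int p[2], fd, out;
 *	ssize_t n;
 *
 *	fd = open("/debug/tracing/per_cpu/cpu0/trace_pipe", O_RDONLY);
 *	out = open("trace.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *	pipe(p);
 *	// file -> pipe -> file, without an intermediate userspace copy
 *	while ((n = splice(fd, NULL, p[1], NULL, 4096, 0)) > 0)
 *		splice(p[0], NULL, out, NULL, n, 0);
 */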
2828
2829 static ssize_t
2830 tracing_entries_read(struct file *filp, char __user *ubuf,
2831                      size_t cnt, loff_t *ppos)
2832 {
2833         struct trace_array *tr = filp->private_data;
2834         char buf[64];
2835         int r;
2836
2837         r = sprintf(buf, "%lu\n", tr->entries >> 10);
2838         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2839 }
2840
2841 static ssize_t
2842 tracing_entries_write(struct file *filp, const char __user *ubuf,
2843                       size_t cnt, loff_t *ppos)
2844 {
2845         unsigned long val;
2846         char buf[64];
2847         int ret, cpu;
2848
2849         if (cnt >= sizeof(buf))
2850                 return -EINVAL;
2851
2852         if (copy_from_user(&buf, ubuf, cnt))
2853                 return -EFAULT;
2854
2855         buf[cnt] = 0;
2856
2857         ret = strict_strtoul(buf, 10, &val);
2858         if (ret < 0)
2859                 return ret;
2860
2861         /* must have at least 1 entry */
2862         if (!val)
2863                 return -EINVAL;
2864
2865         mutex_lock(&trace_types_lock);
2866
2867         tracing_stop();
2868
2869         /* disable all cpu buffers */
2870         for_each_tracing_cpu(cpu) {
2871                 if (global_trace.data[cpu])
2872                         atomic_inc(&global_trace.data[cpu]->disabled);
2873                 if (max_tr.data[cpu])
2874                         atomic_inc(&max_tr.data[cpu]->disabled);
2875         }
2876
2877         /* value is in KB */
2878         val <<= 10;
2879
2880         if (val != global_trace.entries) {
2881                 ret = ring_buffer_resize(global_trace.buffer, val);
2882                 if (ret < 0) {
2883                         cnt = ret;
2884                         goto out;
2885                 }
2886
2887                 ret = ring_buffer_resize(max_tr.buffer, val);
2888                 if (ret < 0) {
2889                         int r;
2890                         cnt = ret;
2891                         r = ring_buffer_resize(global_trace.buffer,
2892                                                global_trace.entries);
2893                         if (r < 0) {
2894                                 /* AARGH! We are left with different
2895                                  * size max buffer!!!! */
2896                                 WARN_ON(1);
2897                                 tracing_disabled = 1;
2898                         }
2899                         goto out;
2900                 }
2901
2902                 global_trace.entries = val;
2903         }
2904
2905         filp->f_pos += cnt;
2906
2907         /* If check pages failed, return ENOMEM */
2908         if (tracing_disabled)
2909                 cnt = -ENOMEM;
2910  out:
2911         for_each_tracing_cpu(cpu) {
2912                 if (global_trace.data[cpu])
2913                         atomic_dec(&global_trace.data[cpu]->disabled);
2914                 if (max_tr.data[cpu])
2915                         atomic_dec(&max_tr.data[cpu]->disabled);
2916         }
2917
2918         tracing_start();
2919         max_tr.entries = global_trace.entries;
2920         mutex_unlock(&trace_types_lock);
2921
2922         return cnt;
2923 }
2924
2925 static int mark_printk(const char *fmt, ...)
2926 {
2927         int ret;
2928         va_list args;
2929         va_start(args, fmt);
2930         ret = trace_vprintk(0, -1, fmt, args);
2931         va_end(args);
2932         return ret;
2933 }
2934
2935 static ssize_t
2936 tracing_mark_write(struct file *filp, const char __user *ubuf,
2937                                         size_t cnt, loff_t *fpos)
2938 {
2939         char *buf;
2940         char *end;
2941
2942         if (tracing_disabled)
2943                 return -EINVAL;
2944
2945         if (cnt > TRACE_BUF_SIZE)
2946                 cnt = TRACE_BUF_SIZE;
2947
2948         buf = kmalloc(cnt + 1, GFP_KERNEL);
2949         if (buf == NULL)
2950                 return -ENOMEM;
2951
2952         if (copy_from_user(buf, ubuf, cnt)) {
2953                 kfree(buf);
2954                 return -EFAULT;
2955         }
2956
2957         /* Cut at the first NUL or newline. */
2958         buf[cnt] = '\0';
2959         end = strchr(buf, '\n');
2960         if (end)
2961                 *end = '\0';
2962
2963         cnt = mark_printk("%s\n", buf);
2964         kfree(buf);
2965         *fpos += cnt;
2966
2967         return cnt;
2968 }
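
/*
 * For illustration: a string written to the debugfs file backed by
 * tracing_mark_fops (wired up elsewhere in this file) is injected into the
 * trace via mark_printk().  The input is cut at the first NUL or newline
 * and capped at TRACE_BUF_SIZE, so
 *
 *	write(fd, "hit the slow path\n", 18);
 *
 * records a single "hit the slow path" marker entry.
 */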
2969
2970 static struct file_operations tracing_max_lat_fops = {
2971         .open           = tracing_open_generic,
2972         .read           = tracing_max_lat_read,
2973         .write          = tracing_max_lat_write,
2974 };
2975
2976 static struct file_operations tracing_ctrl_fops = {
2977         .open           = tracing_open_generic,
2978         .read           = tracing_ctrl_read,
2979         .write          = tracing_ctrl_write,
2980 };
2981
2982 static struct file_operations set_tracer_fops = {
2983         .open           = tracing_open_generic,
2984         .read           = tracing_set_trace_read,
2985         .write          = tracing_set_trace_write,
2986 };
2987
2988 static struct file_operations tracing_pipe_fops = {
2989         .open           = tracing_open_pipe,
2990         .poll           = tracing_poll_pipe,
2991         .read           = tracing_read_pipe,
2992         .splice_read    = tracing_splice_read_pipe,
2993         .release        = tracing_release_pipe,
2994 };
2995
2996 static struct file_operations tracing_entries_fops = {
2997         .open           = tracing_open_generic,
2998         .read           = tracing_entries_read,
2999         .write          = tracing_entries_write,
3000 };
3001
3002 static struct file_operations tracing_mark_fops = {
3003         .open           = tracing_open_generic,
3004         .write          = tracing_mark_write,
3005 };
3006
3007 struct ftrace_buffer_info {
3008         struct trace_array      *tr;
3009         void                    *spare;
3010         int                     cpu;
3011         unsigned int            read;
3012 };
3013
3014 static int tracing_buffers_open(struct inode *inode, struct file *filp)
3015 {
3016         int cpu = (int)(long)inode->i_private;
3017         struct ftrace_buffer_info *info;
3018
3019         if (tracing_disabled)
3020                 return -ENODEV;
3021
3022         info = kzalloc(sizeof(*info), GFP_KERNEL);
3023         if (!info)
3024                 return -ENOMEM;
3025
3026         info->tr        = &global_trace;
3027         info->cpu       = cpu;
3028         info->spare     = ring_buffer_alloc_read_page(info->tr->buffer);
3029         /* Force reading ring buffer for first read */
3030         info->read      = (unsigned int)-1;
3031         if (!info->spare)
3032                 goto out;
3033
3034         filp->private_data = info;
3035
3036         return 0;
3037
3038  out:
3039         kfree(info);
3040         return -ENOMEM;
3041 }
3042
3043 static ssize_t
3044 tracing_buffers_read(struct file *filp, char __user *ubuf,
3045                      size_t count, loff_t *ppos)
3046 {
3047         struct ftrace_buffer_info *info = filp->private_data;
3048         unsigned int pos;
3049         ssize_t ret;
3050         size_t size;
3051
3052         /* Do we have previous read data to read? */
3053         if (info->read < PAGE_SIZE)
3054                 goto read;
3055
3056         info->read = 0;
3057
3058         ret = ring_buffer_read_page(info->tr->buffer,
3059                                     &info->spare,
3060                                     count,
3061                                     info->cpu, 0);
3062         if (ret < 0)
3063                 return 0;
3064
3065         pos = ring_buffer_page_len(info->spare);
3066
3067         if (pos < PAGE_SIZE)
3068                 memset(info->spare + pos, 0, PAGE_SIZE - pos);
3069
3070 read:
3071         size = PAGE_SIZE - info->read;
3072         if (size > count)
3073                 size = count;
3074
3075         ret = copy_to_user(ubuf, info->spare + info->read, size);
3076         if (ret)
3077                 return -EFAULT;
3078         *ppos += size;
3079         info->read += size;
3080
3081         return size;
3082 }
3083
3084 static int tracing_buffers_release(struct inode *inode, struct file *file)
3085 {
3086         struct ftrace_buffer_info *info = file->private_data;
3087
3088         ring_buffer_free_read_page(info->tr->buffer, info->spare);
3089         kfree(info);
3090
3091         return 0;
3092 }
3093
3094 struct buffer_ref {
3095         struct ring_buffer      *buffer;
3096         void                    *page;
3097         int                     ref;
3098 };
3099
3100 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
3101                                     struct pipe_buffer *buf)
3102 {
3103         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3104
3105         if (--ref->ref)
3106                 return;
3107
3108         ring_buffer_free_read_page(ref->buffer, ref->page);
3109         kfree(ref);
3110         buf->private = 0;
3111 }
3112
3113 static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe,
3114                                  struct pipe_buffer *buf)
3115 {
3116         return 1;
3117 }
3118
3119 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
3120                                 struct pipe_buffer *buf)
3121 {
3122         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
3123
3124         ref->ref++;
3125 }
3126
3127 /* Pipe buffer operations for splice'd ring buffer pages. */
3128 static struct pipe_buf_operations buffer_pipe_buf_ops = {
3129         .can_merge              = 0,
3130         .map                    = generic_pipe_buf_map,
3131         .unmap                  = generic_pipe_buf_unmap,
3132         .confirm                = generic_pipe_buf_confirm,
3133         .release                = buffer_pipe_buf_release,
3134         .steal                  = buffer_pipe_buf_steal,
3135         .get                    = buffer_pipe_buf_get,
3136 };
3137
3138 /*
3139  * Callback from splice_to_pipe(); releases pages left in the spd
3140  * in case we errored out while filling the pipe.
3141  */
3142 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
3143 {
3144         struct buffer_ref *ref =
3145                 (struct buffer_ref *)spd->partial[i].private;
3146
3147         if (--ref->ref)
3148                 return;
3149
3150         ring_buffer_free_read_page(ref->buffer, ref->page);
3151         kfree(ref);
3152         spd->partial[i].private = 0;
3153 }
3154
3155 static ssize_t
3156 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3157                             struct pipe_inode_info *pipe, size_t len,
3158                             unsigned int flags)
3159 {
3160         struct ftrace_buffer_info *info = file->private_data;
3161         struct partial_page partial[PIPE_BUFFERS];
3162         struct page *pages[PIPE_BUFFERS];
3163         struct splice_pipe_desc spd = {
3164                 .pages          = pages,
3165                 .partial        = partial,
3166                 .flags          = flags,
3167                 .ops            = &buffer_pipe_buf_ops,
3168                 .spd_release    = buffer_spd_release,
3169         };
3170         struct buffer_ref *ref;
3171         int size, i;
3172         size_t ret;
3173
3174         /*
3175          * We can't seek on a buffer input
3176          */
3177         if (unlikely(*ppos))
3178                 return -ESPIPE;
3179
3180
3181         for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) {
3182                 struct page *page;
3183                 int r;
3184
3185                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
3186                 if (!ref)
3187                         break;
3188
3189                 ref->buffer = info->tr->buffer;
3190                 ref->page = ring_buffer_alloc_read_page(ref->buffer);
3191                 if (!ref->page) {
3192                         kfree(ref);
3193                         break;
3194                 }
3195
3196                 r = ring_buffer_read_page(ref->buffer, &ref->page,
3197                                           len, info->cpu, 0);
3198                 if (r < 0) {
3199                         ring_buffer_free_read_page(ref->buffer,
3200                                                    ref->page);
3201                         kfree(ref);
3202                         break;
3203                 }
3204
3205                 /*
3206                  * Zero out any leftover data; this page is going
3207                  * to user land.
3208                  */
3209                 size = ring_buffer_page_len(ref->page);
3210                 if (size < PAGE_SIZE)
3211                         memset(ref->page + size, 0, PAGE_SIZE - size);
3212
3213                 page = virt_to_page(ref->page);
3214
3215                 spd.pages[i] = page;
3216                 spd.partial[i].len = PAGE_SIZE;
3217                 spd.partial[i].offset = 0;
3218                 spd.partial[i].private = (unsigned long)ref;
3219                 spd.nr_pages++;
3220         }
3221
3222         spd.nr_pages = i;
3223
3224         /* did we read anything? */
3225         if (!spd.nr_pages) {
3226                 if (flags & SPLICE_F_NONBLOCK)
3227                         ret = -EAGAIN;
3228                 else
3229                         ret = 0;
3230                 /* TODO: block */
3231                 return ret;
3232         }
3233
3234         ret = splice_to_pipe(pipe, &spd);
3235
3236         return ret;
3237 }
3238
3239 static const struct file_operations tracing_buffers_fops = {
3240         .open           = tracing_buffers_open,
3241         .read           = tracing_buffers_read,
3242         .release        = tracing_buffers_release,
3243         .splice_read    = tracing_buffers_splice_read,
3244         .llseek         = no_llseek,
3245 };
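
/*
 * For illustration, a minimal userspace sketch of consuming the binary
 * buffer file backed by tracing_buffers_fops above.  The debugfs path the
 * file is attached to is wired up elsewhere in this file, so the sketch
 * takes it on the command line.  Each splice moves whole ring-buffer pages
 * (zero-padded to PAGE_SIZE above); with SPLICE_F_NONBLOCK the splice
 * returns -EAGAIN when no data is available.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(int argc, char **argv)
 *	{
 *		int p[2], fd, out;
 *		ssize_t n;
 *
 *		if (argc < 2 || pipe(p) < 0)
 *			return 1;
 *		fd = open(argv[1], O_RDONLY);		// per-cpu buffer file
 *		out = open("buffer.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *		if (fd < 0 || out < 0)
 *			return 1;
 *		// raw, page-sized chunks: buffer file -> pipe -> dump file
 *		while ((n = splice(fd, NULL, p[1], NULL, 4096, 0)) > 0)
 *			splice(p[0], NULL, out, NULL, n, 0);
 *		return 0;
 *	}
 */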
3246
3247 #ifdef CONFIG_DYNAMIC_FTRACE
3248
3249 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
3250 {
3251         return 0;
3252 }
3253
3254 static ssize_t
3255 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
3256                   size_t cnt, loff_t *ppos)
3257 {
3258         static char ftrace_dyn_info_buffer[1024];
3259         static DEFINE_MUTEX(dyn_info_mutex);
3260         unsigned long *p = filp->private_data;
3261         char *buf = ftrace_dyn_info_buffer;
3262         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
3263         int r;
3264
3265         mutex_lock(&dyn_info_mutex);
3266         r = sprintf(buf, "%ld ", *p);
3267
3268         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
3269         buf[r++] = '\n';
3270
3271         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3272
3273         mutex_unlock(&dyn_info_mutex);
3274
3275         return r;
3276 }
3277
3278 static struct file_operations tracing_dyn_info_fops = {
3279         .open           = tracing_open_generic,
3280         .read           = tracing_read_dyn_info,
3281 };
3282 #endif
3283
3284 static struct dentry *d_tracer;
3285
3286 struct dentry *tracing_init_dentry(void)
3287 {
3288         static int once;
3289
3290         if (d_tracer)
3291                 return d_tracer;
3292
3293         d_tracer = debugfs_create_dir("tracing", NULL);
3294
3295         if (!d_tracer && !once) {
3296                 once = 1;
3297                 pr_warning("Could not create debugfs directory 'tracing'\n");
3298                 return NULL;
3299         }
3300
3301         return d_tracer;
3302 }
3303
3304 static struct dentry *d_percpu;
3305
3306 struct dentry *tracing_dentry_percpu(void)
3307 {
3308         static int once;
3309         struct dentry *d_tracer;
3310
3311         if (d_percpu)
3312                 return d_percpu;
3313
3314         d_tracer = tracing_init_dentry();
3315
3316         if (!d_tracer)
3317                 return NULL;
3318
3319         d_percpu = debugfs_create_dir("per_cpu", d_tracer);
3320
3321         if (!d_percpu && !once) {
3322                 once = 1;
3323                 pr_warning("Could not create debugfs directory 'per_cpu'\n");
3324                 return NULL;
3325         }
3326
3327         return d_percpu;
3328 }
3329
3330 static void tracing_init_debugfs_percpu(long cpu)
3331 {
3332         struct dentry *d_percpu = tracing_dentry_percpu();
3333         struct dentry *entry, *d_cpu;
3334         /* strlen("cpu") + up to 3 digits for the cpu number + '\0' */
3335         char cpu_dir[7];
3336
3337         if (cpu > 999 || cpu < 0)
3338                 return;
3339
3340         sprintf(cpu_dir, "cpu%ld", cpu);
3341         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
3342         if (!d_cpu) {
3343                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
3344                 return;
3345         }
3346
3347         /* per cpu trace_pipe */
3348         entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
3349                                 (void *) cpu, &tracing_pipe_fops);
3350         if (!entry)
3351                 pr_warning("Could not create debugfs 'trace_pipe' entry\n");
3352
3353         /* per cpu trace */
3354         entry = debugfs_create_file("trace", 0444, d_cpu,
3355                                 (void *) cpu, &tracing_fops);
3356         if (!entry)
3357                 pr_warning("Could not create debugfs 'trace' entry\n");
3358 }
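
/*
 * Resulting per-cpu layout, with debugfs mounted at /debug as in the
 * mini-HOWTO above (here for CPU 0):
 *
 *	/debug/tracing/per_cpu/cpu0/trace_pipe
 *	/debug/tracing/per_cpu/cpu0/trace
 */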
3359
3360 #ifdef CONFIG_FTRACE_SELFTEST
3361 /* Let selftest have access to static functions in this file */
3362 #include "trace_selftest.c"
3363 #endif
3364
3365 struct trace_option_dentry {
3366         struct tracer_opt               *opt;
3367         struct tracer_flags             *flags;
3368         struct dentry                   *entry;
3369 };
3370
3371 static ssize_t
3372 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
3373                         loff_t *ppos)
3374 {
3375         struct trace_option_dentry *topt = filp->private_data;
3376         char *buf;
3377
3378         if (topt->flags->val & topt->opt->bit)
3379                 buf = "1\n";
3380         else
3381                 buf = "0\n";
3382
3383         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3384 }
3385
3386 static ssize_t
3387 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
3388                          loff_t *ppos)
3389 {
3390         struct trace_option_dentry *topt = filp->private_data;
3391         unsigned long val;
3392         char buf[64];
3393         int ret;
3394
3395         if (cnt >= sizeof(buf))
3396                 return -EINVAL;
3397
3398         if (copy_from_user(&buf, ubuf, cnt))
3399                 return -EFAULT;
3400
3401         buf[cnt] = 0;
3402
3403         ret = strict_strtoul(buf, 10, &val);
3404         if (ret < 0)
3405                 return ret;
3406
3407         ret = 0;
3408         switch (val) {
3409         case 0:
3410                 /* do nothing if already cleared */
3411                 if (!(topt->flags->val & topt->opt->bit))
3412                         break;
3413
3414                 mutex_lock(&trace_types_lock);
3415                 if (current_trace->set_flag)
3416                         ret = current_trace->set_flag(topt->flags->val,
3417                                                       topt->opt->bit, 0);
3418                 mutex_unlock(&trace_types_lock);
3419                 if (ret)
3420                         return ret;
3421                 topt->flags->val &= ~topt->opt->bit;
3422                 break;
3423         case 1:
3424                 /* do nothing if already set */
3425                 if (topt->flags->val & topt->opt->bit)
3426                         break;
3427
3428                 mutex_lock(&trace_types_lock);
3429                 if (current_trace->set_flag)
3430                         ret = current_trace->set_flag(topt->flags->val,
3431                                                       topt->opt->bit, 1);
3432                 mutex_unlock(&trace_types_lock);
3433                 if (ret)
3434                         return ret;
3435                 topt->flags->val |= topt->opt->bit;
3436                 break;
3437
3438         default:
3439                 return -EINVAL;
3440         }
3441
3442         *ppos += cnt;
3443
3444         return cnt;
3445 }
3446
3447
3448 static const struct file_operations trace_options_fops = {
3449         .open = tracing_open_generic,
3450         .read = trace_options_read,
3451         .write = trace_options_write,
3452 };
3453
3454 static ssize_t
3455 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
3456                         loff_t *ppos)
3457 {
3458         long index = (long)filp->private_data;
3459         char *buf;
3460
3461         if (trace_flags & (1 << index))
3462                 buf = "1\n";
3463         else
3464                 buf = "0\n";
3465
3466         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
3467 }
3468
3469 static ssize_t
3470 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
3471                          loff_t *ppos)
3472 {
3473         long index = (long)filp->private_data;
3474         char buf[64];
3475         unsigned long val;
3476         int ret;
3477
3478         if (cnt >= sizeof(buf))
3479                 return -EINVAL;
3480
3481         if (copy_from_user(&buf, ubuf, cnt))
3482                 return -EFAULT;
3483
3484         buf[cnt] = 0;
3485
3486         ret = strict_strtoul(buf, 10, &val);
3487         if (ret < 0)
3488                 return ret;
3489
3490         switch (val) {
3491         case 0:
3492                 trace_flags &= ~(1 << index);
3493                 break;
3494         case 1:
3495                 trace_flags |= 1 << index;
3496                 break;
3497
3498         default:
3499                 return -EINVAL;
3500         }
3501
3502         *ppos += cnt;
3503
3504         return cnt;
3505 }
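/*
 * Usage sketch (illustrative, assuming debugfs is mounted at
 * /sys/kernel/debug): each core option gets its own file under
 * tracing/options/, and writing '0' or '1' flips the corresponding bit
 * in trace_flags.  From userspace, something like:
 *
 *	int fd = open("/sys/kernel/debug/tracing/options/sym-addr", O_WRONLY);
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 *
 * ("sym-addr" is just an example option name here.)
 */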
3506
3507 static const struct file_operations trace_options_core_fops = {
3508         .open = tracing_open_generic,
3509         .read = trace_options_core_read,
3510         .write = trace_options_core_write,
3511 };
3512
3513 static struct dentry *trace_options_init_dentry(void)
3514 {
3515         struct dentry *d_tracer;
3516         static struct dentry *t_options;
3517
3518         if (t_options)
3519                 return t_options;
3520
3521         d_tracer = tracing_init_dentry();
3522         if (!d_tracer)
3523                 return NULL;
3524
3525         t_options = debugfs_create_dir("options", d_tracer);
3526         if (!t_options) {
3527                 pr_warning("Could not create debugfs directory 'options'\n");
3528                 return NULL;
3529         }
3530
3531         return t_options;
3532 }
3533
3534 static void
3535 create_trace_option_file(struct trace_option_dentry *topt,
3536                          struct tracer_flags *flags,
3537                          struct tracer_opt *opt)
3538 {
3539         struct dentry *t_options;
3540         struct dentry *entry;
3541
3542         t_options = trace_options_init_dentry();
3543         if (!t_options)
3544                 return;
3545
3546         topt->flags = flags;
3547         topt->opt = opt;
3548
3549         entry = debugfs_create_file(opt->name, 0644, t_options, topt,
3550                                     &trace_options_fops);
3551
3552         topt->entry = entry;
3553
3554 }
3555
3556 static struct trace_option_dentry *
3557 create_trace_option_files(struct tracer *tracer)
3558 {
3559         struct trace_option_dentry *topts;
3560         struct tracer_flags *flags;
3561         struct tracer_opt *opts;
3562         int cnt;
3563
3564         if (!tracer)
3565                 return NULL;
3566
3567         flags = tracer->flags;
3568
3569         if (!flags || !flags->opts)
3570                 return NULL;
3571
3572         opts = flags->opts;
3573
3574         for (cnt = 0; opts[cnt].name; cnt++)
3575                 ;
3576
3577         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
3578         if (!topts)
3579                 return NULL;
3580
3581         for (cnt = 0; opts[cnt].name; cnt++)
3582                 create_trace_option_file(&topts[cnt], flags,
3583                                          &opts[cnt]);
3584
3585         return topts;
3586 }
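/*
 * For illustration only: a tracer that wants per-tracer options supplies
 * a tracer_flags/tracer_opt pair shaped roughly like the hypothetical
 * example below, built with the TRACER_OPT() helper from trace.h;
 * create_trace_option_files() walks the opts array and creates one 0644
 * file per option under tracing/options/:
 *
 *	static struct tracer_opt foo_opts[] = {
 *		{ TRACER_OPT(foo-verbose, 0x1) },
 *		{ }
 *	};
 *
 *	static struct tracer_flags foo_flags = {
 *		.val  = 0,
 *		.opts = foo_opts,
 *	};
 *
 * The empty entry terminates the array.  Toggling such a file lands in
 * trace_options_write(), which calls the tracer's set_flag() callback
 * before updating flags->val.
 */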
3587
3588 static void
3589 destroy_trace_option_files(struct trace_option_dentry *topts)
3590 {
3591         int cnt;
3592
3593         if (!topts)
3594                 return;
3595
3596         for (cnt = 0; topts[cnt].opt; cnt++) {
3597                 if (topts[cnt].entry)
3598                         debugfs_remove(topts[cnt].entry);
3599         }
3600
3601         kfree(topts);
3602 }
3603
3604 static struct dentry *
3605 create_trace_option_core_file(const char *option, long index)
3606 {
3607         struct dentry *t_options;
3608         struct dentry *entry;
3609
3610         t_options = trace_options_init_dentry();
3611         if (!t_options)
3612                 return NULL;
3613
3614         entry = debugfs_create_file(option, 0644, t_options, (void *)index,
3615                                     &trace_options_core_fops);
3616
3617         return entry;
3618 }
3619
3620 static __init void create_trace_options_dir(void)
3621 {
3622         struct dentry *t_options;
3623         struct dentry *entry;
3624         int i;
3625
3626         t_options = trace_options_init_dentry();
3627         if (!t_options)
3628                 return;
3629
3630         for (i = 0; trace_options[i]; i++) {
3631                 entry = create_trace_option_core_file(trace_options[i], i);
3632                 if (!entry)
3633                         pr_warning("Could not create debugfs %s entry\n",
3634                                    trace_options[i]);
3635         }
3636 }
3637
3638 static __init int tracer_init_debugfs(void)
3639 {
3640         struct dentry *d_tracer;
3641         struct dentry *buffers;
3642         struct dentry *entry;
3643         int cpu;
3644
3645         d_tracer = tracing_init_dentry();
3646
3647         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
3648                                     &global_trace, &tracing_ctrl_fops);
3649         if (!entry)
3650                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
3651
3652         entry = debugfs_create_file("trace_options", 0644, d_tracer,
3653                                     NULL, &tracing_iter_fops);
3654         if (!entry)
3655                 pr_warning("Could not create debugfs 'trace_options' entry\n");
3656
3657         create_trace_options_dir();
3658
3659         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
3660                                     NULL, &tracing_cpumask_fops);
3661         if (!entry)
3662                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
3663
3664         entry = debugfs_create_file("trace", 0444, d_tracer,
3665                                  (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
3666         if (!entry)
3667                 pr_warning("Could not create debugfs 'trace' entry\n");
3668
3669         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
3670                                     &global_trace, &show_traces_fops);
3671         if (!entry)
3672                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
3673
3674         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
3675                                     &global_trace, &set_tracer_fops);
3676         if (!entry)
3677                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
3678
3679         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
3680                                     &tracing_max_latency,
3681                                     &tracing_max_lat_fops);
3682         if (!entry)
3683                 pr_warning("Could not create debugfs "
3684                            "'tracing_max_latency' entry\n");
3685
3686         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
3687                                     &tracing_thresh, &tracing_max_lat_fops);
3688         if (!entry)
3689                 pr_warning("Could not create debugfs "
3690                            "'tracing_thresh' entry\n");
3691         entry = debugfs_create_file("README", 0644, d_tracer,
3692                                     NULL, &tracing_readme_fops);
3693         if (!entry)
3694                 pr_warning("Could not create debugfs 'README' entry\n");
3695
3696         entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
3697                         (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
3698         if (!entry)
3699                 pr_warning("Could not create debugfs "
3700                            "'trace_pipe' entry\n");
3701
3702         entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
3703                                     &global_trace, &tracing_entries_fops);
3704         if (!entry)
3705                 pr_warning("Could not create debugfs "
3706                            "'buffer_size_kb' entry\n");
3707
3708         entry = debugfs_create_file("trace_marker", 0220, d_tracer,
3709                                     NULL, &tracing_mark_fops);
3710         if (!entry)
3711                 pr_warning("Could not create debugfs "
3712                            "'trace_marker' entry\n");
3713
3714         buffers = debugfs_create_dir("binary_buffers", d_tracer);
3715
3716         if (!buffers)
3717                 pr_warning("Could not create buffers directory\n");
3718         else {
3719                 int cpu;
3720                 char buf[64];
3721
3722                 for_each_tracing_cpu(cpu) {
3723                         sprintf(buf, "%d", cpu);
3724
3725                         entry = debugfs_create_file(buf, 0444, buffers,
3726                                                     (void *)(long)cpu,
3727                                                     &tracing_buffers_fops);
3728                         if (!entry)
3729                                 pr_warning("Could not create debugfs buffers "
3730                                            "'%s' entry\n", buf);
3731                 }
3732         }
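	/*
	 * Illustrative only: the per-cpu "binary_buffers/<N>" files created
	 * above expose raw ring buffer pages and are intended to be consumed
	 * with splice(), so the data can move to a pipe or file without an
	 * extra copy.  A userspace sketch (assuming debugfs is mounted at
	 * /sys/kernel/debug) could look like:
	 *
	 *	fd = open("/sys/kernel/debug/tracing/binary_buffers/0", O_RDONLY);
	 *	splice(fd, NULL, pipe_write_fd, NULL, 4096, SPLICE_F_MOVE);
	 */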
3733
3734 #ifdef CONFIG_DYNAMIC_FTRACE
3735         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
3736                                     &ftrace_update_tot_cnt,
3737                                     &tracing_dyn_info_fops);
3738         if (!entry)
3739                 pr_warning("Could not create debugfs "
3740                            "'dyn_ftrace_total_info' entry\n");
3741 #endif
3742 #ifdef CONFIG_SYSPROF_TRACER
3743         init_tracer_sysprof_debugfs(d_tracer);
3744 #endif
3745
3746         for_each_tracing_cpu(cpu)
3747                 tracing_init_debugfs_percpu(cpu);
3748
3749         return 0;
3750 }
3751
3752 int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args)
3753 {
3754         static DEFINE_SPINLOCK(trace_buf_lock);
3755         static char trace_buf[TRACE_BUF_SIZE];
3756
3757         struct ring_buffer_event *event;
3758         struct trace_array *tr = &global_trace;
3759         struct trace_array_cpu *data;
3760         int cpu, len = 0, size, pc;
3761         struct print_entry *entry;
3762         unsigned long irq_flags;
3763
3764         if (tracing_disabled || tracing_selftest_running)
3765                 return 0;
3766
3767         pc = preempt_count();
3768         preempt_disable_notrace();
3769         cpu = raw_smp_processor_id();
3770         data = tr->data[cpu];
3771
3772         if (unlikely(atomic_read(&data->disabled)))
3773                 goto out;
3774
3775         pause_graph_tracing();
3776         spin_lock_irqsave(&trace_buf_lock, irq_flags);
3777         len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3778
3779         len = min(len, TRACE_BUF_SIZE-1);
3780         trace_buf[len] = 0;
3781
3782         size = sizeof(*entry) + len + 1;
3783         event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc);
3784         if (!event)
3785                 goto out_unlock;
3786         entry = ring_buffer_event_data(event);
3787         entry->ip                       = ip;
3788         entry->depth                    = depth;
3789
3790         memcpy(&entry->buf, trace_buf, len);
3791         entry->buf[len] = 0;
3792         ring_buffer_unlock_commit(tr->buffer, event);
3793
3794  out_unlock:
3795         spin_unlock_irqrestore(&trace_buf_lock, irq_flags);
3796         unpause_graph_tracing();
3797  out:
3798         preempt_enable_notrace();
3799
3800         return len;
3801 }
3802 EXPORT_SYMBOL_GPL(trace_vprintk);
3803
3804 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3805 {
3806         int ret;
3807         va_list ap;
3808
3809         if (!(trace_flags & TRACE_ITER_PRINTK))
3810                 return 0;
3811
3812         va_start(ap, fmt);
3813         ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3814         va_end(ap);
3815         return ret;
3816 }
3817 EXPORT_SYMBOL_GPL(__ftrace_printk);
3818
3819 int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
3820 {
3821         if (!(trace_flags & TRACE_ITER_PRINTK))
3822                 return 0;
3823
3824         return trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap);
3825 }
3826 EXPORT_SYMBOL_GPL(__ftrace_vprintk);
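/*
 * Sketch for illustration: kernel code normally reaches the helpers above
 * through the ftrace_printk() macro (declared in the ftrace headers), which
 * passes the caller's instruction pointer along, e.g.
 *
 *	ftrace_printk("processing %s: value=%d\n", name, value);
 *
 * The formatted string is recorded in the ring buffer via trace_vprintk()
 * instead of being printed to the console.
 */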
3827
3828 static int trace_panic_handler(struct notifier_block *this,
3829                                unsigned long event, void *unused)
3830 {
3831         if (ftrace_dump_on_oops)
3832                 ftrace_dump();
3833         return NOTIFY_OK;
3834 }
3835
3836 static struct notifier_block trace_panic_notifier = {
3837         .notifier_call  = trace_panic_handler,
3838         .next           = NULL,
3839         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
3840 };
3841
3842 static int trace_die_handler(struct notifier_block *self,
3843                              unsigned long val,
3844                              void *data)
3845 {
3846         switch (val) {
3847         case DIE_OOPS:
3848                 if (ftrace_dump_on_oops)
3849                         ftrace_dump();
3850                 break;
3851         default:
3852                 break;
3853         }
3854         return NOTIFY_OK;
3855 }
3856
3857 static struct notifier_block trace_die_notifier = {
3858         .notifier_call = trace_die_handler,
3859         .priority = 200
3860 };
3861
3862 /*
3863  * printk is limited to a maximum of 1024 characters; we really don't
3864  * need that much here.  Nothing should be printing 1000 characters anyway.
3865  */
3866 #define TRACE_MAX_PRINT         1000
3867
3868 /*
3869  * Define KERN_TRACE here so that we have a single place to modify
3870  * if we decide to change the log level at which the ftrace dump
3871  * is printed.
3872  */
3873 #define KERN_TRACE              KERN_EMERG
3874
3875 static void
3876 trace_printk_seq(struct trace_seq *s)
3877 {
3878         /* Probably should print a warning here. */
3879         if (s->len >= TRACE_MAX_PRINT)
3880                 s->len = TRACE_MAX_PRINT;
3881
3882         /* should be zero terminated, but we are paranoid. */
3883         s->buffer[s->len] = 0;
3884
3885         printk(KERN_TRACE "%s", s->buffer);
3886
3887         trace_seq_init(s);
3888 }
3889
3890 void ftrace_dump(void)
3891 {
3892         static DEFINE_SPINLOCK(ftrace_dump_lock);
3893         /* use static because iter can be a bit big for the stack */
3894         static struct trace_iterator iter;
3895         static int dump_ran;
3896         unsigned long flags;
3897         int cnt = 0, cpu;
3898
3899         /* only one dump */
3900         spin_lock_irqsave(&ftrace_dump_lock, flags);
3901         if (dump_ran)
3902                 goto out;
3903
3904         dump_ran = 1;
3905
3906         /* No turning back! */
3907         tracing_off();
3908         ftrace_kill();
3909
3910         for_each_tracing_cpu(cpu) {
3911                 atomic_inc(&global_trace.data[cpu]->disabled);
3912         }
3913
3914         /* don't look at user memory in panic mode */
3915         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
3916
3917         printk(KERN_TRACE "Dumping ftrace buffer:\n");
3918
3919         iter.tr = &global_trace;
3920         iter.trace = current_trace;
3921
3922         /*
3923          * We need to stop all tracing on all CPUs to read
3924          * the next buffer. This is a bit expensive, but it is
3925          * not done often. We print everything we can read,
3926          * and then release the locks again.
3927          */
3928
3929         while (!trace_empty(&iter)) {
3930
3931                 if (!cnt)
3932                         printk(KERN_TRACE "---------------------------------\n");
3933
3934                 cnt++;
3935
3936                 /* reset all but tr, trace, and overruns */
3937                 memset(&iter.seq, 0,
3938                        sizeof(struct trace_iterator) -
3939                        offsetof(struct trace_iterator, seq));
3940                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3941                 iter.pos = -1;
3942
3943                 if (find_next_entry_inc(&iter) != NULL) {
3944                         print_trace_line(&iter);
3945                         trace_consume(&iter);
3946                 }
3947
3948                 trace_printk_seq(&iter.seq);
3949         }
3950
3951         if (!cnt)
3952                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
3953         else
3954                 printk(KERN_TRACE "---------------------------------\n");
3955
3956  out:
3957         spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3958 }
3959
3960 __init static int tracer_alloc_buffers(void)
3961 {
3962         struct trace_array_cpu *data;
3963         int i;
3964         int ret = -ENOMEM;
3965
3966         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
3967                 goto out;
3968
3969         if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
3970                 goto out_free_buffer_mask;
3971
3972         if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL))
3973                 goto out_free_tracing_cpumask;
3974
3975         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
3976         cpumask_copy(tracing_cpumask, cpu_all_mask);
3977         cpumask_clear(tracing_reader_cpumask);
3978
3979         /* TODO: make the number of buffers hot pluggable with CPUs */
3980         global_trace.buffer = ring_buffer_alloc(trace_buf_size,
3981                                                    TRACE_BUFFER_FLAGS);
3982         if (!global_trace.buffer) {
3983                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
3984                 WARN_ON(1);
3985                 goto out_free_cpumask;
3986         }
3987         global_trace.entries = ring_buffer_size(global_trace.buffer);
3988
3989
3990 #ifdef CONFIG_TRACER_MAX_TRACE
3991         max_tr.buffer = ring_buffer_alloc(trace_buf_size,
3992                                              TRACE_BUFFER_FLAGS);
3993         if (!max_tr.buffer) {
3994                 printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
3995                 WARN_ON(1);
3996                 ring_buffer_free(global_trace.buffer);
3997                 goto out_free_cpumask;
3998         }
3999         max_tr.entries = ring_buffer_size(max_tr.buffer);
4000         WARN_ON(max_tr.entries != global_trace.entries);
4001 #endif
4002
4003         /* Set up the per-cpu trace_array_cpu pointers for all buffers */
4004         for_each_tracing_cpu(i) {
4005                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
4006                 max_tr.data[i] = &per_cpu(max_data, i);
4007         }
4008
4009         trace_init_cmdlines();
4010
4011         register_tracer(&nop_trace);
4012         current_trace = &nop_trace;
4013 #ifdef CONFIG_BOOT_TRACER
4014         register_tracer(&boot_tracer);
4015 #endif
4016         /* All seems OK, enable tracing */
4017         tracing_disabled = 0;
4018
4019         atomic_notifier_chain_register(&panic_notifier_list,
4020                                        &trace_panic_notifier);
4021
4022         register_die_notifier(&trace_die_notifier);
4023         return 0; /* success: only the error paths below free the cpumasks */
4024
4025 out_free_cpumask:
4026         free_cpumask_var(tracing_reader_cpumask);
4027 out_free_tracing_cpumask:
4028         free_cpumask_var(tracing_cpumask);
4029 out_free_buffer_mask:
4030         free_cpumask_var(tracing_buffer_mask);
4031 out:
4032         return ret;
4033 }
4034
4035 __init static int clear_boot_tracer(void)
4036 {
4037         /*
4038          * The default bootup tracer string points into an init section.
4039          * This function is called from a late initcall; if the requested
4040          * boot tracer was never found, clear the pointer out to prevent
4041          * a later registration from accessing the buffer that is
4042          * about to be freed.
4043          */
4044         if (!default_bootup_tracer)
4045                 return 0;
4046
4047         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
4048                default_bootup_tracer);
4049         default_bootup_tracer = NULL;
4050
4051         return 0;
4052 }
4053
4054 early_initcall(tracer_alloc_buffers);
4055 fs_initcall(tracer_init_debugfs);
4056 late_initcall(clear_boot_tracer);