tracing/ftrace: replace none tracer by nop tracer
[linux-2.6] kernel/trace/trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/notifier.h>
18 #include <linux/debugfs.h>
19 #include <linux/pagemap.h>
20 #include <linux/hardirq.h>
21 #include <linux/linkage.h>
22 #include <linux/uaccess.h>
23 #include <linux/ftrace.h>
24 #include <linux/module.h>
25 #include <linux/percpu.h>
26 #include <linux/kdebug.h>
27 #include <linux/ctype.h>
28 #include <linux/init.h>
29 #include <linux/poll.h>
30 #include <linux/gfp.h>
31 #include <linux/fs.h>
32 #include <linux/kprobes.h>
33 #include <linux/writeback.h>
34
35 #include <linux/stacktrace.h>
36
37 #include "trace.h"
38
39 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
40 unsigned long __read_mostly     tracing_thresh;
41
42 static unsigned long __read_mostly      tracing_nr_buffers;
43 static cpumask_t __read_mostly          tracing_buffer_mask;
44
45 #define for_each_tracing_cpu(cpu)       \
46         for_each_cpu_mask(cpu, tracing_buffer_mask)
47
48 static int trace_alloc_page(void);
49 static int trace_free_page(void);
50
51 static int tracing_disabled = 1;
52
53 static unsigned long tracing_pages_allocated;
54
55 long
56 ns2usecs(cycle_t nsec)
57 {
58         nsec += 500;
59         do_div(nsec, 1000);
60         return nsec;
61 }
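/*
 * Editor's note (worked example): the "+ 500" above makes the division
 * round to the nearest microsecond rather than truncate, e.g.
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */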
62
63 cycle_t ftrace_now(int cpu)
64 {
65         return cpu_clock(cpu);
66 }
67
68 /*
69  * The global_trace is the descriptor that holds the tracing
70  * buffers for the live tracing. For each CPU, it contains
71  * a linked list of pages that will store trace entries. The
72  * page descriptors of those pages are used to hold the linked
73  * list, by chaining their lru items together for each per-CPU
74  * buffer.
75  *
76  * For each active CPU there is a data field that holds the
77  * pages for the buffer for that CPU. Each CPU has the same number
78  * of pages allocated for its buffer.
79  */
80 static struct trace_array       global_trace;
81
82 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
83
84 /*
85  * The max_tr is used to snapshot the global_trace when a maximum
86  * latency is reached. Some tracers will use this to store a maximum
87  * trace while they continue examining live traces.
88  *
89  * The buffers for the max_tr are set up the same as the global_trace.
90  * When a snapshot is taken, the linked list of the max_tr is swapped
91  * with the linked list of the global_trace, and the buffers of the
92  * global_trace are reset so that tracing can continue.
93  */
94 static struct trace_array       max_tr;
95
96 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
97
98 /* tracer_enabled is used to toggle activation of a tracer */
99 static int                      tracer_enabled = 1;
100
101 /* function tracing enabled */
102 int                             ftrace_function_enabled;
103
104 /*
105  * trace_nr_entries is the number of entries allocated for each
106  * buffer. Note that the number of entries is always rounded to a
107  * multiple of ENTRIES_PER_PAGE.
108  *
109  * This number is purposely set to the low value of 16384 so that
110  * a dump on oops does not force the user to wait for an enormous
111  * amount of output. The value is configurable at both boot time
112  * and run time.
113  */
114 #define TRACE_ENTRIES_DEFAULT   16384UL
115
116 static unsigned long            trace_nr_entries = TRACE_ENTRIES_DEFAULT;
117
118 /* trace_types holds a linked list of available tracers. */
119 static struct tracer            *trace_types __read_mostly;
120
121 /* current_trace points to the tracer that is currently active */
122 static struct tracer            *current_trace __read_mostly;
123
124 /*
125  * max_tracer_type_len is used to simplify allocating buffers
126  * for reading tracer names from userspace. We keep track of
127  * the longest registered tracer name.
128  */
129 static int                      max_tracer_type_len;
130
131 /*
132  * trace_types_lock is used to protect the trace_types list.
133  * This lock is also used to serialize user access: accesses
134  * from userspace hold this lock for as long as the resulting
135  * activity runs inside the kernel.
136  */
137 static DEFINE_MUTEX(trace_types_lock);
138
139 /* trace_wait is a waitqueue for tasks blocked on trace_poll */
140 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
141
142 /* trace_flags holds iter_ctrl options */
143 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
144
145 /**
146  * trace_wake_up - wake up tasks waiting for trace input
147  *
148  * Simply wakes up any task that is blocked on the trace_wait
149  * queue. This is used with trace_poll for tasks polling the trace.
150  */
151 void trace_wake_up(void)
152 {
153         /*
154          * The runqueue_is_locked() can fail, but this is the best we
155          * have for now:
156          */
157         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
158                 wake_up(&trace_wait);
159 }
160
161 #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
162
163 static int __init set_nr_entries(char *str)
164 {
165         unsigned long nr_entries;
166         int ret;
167
168         if (!str)
169                 return 0;
170         ret = strict_strtoul(str, 0, &nr_entries);
171         /* nr_entries cannot be zero */
172         if (ret < 0 || nr_entries == 0)
173                 return 0;
174         trace_nr_entries = nr_entries;
175         return 1;
176 }
177 __setup("trace_entries=", set_nr_entries);
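/*
 * Editor's note (illustrative usage, not from the original file): the
 * default above can be overridden on the kernel command line, e.g.
 *
 *	trace_entries=65536
 *
 * which lands in set_nr_entries(); the final number of entries is then
 * rounded to a multiple of ENTRIES_PER_PAGE when the buffers are set up.
 */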
178
179 unsigned long nsecs_to_usecs(unsigned long nsecs)
180 {
181         return nsecs / 1000;
182 }
183
184 /*
185  * TRACE_ITER_SYM_MASK masks the options in trace_flags that
186  * control the output of kernel symbols.
187  */
188 #define TRACE_ITER_SYM_MASK \
189         (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
190
191 /* These must match the bit positions in trace_iterator_flags */
192 static const char *trace_options[] = {
193         "print-parent",
194         "sym-offset",
195         "sym-addr",
196         "verbose",
197         "raw",
198         "hex",
199         "bin",
200         "block",
201         "stacktrace",
202         "sched-tree",
203         "ftrace_printk",
204         NULL
205 };
206
207 /*
208  * ftrace_max_lock is used to protect the swapping of buffers
209  * when taking a max snapshot. The buffers themselves are
210  * protected by per_cpu spinlocks. But the action of the swap
211  * needs its own lock.
212  *
213  * This is defined as a raw_spinlock_t in order to help
214  * with performance when lockdep debugging is enabled.
215  */
216 static raw_spinlock_t ftrace_max_lock =
217         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
218
219 /*
220  * Copy the new maximum trace into the separate maximum-trace
221  * structure. (this way the maximum trace is permanently saved,
222  * for later retrieval via /debugfs/tracing/latency_trace)
223  */
224 static void
225 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
226 {
227         struct trace_array_cpu *data = tr->data[cpu];
228
229         max_tr.cpu = cpu;
230         max_tr.time_start = data->preempt_timestamp;
231
232         data = max_tr.data[cpu];
233         data->saved_latency = tracing_max_latency;
234
235         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
236         data->pid = tsk->pid;
237         data->uid = tsk->uid;
238         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
239         data->policy = tsk->policy;
240         data->rt_priority = tsk->rt_priority;
241
242         /* record this task's comm */
243         tracing_record_cmdline(current);
244 }
245
246 #define CHECK_COND(cond)                        \
247         if (unlikely(cond)) {                   \
248                 tracing_disabled = 1;           \
249                 WARN_ON(1);                     \
250                 return -1;                      \
251         }
252
253 /**
254  * check_pages - integrity check of trace buffers
255  *
256  * As a safety measure we check to make sure the data pages have not
257  * been corrupted.
258  */
259 int check_pages(struct trace_array_cpu *data)
260 {
261         struct page *page, *tmp;
262
263         CHECK_COND(data->trace_pages.next->prev != &data->trace_pages);
264         CHECK_COND(data->trace_pages.prev->next != &data->trace_pages);
265
266         list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
267                 CHECK_COND(page->lru.next->prev != &page->lru);
268                 CHECK_COND(page->lru.prev->next != &page->lru);
269         }
270
271         return 0;
272 }
273
274 /**
275  * head_page - page address of the first page in per_cpu buffer.
276  *
277  * head_page returns the page address of the first page in
278  * a per_cpu buffer. This also performs various consistency
279  * checks to make sure the buffer has not been corrupted.
280  */
281 void *head_page(struct trace_array_cpu *data)
282 {
283         struct page *page;
284
285         if (list_empty(&data->trace_pages))
286                 return NULL;
287
288         page = list_entry(data->trace_pages.next, struct page, lru);
289         BUG_ON(&page->lru == &data->trace_pages);
290
291         return page_address(page);
292 }
293
294 /**
295  * trace_seq_printf - sequence printing of trace information
296  * @s: trace sequence descriptor
297  * @fmt: printf format string
298  *
299  * The tracer may use either sequence operations or its own
300  * copy to user routines. To simplify formatting of a trace,
301  * trace_seq_printf is used to store strings into a special
302  * buffer (@s). Then the output may be either used by
303  * the sequencer or pulled into another buffer.
304  */
305 int
306 trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
307 {
308         int len = (PAGE_SIZE - 1) - s->len;
309         va_list ap;
310         int ret;
311
312         if (!len)
313                 return 0;
314
315         va_start(ap, fmt);
316         ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
317         va_end(ap);
318
319         /* If we can't write it all, don't bother writing anything */
320         if (ret >= len)
321                 return 0;
322
323         s->len += ret;
324
325         return len;
326 }
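/*
 * Illustrative usage (editor's sketch; the helper name is made up): an
 * output routine typically builds one line at a time in a trace_seq.
 * trace_seq_printf() returns 0 when the page-sized buffer is full, so
 * callers simply give up on the rest of the line in that case:
 *
 *	static int example_print_entry(struct trace_seq *s,
 *				       struct trace_entry *entry)
 *	{
 *		if (!trace_seq_printf(s, "%d: ", entry->field.pid))
 *			return 0;
 *		return trace_seq_printf(s, "type %d\n", entry->type);
 *	}
 */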
327
328 /**
329  * trace_seq_puts - trace sequence printing of simple string
330  * @s: trace sequence descriptor
331  * @str: simple string to record
332  *
333  * The tracer may use either the sequence operations or its own
334  * copy to user routines. This function records a simple string
335  * into a special buffer (@s) for later retrieval by a sequencer
336  * or other mechanism.
337  */
338 static int
339 trace_seq_puts(struct trace_seq *s, const char *str)
340 {
341         int len = strlen(str);
342
343         if (len > ((PAGE_SIZE - 1) - s->len))
344                 return 0;
345
346         memcpy(s->buffer + s->len, str, len);
347         s->len += len;
348
349         return len;
350 }
351
352 static int
353 trace_seq_putc(struct trace_seq *s, unsigned char c)
354 {
355         if (s->len >= (PAGE_SIZE - 1))
356                 return 0;
357
358         s->buffer[s->len++] = c;
359
360         return 1;
361 }
362
363 static int
364 trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
365 {
366         if (len > ((PAGE_SIZE - 1) - s->len))
367                 return 0;
368
369         memcpy(s->buffer + s->len, mem, len);
370         s->len += len;
371
372         return len;
373 }
374
375 #define HEX_CHARS 17
376 static const char hex2asc[] = "0123456789abcdef";
377
378 static int
379 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
380 {
381         unsigned char hex[HEX_CHARS];
382         unsigned char *data = mem;
383         unsigned char byte;
384         int i, j;
385
386         BUG_ON(len >= HEX_CHARS);
387
388 #ifdef __BIG_ENDIAN
389         for (i = 0, j = 0; i < len; i++) {
390 #else
391         for (i = len-1, j = 0; i >= 0; i--) {
392 #endif
393                 byte = data[i];
394
395                 hex[j++] = hex2asc[byte & 0x0f];
396                 hex[j++] = hex2asc[byte >> 4];
397         }
398         hex[j++] = ' ';
399
400         return trace_seq_putmem(s, hex, j);
401 }
402
403 static void
404 trace_seq_reset(struct trace_seq *s)
405 {
406         s->len = 0;
407         s->readpos = 0;
408 }
409
410 ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt)
411 {
412         int len;
413         int ret;
414
415         if (s->len <= s->readpos)
416                 return -EBUSY;
417
418         len = s->len - s->readpos;
419         if (cnt > len)
420                 cnt = len;
421         ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt);
422         if (ret)
423                 return -EFAULT;
424
425         s->readpos += len;
426         return cnt;
427 }
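/*
 * Illustrative usage (editor's sketch): a read(2)-style handler formats
 * into a trace_seq and then drains it to the user buffer, roughly:
 *
 *	trace_seq_printf(&iter->seq, "...");
 *	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
 *
 * readpos records how much of the buffer has already been handed out to
 * userspace. This is essentially how the trace pipe read path of this
 * file uses it.
 */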
428
429 static void
430 trace_print_seq(struct seq_file *m, struct trace_seq *s)
431 {
432         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
433
434         s->buffer[len] = 0;
435         seq_puts(m, s->buffer);
436
437         trace_seq_reset(s);
438 }
439
440 /*
441  * Flip the trace buffers between two trace descriptors.
442  * This is usually done between the global_trace and the
443  * max_tr to record a snapshot of the current trace.
444  *
445  * The ftrace_max_lock must be held.
446  */
447 static void
448 flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
449 {
450         struct list_head flip_pages;
451
452         INIT_LIST_HEAD(&flip_pages);
453
454         memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
455                 sizeof(struct trace_array_cpu) -
456                 offsetof(struct trace_array_cpu, trace_head_idx));
457
458         check_pages(tr1);
459         check_pages(tr2);
460         list_splice_init(&tr1->trace_pages, &flip_pages);
461         list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
462         list_splice_init(&flip_pages, &tr2->trace_pages);
463         BUG_ON(!list_empty(&flip_pages));
464         check_pages(tr1);
465         check_pages(tr2);
466 }
467
468 /**
469  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
470  * @tr: tracer
471  * @tsk: the task with the latency
472  * @cpu: The cpu that initiated the trace.
473  *
474  * Flip the buffers between the @tr and the max_tr and record information
475  * about which task was the cause of this latency.
476  */
477 void
478 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
479 {
480         struct trace_array_cpu *data;
481         int i;
482
483         WARN_ON_ONCE(!irqs_disabled());
484         __raw_spin_lock(&ftrace_max_lock);
485         /* clear out all the previous traces */
486         for_each_tracing_cpu(i) {
487                 data = tr->data[i];
488                 flip_trace(max_tr.data[i], data);
489                 tracing_reset(data);
490         }
491
492         __update_max_tr(tr, tsk, cpu);
493         __raw_spin_unlock(&ftrace_max_lock);
494 }
495
496 /**
497  * update_max_tr_single - only copy one trace over, and reset the rest
498  * @tr: tracer
499  * @tsk: task with the latency
500  * @cpu: the cpu of the buffer to copy.
501  *
502  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
503  */
504 void
505 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
506 {
507         struct trace_array_cpu *data = tr->data[cpu];
508         int i;
509
510         WARN_ON_ONCE(!irqs_disabled());
511         __raw_spin_lock(&ftrace_max_lock);
512         for_each_tracing_cpu(i)
513                 tracing_reset(max_tr.data[i]);
514
515         flip_trace(max_tr.data[cpu], data);
516         tracing_reset(data);
517
518         __update_max_tr(tr, tsk, cpu);
519         __raw_spin_unlock(&ftrace_max_lock);
520 }
521
522 /**
523  * register_tracer - register a tracer with the ftrace system.
524  * @type: the plugin for the tracer
525  *
526  * Register a new plugin tracer.
527  */
528 int register_tracer(struct tracer *type)
529 {
530         struct tracer *t;
531         int len;
532         int ret = 0;
533
534         if (!type->name) {
535                 pr_info("Tracer must have a name\n");
536                 return -1;
537         }
538
539         mutex_lock(&trace_types_lock);
540         for (t = trace_types; t; t = t->next) {
541                 if (strcmp(type->name, t->name) == 0) {
542                         /* already found */
543                         pr_info("Trace %s already registered\n",
544                                 type->name);
545                         ret = -1;
546                         goto out;
547                 }
548         }
549
550 #ifdef CONFIG_FTRACE_STARTUP_TEST
551         if (type->selftest) {
552                 struct tracer *saved_tracer = current_trace;
553                 struct trace_array_cpu *data;
554                 struct trace_array *tr = &global_trace;
555                 int saved_ctrl = tr->ctrl;
556                 int i;
557                 /*
558                  * Run a selftest on this tracer.
559                  * Here we reset the trace buffer, and set the current
560                  * tracer to be this tracer. The tracer can then run some
561                  * internal tracing to verify that everything is in order.
562                  * If we fail, we do not register this tracer.
563                  */
564                 for_each_tracing_cpu(i) {
565                         data = tr->data[i];
566                         if (!head_page(data))
567                                 continue;
568                         tracing_reset(data);
569                 }
570                 current_trace = type;
571                 tr->ctrl = 0;
572                 /* the test is responsible for initializing and enabling */
573                 pr_info("Testing tracer %s: ", type->name);
574                 ret = type->selftest(type, tr);
575                 /* the test is responsible for resetting too */
576                 current_trace = saved_tracer;
577                 tr->ctrl = saved_ctrl;
578                 if (ret) {
579                         printk(KERN_CONT "FAILED!\n");
580                         goto out;
581                 }
582                 /* Only reset on passing, to avoid touching corrupted buffers */
583                 for_each_tracing_cpu(i) {
584                         data = tr->data[i];
585                         if (!head_page(data))
586                                 continue;
587                         tracing_reset(data);
588                 }
589                 printk(KERN_CONT "PASSED\n");
590         }
591 #endif
592
593         type->next = trace_types;
594         trace_types = type;
595         len = strlen(type->name);
596         if (len > max_tracer_type_len)
597                 max_tracer_type_len = len;
598
599  out:
600         mutex_unlock(&trace_types_lock);
601
602         return ret;
603 }
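/*
 * Illustrative sketch (editor's addition; all identifiers below are made
 * up): the smallest useful plugin only needs a name plus init/reset
 * callbacks, which is roughly what the nop tracer added by this commit
 * looks like:
 *
 *	static void example_trace_init(struct trace_array *tr)
 *	{
 *		int cpu;
 *
 *		for_each_tracing_cpu(cpu)
 *			tracing_reset(tr->data[cpu]);
 *	}
 *
 *	static void example_trace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	device_initcall(init_example_tracer);
 */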
604
605 void unregister_tracer(struct tracer *type)
606 {
607         struct tracer **t;
608         int len;
609
610         mutex_lock(&trace_types_lock);
611         for (t = &trace_types; *t; t = &(*t)->next) {
612                 if (*t == type)
613                         goto found;
614         }
615         pr_info("Trace %s not registered\n", type->name);
616         goto out;
617
618  found:
619         *t = (*t)->next;
620         if (strlen(type->name) != max_tracer_type_len)
621                 goto out;
622
623         max_tracer_type_len = 0;
624         for (t = &trace_types; *t; t = &(*t)->next) {
625                 len = strlen((*t)->name);
626                 if (len > max_tracer_type_len)
627                         max_tracer_type_len = len;
628         }
629  out:
630         mutex_unlock(&trace_types_lock);
631 }
632
633 void tracing_reset(struct trace_array_cpu *data)
634 {
635         data->trace_idx = 0;
636         data->overrun = 0;
637         data->trace_head = data->trace_tail = head_page(data);
638         data->trace_head_idx = 0;
639         data->trace_tail_idx = 0;
640 }
641
642 #define SAVED_CMDLINES 128
643 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
644 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
645 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
646 static int cmdline_idx;
647 static DEFINE_SPINLOCK(trace_cmdline_lock);
648
649 /* temporarily disable recording */
650 atomic_t trace_record_cmdline_disabled __read_mostly;
651
652 static void trace_init_cmdlines(void)
653 {
654         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
655         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
656         cmdline_idx = 0;
657 }
658
659 void trace_stop_cmdline_recording(void);
660
661 static void trace_save_cmdline(struct task_struct *tsk)
662 {
663         unsigned map;
664         unsigned idx;
665
666         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
667                 return;
668
669         /*
670          * It's not the end of the world if we don't get
671          * the lock, but we also don't want to spin
672          * nor do we want to disable interrupts,
673          * so if we miss here, then better luck next time.
674          */
675         if (!spin_trylock(&trace_cmdline_lock))
676                 return;
677
678         idx = map_pid_to_cmdline[tsk->pid];
679         if (idx >= SAVED_CMDLINES) {
680                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
681
682                 map = map_cmdline_to_pid[idx];
683                 if (map <= PID_MAX_DEFAULT)
684                         map_pid_to_cmdline[map] = (unsigned)-1;
685
686                 map_pid_to_cmdline[tsk->pid] = idx;
687
688                 cmdline_idx = idx;
689         }
690
691         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
692
693         spin_unlock(&trace_cmdline_lock);
694 }
695
696 static char *trace_find_cmdline(int pid)
697 {
698         char *cmdline = "<...>";
699         unsigned map;
700
701         if (!pid)
702                 return "<idle>";
703
704         if (pid > PID_MAX_DEFAULT)
705                 goto out;
706
707         map = map_pid_to_cmdline[pid];
708         if (map >= SAVED_CMDLINES)
709                 goto out;
710
711         cmdline = saved_cmdlines[map];
712
713  out:
714         return cmdline;
715 }
716
717 void tracing_record_cmdline(struct task_struct *tsk)
718 {
719         if (atomic_read(&trace_record_cmdline_disabled))
720                 return;
721
722         trace_save_cmdline(tsk);
723 }
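/*
 * Editor's note (illustrative): the comm is captured at trace time and
 * resolved again at print time through the pid -> slot -> comm maps
 * above, e.g.
 *
 *	tracing_record_cmdline(current);		(writer side)
 *	comm = trace_find_cmdline(entry->field.pid);	(reader side)
 *
 * If the slot has since been recycled for another pid, the reader just
 * gets the "<...>" placeholder.
 */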
724
725 static inline struct list_head *
726 trace_next_list(struct trace_array_cpu *data, struct list_head *next)
727 {
728         /*
729          * Round-robin - but skip the head (which is not a real page):
730          */
731         next = next->next;
732         if (unlikely(next == &data->trace_pages))
733                 next = next->next;
734         BUG_ON(next == &data->trace_pages);
735
736         return next;
737 }
738
739 static inline void *
740 trace_next_page(struct trace_array_cpu *data, void *addr)
741 {
742         struct list_head *next;
743         struct page *page;
744
745         page = virt_to_page(addr);
746
747         next = trace_next_list(data, &page->lru);
748         page = list_entry(next, struct page, lru);
749
750         return page_address(page);
751 }
752
753 struct trace_entry *
754 tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
755 {
756         unsigned long idx, idx_next;
757         struct trace_entry *entry;
758
759         data->trace_idx++;
760         idx = data->trace_head_idx;
761         idx_next = idx + 1;
762
763         BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
764
765         entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
766
767         if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
768                 data->trace_head = trace_next_page(data, data->trace_head);
769                 idx_next = 0;
770         }
771
772         if (data->trace_head == data->trace_tail &&
773             idx_next == data->trace_tail_idx) {
774                 /* overrun */
775                 data->overrun++;
776                 data->trace_tail_idx++;
777                 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
778                         data->trace_tail =
779                                 trace_next_page(data, data->trace_tail);
780                         data->trace_tail_idx = 0;
781                 }
782         }
783
784         data->trace_head_idx = idx_next;
785
786         return entry;
787 }
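/*
 * Editor's note (worked example): the buffer is a ring of pages, each
 * holding ENTRIES_PER_PAGE entries. The head (producer) chases the tail
 * (oldest entry); when the next head position would land on the tail,
 * the oldest entry is discarded instead: overrun is incremented and the
 * tail index advances by one, moving on to the next page once it reaches
 * ENTRIES_PER_PAGE.
 */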
788
789 void
790 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
791 {
792         struct task_struct *tsk = current;
793         unsigned long pc;
794
795         pc = preempt_count();
796
797         entry->field.preempt_count      = pc & 0xff;
798         entry->field.pid                = (tsk) ? tsk->pid : 0;
799         entry->field.t                  = ftrace_now(raw_smp_processor_id());
800         entry->field.flags =
801                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
802                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
803                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
804                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
805 }
806
807 void
808 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
809                unsigned long ip, unsigned long parent_ip, unsigned long flags)
810 {
811         struct trace_entry *entry;
812         unsigned long irq_flags;
813
814         raw_local_irq_save(irq_flags);
815         __raw_spin_lock(&data->lock);
816         entry                           = tracing_get_trace_entry(tr, data);
817         tracing_generic_entry_update(entry, flags);
818         entry->type                     = TRACE_FN;
819         entry->field.fn.ip              = ip;
820         entry->field.fn.parent_ip       = parent_ip;
821         __raw_spin_unlock(&data->lock);
822         raw_local_irq_restore(irq_flags);
823 }
824
825 void
826 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
827        unsigned long ip, unsigned long parent_ip, unsigned long flags)
828 {
829         if (likely(!atomic_read(&data->disabled)))
830                 trace_function(tr, data, ip, parent_ip, flags);
831 }
832
833 void __trace_stack(struct trace_array *tr,
834                    struct trace_array_cpu *data,
835                    unsigned long flags,
836                    int skip)
837 {
838         struct trace_entry *entry;
839         struct stack_trace trace;
840
841         if (!(trace_flags & TRACE_ITER_STACKTRACE))
842                 return;
843
844         entry                   = tracing_get_trace_entry(tr, data);
845         tracing_generic_entry_update(entry, flags);
846         entry->type             = TRACE_STACK;
847
848         memset(&entry->field.stack, 0, sizeof(entry->field.stack));
849
850         trace.nr_entries        = 0;
851         trace.max_entries       = FTRACE_STACK_ENTRIES;
852         trace.skip              = skip;
853         trace.entries           = entry->field.stack.caller;
854
855         save_stack_trace(&trace);
856 }
857
858 void
859 __trace_special(void *__tr, void *__data,
860                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
861 {
862         struct trace_array_cpu *data = __data;
863         struct trace_array *tr = __tr;
864         struct trace_entry *entry;
865         unsigned long irq_flags;
866
867         raw_local_irq_save(irq_flags);
868         __raw_spin_lock(&data->lock);
869         entry                           = tracing_get_trace_entry(tr, data);
870         tracing_generic_entry_update(entry, 0);
871         entry->type                     = TRACE_SPECIAL;
872         entry->field.special.arg1       = arg1;
873         entry->field.special.arg2       = arg2;
874         entry->field.special.arg3       = arg3;
875         __trace_stack(tr, data, irq_flags, 4);
876         __raw_spin_unlock(&data->lock);
877         raw_local_irq_restore(irq_flags);
878
879         trace_wake_up();
880 }
881
882 void
883 tracing_sched_switch_trace(struct trace_array *tr,
884                            struct trace_array_cpu *data,
885                            struct task_struct *prev,
886                            struct task_struct *next,
887                            unsigned long flags)
888 {
889         struct trace_entry *entry;
890         unsigned long irq_flags;
891
892         raw_local_irq_save(irq_flags);
893         __raw_spin_lock(&data->lock);
894         entry                           = tracing_get_trace_entry(tr, data);
895         tracing_generic_entry_update(entry, flags);
896         entry->type                     = TRACE_CTX;
897         entry->field.ctx.prev_pid       = prev->pid;
898         entry->field.ctx.prev_prio      = prev->prio;
899         entry->field.ctx.prev_state     = prev->state;
900         entry->field.ctx.next_pid       = next->pid;
901         entry->field.ctx.next_prio      = next->prio;
902         entry->field.ctx.next_state     = next->state;
903         entry->field.ctx.next_cpu       = task_cpu(next);
904         __trace_stack(tr, data, flags, 5);
905         __raw_spin_unlock(&data->lock);
906         raw_local_irq_restore(irq_flags);
907 }
908
909 void
910 tracing_sched_wakeup_trace(struct trace_array *tr,
911                            struct trace_array_cpu *data,
912                            struct task_struct *wakee,
913                            struct task_struct *curr,
914                            unsigned long flags)
915 {
916         struct trace_entry *entry;
917         unsigned long irq_flags;
918
919         raw_local_irq_save(irq_flags);
920         __raw_spin_lock(&data->lock);
921         entry                   = tracing_get_trace_entry(tr, data);
922         tracing_generic_entry_update(entry, flags);
923         entry->type             = TRACE_WAKE;
924         entry->field.ctx.prev_pid       = curr->pid;
925         entry->field.ctx.prev_prio      = curr->prio;
926         entry->field.ctx.prev_state     = curr->state;
927         entry->field.ctx.next_pid       = wakee->pid;
928         entry->field.ctx.next_prio      = wakee->prio;
929         entry->field.ctx.next_state     = wakee->state;
930         entry->field.ctx.next_cpu       = task_cpu(wakee);
931         __trace_stack(tr, data, flags, 6);
932         __raw_spin_unlock(&data->lock);
933         raw_local_irq_restore(irq_flags);
934
935         trace_wake_up();
936 }
937
938 void
939 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
940 {
941         struct trace_array *tr = &global_trace;
942         struct trace_array_cpu *data;
943         unsigned long flags;
944         long disabled;
945         int cpu;
946
947         if (tracing_disabled || !tr->ctrl)
948                 return;
949
950         local_irq_save(flags);
951         cpu = raw_smp_processor_id();
952         data = tr->data[cpu];
953         disabled = atomic_inc_return(&data->disabled);
954
955         if (likely(disabled == 1))
956                 __trace_special(tr, data, arg1, arg2, arg3);
957
958         atomic_dec(&data->disabled);
959         local_irq_restore(flags);
960 }
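/*
 * Illustrative usage (editor's note): ftrace_special() is handy for
 * ad-hoc debugging, e.g. dropping a marker with up to three values from
 * anywhere in the kernel:
 *
 *	ftrace_special(__LINE__, some_value, 0);
 *
 * The entry shows up as a TRACE_SPECIAL line ("# arg1 arg2 arg3") in the
 * output routines below; "some_value" here is just a placeholder.
 */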
961
962 #ifdef CONFIG_FTRACE
963 static void
964 function_trace_call(unsigned long ip, unsigned long parent_ip)
965 {
966         struct trace_array *tr = &global_trace;
967         struct trace_array_cpu *data;
968         unsigned long flags;
969         long disabled;
970         int cpu;
971
972         if (unlikely(!ftrace_function_enabled))
973                 return;
974
975         if (skip_trace(ip))
976                 return;
977
978         local_irq_save(flags);
979         cpu = raw_smp_processor_id();
980         data = tr->data[cpu];
981         disabled = atomic_inc_return(&data->disabled);
982
983         if (likely(disabled == 1))
984                 trace_function(tr, data, ip, parent_ip, flags);
985
986         atomic_dec(&data->disabled);
987         local_irq_restore(flags);
988 }
989
990 static struct ftrace_ops trace_ops __read_mostly =
991 {
992         .func = function_trace_call,
993 };
994
995 void tracing_start_function_trace(void)
996 {
997         ftrace_function_enabled = 0;
998         register_ftrace_function(&trace_ops);
999         if (tracer_enabled)
1000                 ftrace_function_enabled = 1;
1001 }
1002
1003 void tracing_stop_function_trace(void)
1004 {
1005         ftrace_function_enabled = 0;
1006         unregister_ftrace_function(&trace_ops);
1007 }
1008 #endif
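/*
 * Illustrative sketch (editor's addition; the callback name is made up):
 * a plugin that wants every kernel function traced flips the hooks above
 * from its own ctrl_update callback, roughly:
 *
 *	static void example_ctrl_update(struct trace_array *tr)
 *	{
 *		if (tr->ctrl)
 *			tracing_start_function_trace();
 *		else
 *			tracing_stop_function_trace();
 *	}
 *
 * which mirrors how the function tracer plugin drives these helpers.
 */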
1009
1010 enum trace_file_type {
1011         TRACE_FILE_LAT_FMT      = 1,
1012 };
1013
1014 /* Return the current entry.  */
1015 static struct trace_entry *
1016 trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
1017                 struct trace_iterator *iter, int cpu)
1018 {
1019         struct page *page;
1020         struct trace_entry *array;
1021
1022         if (iter->next_idx[cpu] >= tr->entries ||
1023             iter->next_idx[cpu] >= data->trace_idx ||
1024             (data->trace_head == data->trace_tail &&
1025              data->trace_head_idx == data->trace_tail_idx))
1026                 return NULL;
1027
1028         if (!iter->next_page[cpu]) {
1029                 /* Initialize the iterator for this cpu trace buffer */
1030                 WARN_ON(!data->trace_tail);
1031                 page = virt_to_page(data->trace_tail);
1032                 iter->next_page[cpu] = &page->lru;
1033                 iter->next_page_idx[cpu] = data->trace_tail_idx;
1034         }
1035
1036         page = list_entry(iter->next_page[cpu], struct page, lru);
1037         BUG_ON(&data->trace_pages == &page->lru);
1038
1039         array = page_address(page);
1040
1041         WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
1042         return &array[iter->next_page_idx[cpu]];
1043 }
1044
1045 /* Increment the index counter of an iterator by one */
1046 static void __trace_iterator_increment(struct trace_iterator *iter, int cpu)
1047 {
1048         iter->next_idx[cpu]++;
1049         iter->next_page_idx[cpu]++;
1050
1051         if (iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE) {
1052                 struct trace_array_cpu *data = iter->tr->data[cpu];
1053
1054                 iter->next_page_idx[cpu] = 0;
1055                 iter->next_page[cpu] =
1056                         trace_next_list(data, iter->next_page[cpu]);
1057         }
1058 }
1059
1060 static void trace_iterator_increment(struct trace_iterator *iter, int cpu)
1061 {
1062         iter->idx++;
1063         __trace_iterator_increment(iter, cpu);
1064 }
1065
1066 static struct trace_entry *
1067 trace_entry_next(struct trace_array *tr, struct trace_array_cpu *data,
1068                  struct trace_iterator *iter, int cpu)
1069 {
1070         struct list_head *next_page;
1071         struct trace_entry *ent;
1072         int idx, next_idx, next_page_idx;
1073
1074         ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1075
1076         if (likely(!ent || ent->type != TRACE_CONT))
1077                 return ent;
1078
1079         /* save the iterator details */
1080         idx             = iter->idx;
1081         next_idx        = iter->next_idx[cpu];
1082         next_page_idx   = iter->next_page_idx[cpu];
1083         next_page       = iter->next_page[cpu];
1084
1085         /* find a real entry */
1086         do {
1087                 __trace_iterator_increment(iter, cpu);
1088                 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1089         } while (ent && ent->type == TRACE_CONT);
1090
1091         /* reset the iterator */
1092         iter->idx                       = idx;
1093         iter->next_idx[cpu]             = next_idx;
1094         iter->next_page_idx[cpu]        = next_page_idx;
1095         iter->next_page[cpu]            = next_page;
1096
1097         return ent;
1098 }
1099
1100 static struct trace_entry *
1101 __find_next_entry(struct trace_iterator *iter, int *ent_cpu, int inc)
1102 {
1103         struct trace_array *tr = iter->tr;
1104         struct trace_entry *ent, *next = NULL;
1105         int next_cpu = -1;
1106         int cpu;
1107
1108         for_each_tracing_cpu(cpu) {
1109                 if (!head_page(tr->data[cpu]))
1110                         continue;
1111
1112                 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
1113
1114                 if (ent && ent->type == TRACE_CONT) {
1115                         struct trace_array_cpu *data = tr->data[cpu];
1116
1117                         if (!inc)
1118                                 ent = trace_entry_next(tr, data, iter, cpu);
1119                         else {
1120                                 while (ent && ent->type == TRACE_CONT) {
1121                                         __trace_iterator_increment(iter, cpu);
1122                                         ent = trace_entry_idx(tr, tr->data[cpu],
1123                                                               iter, cpu);
1124                                 }
1125                         }
1126                 }
1127
1128                 /*
1129                  * Pick the entry with the smallest timestamp:
1130                  */
1131                 if (ent && (!next || ent->field.t < next->field.t)) {
1132                         next = ent;
1133                         next_cpu = cpu;
1134                 }
1135         }
1136
1137         if (ent_cpu)
1138                 *ent_cpu = next_cpu;
1139
1140         return next;
1141 }
1142
1143 /* Find the next real entry, without updating the iterator itself */
1144 static struct trace_entry *
1145 find_next_entry(struct trace_iterator *iter, int *ent_cpu)
1146 {
1147         return __find_next_entry(iter, ent_cpu, 0);
1148 }
1149
1150 /* Find the next real entry, and increment the iterator to the next entry */
1151 static void *find_next_entry_inc(struct trace_iterator *iter)
1152 {
1153         struct trace_entry *next;
1154         int next_cpu = -1;
1155
1156         next = __find_next_entry(iter, &next_cpu, 1);
1157
1158         iter->prev_ent = iter->ent;
1159         iter->prev_cpu = iter->cpu;
1160
1161         iter->ent = next;
1162         iter->cpu = next_cpu;
1163
1164         if (next)
1165                 trace_iterator_increment(iter, iter->cpu);
1166
1167         return next ? iter : NULL;
1168 }
1169
1170 static void trace_consume(struct trace_iterator *iter)
1171 {
1172         struct trace_array_cpu *data = iter->tr->data[iter->cpu];
1173         struct trace_entry *ent;
1174
1175  again:
1176         data->trace_tail_idx++;
1177         if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
1178                 data->trace_tail = trace_next_page(data, data->trace_tail);
1179                 data->trace_tail_idx = 0;
1180         }
1181
1182         /* If we emptied the buffer, reset the index */
1183         if (data->trace_head == data->trace_tail &&
1184             data->trace_head_idx == data->trace_tail_idx)
1185                 data->trace_idx = 0;
1186
1187         ent = trace_entry_idx(iter->tr, iter->tr->data[iter->cpu],
1188                               iter, iter->cpu);
1189         if (ent && ent->type == TRACE_CONT)
1190                 goto again;
1191 }
1192
1193 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
1194 {
1195         struct trace_iterator *iter = m->private;
1196         int i = (int)*pos;
1197         void *ent;
1198
1199         (*pos)++;
1200
1201         /* can't go backwards */
1202         if (iter->idx > i)
1203                 return NULL;
1204
1205         if (iter->idx < 0)
1206                 ent = find_next_entry_inc(iter);
1207         else
1208                 ent = iter;
1209
1210         while (ent && iter->idx < i)
1211                 ent = find_next_entry_inc(iter);
1212
1213         iter->pos = *pos;
1214
1215         return ent;
1216 }
1217
1218 static void *s_start(struct seq_file *m, loff_t *pos)
1219 {
1220         struct trace_iterator *iter = m->private;
1221         void *p = NULL;
1222         loff_t l = 0;
1223         int i;
1224
1225         mutex_lock(&trace_types_lock);
1226
1227         if (!current_trace || current_trace != iter->trace) {
1228                 mutex_unlock(&trace_types_lock);
1229                 return NULL;
1230         }
1231
1232         atomic_inc(&trace_record_cmdline_disabled);
1233
1234         /* let the tracer grab locks here if needed */
1235         if (current_trace->start)
1236                 current_trace->start(iter);
1237
1238         if (*pos != iter->pos) {
1239                 iter->ent = NULL;
1240                 iter->cpu = 0;
1241                 iter->idx = -1;
1242                 iter->prev_ent = NULL;
1243                 iter->prev_cpu = -1;
1244
1245                 for_each_tracing_cpu(i) {
1246                         iter->next_idx[i] = 0;
1247                         iter->next_page[i] = NULL;
1248                 }
1249
1250                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
1251                         ;
1252
1253         } else {
1254                 l = *pos - 1;
1255                 p = s_next(m, p, &l);
1256         }
1257
1258         return p;
1259 }
1260
1261 static void s_stop(struct seq_file *m, void *p)
1262 {
1263         struct trace_iterator *iter = m->private;
1264
1265         atomic_dec(&trace_record_cmdline_disabled);
1266
1267         /* let the tracer release locks here if needed */
1268         if (current_trace && current_trace == iter->trace && iter->trace->stop)
1269                 iter->trace->stop(iter);
1270
1271         mutex_unlock(&trace_types_lock);
1272 }
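/*
 * Editor's note (sketch): s_start/s_next/s_stop, together with a show
 * callback, form a standard seq_file iterator, roughly:
 *
 *	static struct seq_operations example_trace_seq_ops = {
 *		.start	= s_start,
 *		.next	= s_next,
 *		.stop	= s_stop,
 *		.show	= s_show,
 *	};
 *
 * The ops name above is illustrative; the real structure (and s_show)
 * appear further down in this file.
 */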
1273
1274 #define KRETPROBE_MSG "[unknown/kretprobe'd]"
1275
1276 #ifdef CONFIG_KRETPROBES
1277 static inline int kretprobed(unsigned long addr)
1278 {
1279         return addr == (unsigned long)kretprobe_trampoline;
1280 }
1281 #else
1282 static inline int kretprobed(unsigned long addr)
1283 {
1284         return 0;
1285 }
1286 #endif /* CONFIG_KRETPROBES */
1287
1288 static int
1289 seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1290 {
1291 #ifdef CONFIG_KALLSYMS
1292         char str[KSYM_SYMBOL_LEN];
1293
1294         kallsyms_lookup(address, NULL, NULL, NULL, str);
1295
1296         return trace_seq_printf(s, fmt, str);
1297 #endif
1298         return 1;
1299 }
1300
1301 static int
1302 seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1303                      unsigned long address)
1304 {
1305 #ifdef CONFIG_KALLSYMS
1306         char str[KSYM_SYMBOL_LEN];
1307
1308         sprint_symbol(str, address);
1309         return trace_seq_printf(s, fmt, str);
1310 #endif
1311         return 1;
1312 }
1313
1314 #ifndef CONFIG_64BIT
1315 # define IP_FMT "%08lx"
1316 #else
1317 # define IP_FMT "%016lx"
1318 #endif
1319
1320 static int
1321 seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1322 {
1323         int ret;
1324
1325         if (!ip)
1326                 return trace_seq_printf(s, "0");
1327
1328         if (sym_flags & TRACE_ITER_SYM_OFFSET)
1329                 ret = seq_print_sym_offset(s, "%s", ip);
1330         else
1331                 ret = seq_print_sym_short(s, "%s", ip);
1332
1333         if (!ret)
1334                 return 0;
1335
1336         if (sym_flags & TRACE_ITER_SYM_ADDR)
1337                 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1338         return ret;
1339 }
1340
1341 static void print_lat_help_header(struct seq_file *m)
1342 {
1343         seq_puts(m, "#                  _------=> CPU#            \n");
1344         seq_puts(m, "#                 / _-----=> irqs-off        \n");
1345         seq_puts(m, "#                | / _----=> need-resched    \n");
1346         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
1347         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
1348         seq_puts(m, "#                |||| /                      \n");
1349         seq_puts(m, "#                |||||     delay             \n");
1350         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
1351         seq_puts(m, "#     \\   /      |||||   \\   |   /           \n");
1352 }
1353
1354 static void print_func_help_header(struct seq_file *m)
1355 {
1356         seq_puts(m, "#           TASK-PID    CPU#    TIMESTAMP  FUNCTION\n");
1357         seq_puts(m, "#              | |       |          |         |\n");
1358 }
1359
1360
1361 static void
1362 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1363 {
1364         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1365         struct trace_array *tr = iter->tr;
1366         struct trace_array_cpu *data = tr->data[tr->cpu];
1367         struct tracer *type = current_trace;
1368         unsigned long total   = 0;
1369         unsigned long entries = 0;
1370         int cpu;
1371         const char *name = "preemption";
1372
1373         if (type)
1374                 name = type->name;
1375
1376         for_each_tracing_cpu(cpu) {
1377                 if (head_page(tr->data[cpu])) {
1378                         total += tr->data[cpu]->trace_idx;
1379                         if (tr->data[cpu]->trace_idx > tr->entries)
1380                                 entries += tr->entries;
1381                         else
1382                                 entries += tr->data[cpu]->trace_idx;
1383                 }
1384         }
1385
1386         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1387                    name, UTS_RELEASE);
1388         seq_puts(m, "-----------------------------------"
1389                  "---------------------------------\n");
1390         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1391                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1392                    nsecs_to_usecs(data->saved_latency),
1393                    entries,
1394                    total,
1395                    tr->cpu,
1396 #if defined(CONFIG_PREEMPT_NONE)
1397                    "server",
1398 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1399                    "desktop",
1400 #elif defined(CONFIG_PREEMPT)
1401                    "preempt",
1402 #else
1403                    "unknown",
1404 #endif
1405                    /* These are reserved for later use */
1406                    0, 0, 0, 0);
1407 #ifdef CONFIG_SMP
1408         seq_printf(m, " #P:%d)\n", num_online_cpus());
1409 #else
1410         seq_puts(m, ")\n");
1411 #endif
1412         seq_puts(m, "    -----------------\n");
1413         seq_printf(m, "    | task: %.16s-%d "
1414                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1415                    data->comm, data->pid, data->uid, data->nice,
1416                    data->policy, data->rt_priority);
1417         seq_puts(m, "    -----------------\n");
1418
1419         if (data->critical_start) {
1420                 seq_puts(m, " => started at: ");
1421                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1422                 trace_print_seq(m, &iter->seq);
1423                 seq_puts(m, "\n => ended at:   ");
1424                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1425                 trace_print_seq(m, &iter->seq);
1426                 seq_puts(m, "\n");
1427         }
1428
1429         seq_puts(m, "\n");
1430 }
1431
1432 static void
1433 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1434 {
1435         struct trace_field *field = &entry->field;
1436         int hardirq, softirq;
1437         char *comm;
1438
1439         comm = trace_find_cmdline(field->pid);
1440
1441         trace_seq_printf(s, "%8.8s-%-5d ", comm, field->pid);
1442         trace_seq_printf(s, "%3d", cpu);
1443         trace_seq_printf(s, "%c%c",
1444                         (field->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1445                         ((field->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1446
1447         hardirq = field->flags & TRACE_FLAG_HARDIRQ;
1448         softirq = field->flags & TRACE_FLAG_SOFTIRQ;
1449         if (hardirq && softirq) {
1450                 trace_seq_putc(s, 'H');
1451         } else {
1452                 if (hardirq) {
1453                         trace_seq_putc(s, 'h');
1454                 } else {
1455                         if (softirq)
1456                                 trace_seq_putc(s, 's');
1457                         else
1458                                 trace_seq_putc(s, '.');
1459                 }
1460         }
1461
1462         if (field->preempt_count)
1463                 trace_seq_printf(s, "%x", field->preempt_count);
1464         else
1465                 trace_seq_puts(s, ".");
1466 }
1467
1468 unsigned long preempt_mark_thresh = 100;
1469
1470 static void
1471 lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1472                     unsigned long rel_usecs)
1473 {
1474         trace_seq_printf(s, " %4lldus", abs_usecs);
1475         if (rel_usecs > preempt_mark_thresh)
1476                 trace_seq_puts(s, "!: ");
1477         else if (rel_usecs > 1)
1478                 trace_seq_puts(s, "+: ");
1479         else
1480                 trace_seq_puts(s, " : ");
1481 }
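/*
 * Editor's note (worked example): in the latency output this renders as
 * e.g. "50us : " for a quiet spot, "50us+: " when more than 1us passes
 * before the next entry, and "50us!: " when the gap exceeds
 * preempt_mark_thresh (100us).
 */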
1482
1483 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1484
1485 /*
1486  * The message is supposed to contain an ending newline.
1487  * If the printing stops prematurely, try to add a newline of our own.
1488  */
1489 void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter)
1490 {
1491         struct trace_array *tr = iter->tr;
1492         struct trace_array_cpu *data = tr->data[iter->cpu];
1493         struct trace_entry *ent;
1494         bool ok = true;
1495
1496         ent = trace_entry_idx(tr, data, iter, iter->cpu);
1497         if (!ent || ent->type != TRACE_CONT) {
1498                 trace_seq_putc(s, '\n');
1499                 return;
1500         }
1501
1502         do {
1503                 if (ok)
1504                         ok = (trace_seq_printf(s, "%s", ent->cont.buf) > 0);
1505                 __trace_iterator_increment(iter, iter->cpu);
1506                 ent = trace_entry_idx(tr, data, iter, iter->cpu);
1507         } while (ent && ent->type == TRACE_CONT);
1508
1509         if (!ok)
1510                 trace_seq_putc(s, '\n');
1511 }
1512
1513 static int
1514 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1515 {
1516         struct trace_seq *s = &iter->seq;
1517         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1518         struct trace_entry *next_entry = find_next_entry(iter, NULL);
1519         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1520         struct trace_entry *entry = iter->ent;
1521         struct trace_field *field = &entry->field;
1522         unsigned long abs_usecs;
1523         unsigned long rel_usecs;
1524         char *comm;
1525         int S, T;
1526         int i;
1527         unsigned state;
1528
1529         if (!next_entry)
1530                 next_entry = entry;
1531
1532         if (entry->type == TRACE_CONT)
1533                 return 1;
1534
1535         rel_usecs = ns2usecs(next_entry->field.t - entry->field.t);
1536         abs_usecs = ns2usecs(entry->field.t - iter->tr->time_start);
1537
1538         if (verbose) {
1539                 comm = trace_find_cmdline(field->pid);
1540                 trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]"
1541                                  " %ld.%03ldms (+%ld.%03ldms): ",
1542                                  comm,
1543                                  field->pid, cpu, field->flags,
1544                                  field->preempt_count, trace_idx,
1545                                  ns2usecs(field->t),
1546                                  abs_usecs/1000,
1547                                  abs_usecs % 1000, rel_usecs/1000,
1548                                  rel_usecs % 1000);
1549         } else {
1550                 lat_print_generic(s, entry, cpu);
1551                 lat_print_timestamp(s, abs_usecs, rel_usecs);
1552         }
1553         switch (entry->type) {
1554         case TRACE_FN:
1555                 seq_print_ip_sym(s, field->fn.ip, sym_flags);
1556                 trace_seq_puts(s, " (");
1557                 if (kretprobed(field->fn.parent_ip))
1558                         trace_seq_puts(s, KRETPROBE_MSG);
1559                 else
1560                         seq_print_ip_sym(s, field->fn.parent_ip, sym_flags);
1561                 trace_seq_puts(s, ")\n");
1562                 break;
1563         case TRACE_CTX:
1564         case TRACE_WAKE:
1565                 T = field->ctx.next_state < sizeof(state_to_char) ?
1566                         state_to_char[field->ctx.next_state] : 'X';
1567
1568                 state = field->ctx.prev_state ?
1569                         __ffs(field->ctx.prev_state) + 1 : 0;
1570                 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1571                 comm = trace_find_cmdline(field->ctx.next_pid);
1572                 trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n",
1573                                  field->ctx.prev_pid,
1574                                  field->ctx.prev_prio,
1575                                  S, entry->type == TRACE_CTX ? "==>" : "  +",
1576                                  field->ctx.next_cpu,
1577                                  field->ctx.next_pid,
1578                                  field->ctx.next_prio,
1579                                  T, comm);
1580                 break;
1581         case TRACE_SPECIAL:
1582                 trace_seq_printf(s, "# %ld %ld %ld\n",
1583                                  field->special.arg1,
1584                                  field->special.arg2,
1585                                  field->special.arg3);
1586                 break;
1587         case TRACE_STACK:
1588                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1589                         if (i)
1590                                 trace_seq_puts(s, " <= ");
1591                         seq_print_ip_sym(s, field->stack.caller[i], sym_flags);
1592                 }
1593                 trace_seq_puts(s, "\n");
1594                 break;
1595         case TRACE_PRINT:
1596                 seq_print_ip_sym(s, field->print.ip, sym_flags);
1597                 trace_seq_printf(s, ": %s", field->print.buf);
1598                 if (field->flags & TRACE_FLAG_CONT)
1599                         trace_seq_print_cont(s, iter);
1600                 break;
1601         default:
1602                 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1603         }
1604         return 1;
1605 }
1606
1607 static int print_trace_fmt(struct trace_iterator *iter)
1608 {
1609         struct trace_seq *s = &iter->seq;
1610         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1611         struct trace_entry *entry;
1612         struct trace_field *field;
1613         unsigned long usec_rem;
1614         unsigned long long t;
1615         unsigned long secs;
1616         char *comm;
1617         int ret;
1618         int S, T;
1619         int i;
1620
1621         entry = iter->ent;
1622
1623         if (entry->type == TRACE_CONT)
1624                 return 1;
1625
1626         field = &entry->field;
1627
1628         comm = trace_find_cmdline(iter->ent->field.pid);
1629
1630         t = ns2usecs(field->t);
1631         usec_rem = do_div(t, 1000000ULL);
1632         secs = (unsigned long)t;
1633
1634         ret = trace_seq_printf(s, "%16s-%-5d ", comm, field->pid);
1635         if (!ret)
1636                 return 0;
1637         ret = trace_seq_printf(s, "[%03d] ", iter->cpu);
1638         if (!ret)
1639                 return 0;
1640         ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1641         if (!ret)
1642                 return 0;
1643
1644         switch (entry->type) {
1645         case TRACE_FN:
1646                 ret = seq_print_ip_sym(s, field->fn.ip, sym_flags);
1647                 if (!ret)
1648                         return 0;
1649                 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1650                                                 field->fn.parent_ip) {
1651                         ret = trace_seq_printf(s, " <-");
1652                         if (!ret)
1653                                 return 0;
1654                         if (kretprobed(field->fn.parent_ip))
1655                                 ret = trace_seq_puts(s, KRETPROBE_MSG);
1656                         else
1657                                 ret = seq_print_ip_sym(s,
1658                                                        field->fn.parent_ip,
1659                                                        sym_flags);
1660                         if (!ret)
1661                                 return 0;
1662                 }
1663                 ret = trace_seq_printf(s, "\n");
1664                 if (!ret)
1665                         return 0;
1666                 break;
1667         case TRACE_CTX:
1668         case TRACE_WAKE:
1669                 S = field->ctx.prev_state < sizeof(state_to_char) ?
1670                         state_to_char[field->ctx.prev_state] : 'X';
1671                 T = field->ctx.next_state < sizeof(state_to_char) ?
1672                         state_to_char[field->ctx.next_state] : 'X';
1673                 ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n",
1674                                        field->ctx.prev_pid,
1675                                        field->ctx.prev_prio,
1676                                        S,
1677                                        entry->type == TRACE_CTX ? "==>" : "  +",
1678                                        field->ctx.next_cpu,
1679                                        field->ctx.next_pid,
1680                                        field->ctx.next_prio,
1681                                        T);
1682                 if (!ret)
1683                         return 0;
1684                 break;
1685         case TRACE_SPECIAL:
1686                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1687                                  field->special.arg1,
1688                                  field->special.arg2,
1689                                  field->special.arg3);
1690                 if (!ret)
1691                         return 0;
1692                 break;
1693         case TRACE_STACK:
1694                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1695                         if (i) {
1696                                 ret = trace_seq_puts(s, " <= ");
1697                                 if (!ret)
1698                                         return 0;
1699                         }
1700                         ret = seq_print_ip_sym(s, field->stack.caller[i],
1701                                                sym_flags);
1702                         if (!ret)
1703                                 return 0;
1704                 }
1705                 ret = trace_seq_puts(s, "\n");
1706                 if (!ret)
1707                         return 0;
1708                 break;
1709         case TRACE_PRINT:
1710                 seq_print_ip_sym(s, field->print.ip, sym_flags);
1711                 trace_seq_printf(s, ": %s", field->print.buf);
1712                 if (field->flags & TRACE_FLAG_CONT)
1713                         trace_seq_print_cont(s, iter);
1714                 break;
1715         }
1716         return 1;
1717 }
1718
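     /*
      * Raw format: one line per entry with plain numeric fields
      * (pid, cpu, timestamp, then the type-specific values), which
      * keeps the output easy to parse from user-space.
      */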
1719 static int print_raw_fmt(struct trace_iterator *iter)
1720 {
1721         struct trace_seq *s = &iter->seq;
1722         struct trace_entry *entry;
1723         struct trace_field *field;
1724         int ret;
1725         int S, T;
1726
1727         entry = iter->ent;
1728
1729         if (entry->type == TRACE_CONT)
1730                 return 1;
1731
1732         field = &entry->field;
1733
1734         ret = trace_seq_printf(s, "%d %d %llu ",
1735                 field->pid, iter->cpu, field->t);
1736         if (!ret)
1737                 return 0;
1738
1739         switch (entry->type) {
1740         case TRACE_FN:
1741                 ret = trace_seq_printf(s, "%lx %lx\n",
1742                                         field->fn.ip,
1743                                         field->fn.parent_ip);
1744                 if (!ret)
1745                         return 0;
1746                 break;
1747         case TRACE_CTX:
1748         case TRACE_WAKE:
1749                 S = field->ctx.prev_state < sizeof(state_to_char) ?
1750                         state_to_char[field->ctx.prev_state] : 'X';
1751                 T = field->ctx.next_state < sizeof(state_to_char) ?
1752                         state_to_char[field->ctx.next_state] : 'X';
1753                 if (entry->type == TRACE_WAKE)
1754                         S = '+';
1755                 ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n",
1756                                        field->ctx.prev_pid,
1757                                        field->ctx.prev_prio,
1758                                        S,
1759                                        field->ctx.next_cpu,
1760                                        field->ctx.next_pid,
1761                                        field->ctx.next_prio,
1762                                        T);
1763                 if (!ret)
1764                         return 0;
1765                 break;
1766         case TRACE_SPECIAL:
1767         case TRACE_STACK:
1768                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1769                                  field->special.arg1,
1770                                  field->special.arg2,
1771                                  field->special.arg3);
1772                 if (!ret)
1773                         return 0;
1774                 break;
1775         case TRACE_PRINT:
1776                 trace_seq_printf(s, "# %lx %s",
1777                                  field->print.ip, field->print.buf);
1778                 if (field->flags & TRACE_FLAG_CONT)
1779                         trace_seq_print_cont(s, iter);
1780                 break;
1781         }
1782         return 1;
1783 }
1784
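     /*
      * Helpers for the hex and binary formats: emit a field as raw
      * memory or as hex, and return 0 from the caller if the seq
      * buffer has filled up.
      */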
1785 #define SEQ_PUT_FIELD_RET(s, x)                         \
1786 do {                                                    \
1787         if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
1788                 return 0;                               \
1789 } while (0)
1790
1791 #define SEQ_PUT_HEX_FIELD_RET(s, x)                     \
1792 do {                                                    \
1793         if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
1794                 return 0;                               \
1795 } while (0)
1796
1797 static int print_hex_fmt(struct trace_iterator *iter)
1798 {
1799         struct trace_seq *s = &iter->seq;
1800         unsigned char newline = '\n';
1801         struct trace_entry *entry;
1802         struct trace_field *field;
1803         int S, T;
1804
1805         entry = iter->ent;
1806
1807         if (entry->type == TRACE_CONT)
1808                 return 1;
1809
1810         field = &entry->field;
1811
1812         SEQ_PUT_HEX_FIELD_RET(s, field->pid);
1813         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1814         SEQ_PUT_HEX_FIELD_RET(s, field->t);
1815
1816         switch (entry->type) {
1817         case TRACE_FN:
1818                 SEQ_PUT_HEX_FIELD_RET(s, field->fn.ip);
1819                 SEQ_PUT_HEX_FIELD_RET(s, field->fn.parent_ip);
1820                 break;
1821         case TRACE_CTX:
1822         case TRACE_WAKE:
1823                 S = field->ctx.prev_state < sizeof(state_to_char) ?
1824                         state_to_char[field->ctx.prev_state] : 'X';
1825                 T = field->ctx.next_state < sizeof(state_to_char) ?
1826                         state_to_char[field->ctx.next_state] : 'X';
1827                 if (entry->type == TRACE_WAKE)
1828                         S = '+';
1829                 SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_pid);
1830                 SEQ_PUT_HEX_FIELD_RET(s, field->ctx.prev_prio);
1831                 SEQ_PUT_HEX_FIELD_RET(s, S);
1832                 SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_cpu);
1833                 SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_pid);
1834                 SEQ_PUT_HEX_FIELD_RET(s, field->ctx.next_prio);
1835                 SEQ_PUT_HEX_FIELD_RET(s, T);
1836                 break;
1837         case TRACE_SPECIAL:
1838         case TRACE_STACK:
1839                 SEQ_PUT_HEX_FIELD_RET(s, field->special.arg1);
1840                 SEQ_PUT_HEX_FIELD_RET(s, field->special.arg2);
1841                 SEQ_PUT_HEX_FIELD_RET(s, field->special.arg3);
1842                 break;
1843         }
1844         SEQ_PUT_FIELD_RET(s, newline);
1845
1846         return 1;
1847 }
1848
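     /*
      * Binary format: copy the raw entry fields straight into the seq
      * buffer, with no formatting at all.
      */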
1849 static int print_bin_fmt(struct trace_iterator *iter)
1850 {
1851         struct trace_seq *s = &iter->seq;
1852         struct trace_entry *entry;
1853         struct trace_field *field;
1854
1855         entry = iter->ent;
1856
1857         if (entry->type == TRACE_CONT)
1858                 return 1;
1859
1860         field = &entry->field;
1861
1862         SEQ_PUT_FIELD_RET(s, field->pid);
1863         SEQ_PUT_FIELD_RET(s, field->cpu);
1864         SEQ_PUT_FIELD_RET(s, field->t);
1865
1866         switch (entry->type) {
1867         case TRACE_FN:
1868                 SEQ_PUT_FIELD_RET(s, field->fn.ip);
1869                 SEQ_PUT_FIELD_RET(s, field->fn.parent_ip);
1870                 break;
1871         case TRACE_CTX:
1872                 SEQ_PUT_FIELD_RET(s, field->ctx.prev_pid);
1873                 SEQ_PUT_FIELD_RET(s, field->ctx.prev_prio);
1874                 SEQ_PUT_FIELD_RET(s, field->ctx.prev_state);
1875                 SEQ_PUT_FIELD_RET(s, field->ctx.next_pid);
1876                 SEQ_PUT_FIELD_RET(s, field->ctx.next_prio);
1877                 SEQ_PUT_FIELD_RET(s, field->ctx.next_state);
1878                 break;
1879         case TRACE_SPECIAL:
1880         case TRACE_STACK:
1881                 SEQ_PUT_FIELD_RET(s, field->special.arg1);
1882                 SEQ_PUT_FIELD_RET(s, field->special.arg2);
1883                 SEQ_PUT_FIELD_RET(s, field->special.arg3);
1884                 break;
1885         }
1886         return 1;
1887 }
1888
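     /* Returns 1 when no per-CPU buffer holds unconsumed entries. */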
1889 static int trace_empty(struct trace_iterator *iter)
1890 {
1891         struct trace_array_cpu *data;
1892         int cpu;
1893
1894         for_each_tracing_cpu(cpu) {
1895                 data = iter->tr->data[cpu];
1896
1897                 if (head_page(data) && data->trace_idx &&
1898                     (data->trace_tail != data->trace_head ||
1899                      data->trace_tail_idx != data->trace_head_idx))
1900                         return 0;
1901         }
1902         return 1;
1903 }
1904
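     /*
      * Pick the output format for a single entry: a tracer-specific
      * print_line() callback takes precedence, then the bin/hex/raw
      * iterator flags, then the latency format, and finally the
      * default human-readable format.
      */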
1905 static int print_trace_line(struct trace_iterator *iter)
1906 {
1907         if (iter->trace && iter->trace->print_line)
1908                 return iter->trace->print_line(iter);
1909
1910         if (trace_flags & TRACE_ITER_BIN)
1911                 return print_bin_fmt(iter);
1912
1913         if (trace_flags & TRACE_ITER_HEX)
1914                 return print_hex_fmt(iter);
1915
1916         if (trace_flags & TRACE_ITER_RAW)
1917                 return print_raw_fmt(iter);
1918
1919         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1920                 return print_lat_fmt(iter, iter->idx, iter->cpu);
1921
1922         return print_trace_fmt(iter);
1923 }
1924
1925 static int s_show(struct seq_file *m, void *v)
1926 {
1927         struct trace_iterator *iter = v;
1928
1929         if (iter->ent == NULL) {
1930                 if (iter->tr) {
1931                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1932                         seq_puts(m, "#\n");
1933                 }
1934                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1935                         /* print nothing if the buffers are empty */
1936                         if (trace_empty(iter))
1937                                 return 0;
1938                         print_trace_header(m, iter);
1939                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1940                                 print_lat_help_header(m);
1941                 } else {
1942                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1943                                 print_func_help_header(m);
1944                 }
1945         } else {
1946                 print_trace_line(iter);
1947                 trace_print_seq(m, &iter->seq);
1948         }
1949
1950         return 0;
1951 }
1952
1953 static struct seq_operations tracer_seq_ops = {
1954         .start          = s_start,
1955         .next           = s_next,
1956         .stop           = s_stop,
1957         .show           = s_show,
1958 };
1959
1960 static struct trace_iterator *
1961 __tracing_open(struct inode *inode, struct file *file, int *ret)
1962 {
1963         struct trace_iterator *iter;
1964
1965         if (tracing_disabled) {
1966                 *ret = -ENODEV;
1967                 return NULL;
1968         }
1969
1970         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1971         if (!iter) {
1972                 *ret = -ENOMEM;
1973                 goto out;
1974         }
1975
1976         mutex_lock(&trace_types_lock);
1977         if (current_trace && current_trace->print_max)
1978                 iter->tr = &max_tr;
1979         else
1980                 iter->tr = inode->i_private;
1981         iter->trace = current_trace;
1982         iter->pos = -1;
1983
1984         /* TODO stop tracer */
1985         *ret = seq_open(file, &tracer_seq_ops);
1986         if (!*ret) {
1987                 struct seq_file *m = file->private_data;
1988                 m->private = iter;
1989
1990                 /* stop the trace while dumping */
1991                 if (iter->tr->ctrl) {
1992                         tracer_enabled = 0;
1993                         ftrace_function_enabled = 0;
1994                 }
1995
1996                 if (iter->trace && iter->trace->open)
1997                         iter->trace->open(iter);
1998         } else {
1999                 kfree(iter);
2000                 iter = NULL;
2001         }
2002         mutex_unlock(&trace_types_lock);
2003
2004  out:
2005         return iter;
2006 }
2007
2008 int tracing_open_generic(struct inode *inode, struct file *filp)
2009 {
2010         if (tracing_disabled)
2011                 return -ENODEV;
2012
2013         filp->private_data = inode->i_private;
2014         return 0;
2015 }
2016
2017 int tracing_release(struct inode *inode, struct file *file)
2018 {
2019         struct seq_file *m = (struct seq_file *)file->private_data;
2020         struct trace_iterator *iter = m->private;
2021
2022         mutex_lock(&trace_types_lock);
2023         if (iter->trace && iter->trace->close)
2024                 iter->trace->close(iter);
2025
2026         /* reenable tracing if it was previously enabled */
2027         if (iter->tr->ctrl) {
2028                 tracer_enabled = 1;
2029                 /*
2030                  * It is safe to enable function tracing even if it
2031                  * isn't used
2032                  */
2033                 ftrace_function_enabled = 1;
2034         }
2035         mutex_unlock(&trace_types_lock);
2036
2037         seq_release(inode, file);
2038         kfree(iter);
2039         return 0;
2040 }
2041
2042 static int tracing_open(struct inode *inode, struct file *file)
2043 {
2044         int ret;
2045
2046         __tracing_open(inode, file, &ret);
2047
2048         return ret;
2049 }
2050
2051 static int tracing_lt_open(struct inode *inode, struct file *file)
2052 {
2053         struct trace_iterator *iter;
2054         int ret;
2055
2056         iter = __tracing_open(inode, file, &ret);
2057
2058         if (!ret)
2059                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
2060
2061         return ret;
2062 }
2063
2064
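     /*
      * seq_file iterator over the list of registered tracers; it backs
      * the "available_tracers" debugfs file.
      */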
2065 static void *
2066 t_next(struct seq_file *m, void *v, loff_t *pos)
2067 {
2068         struct tracer *t = m->private;
2069
2070         (*pos)++;
2071
2072         if (t)
2073                 t = t->next;
2074
2075         m->private = t;
2076
2077         return t;
2078 }
2079
2080 static void *t_start(struct seq_file *m, loff_t *pos)
2081 {
2082         struct tracer *t = m->private;
2083         loff_t l = 0;
2084
2085         mutex_lock(&trace_types_lock);
2086         for (; t && l < *pos; t = t_next(m, t, &l))
2087                 ;
2088
2089         return t;
2090 }
2091
2092 static void t_stop(struct seq_file *m, void *p)
2093 {
2094         mutex_unlock(&trace_types_lock);
2095 }
2096
2097 static int t_show(struct seq_file *m, void *v)
2098 {
2099         struct tracer *t = v;
2100
2101         if (!t)
2102                 return 0;
2103
2104         seq_printf(m, "%s", t->name);
2105         if (t->next)
2106                 seq_putc(m, ' ');
2107         else
2108                 seq_putc(m, '\n');
2109
2110         return 0;
2111 }
2112
2113 static struct seq_operations show_traces_seq_ops = {
2114         .start          = t_start,
2115         .next           = t_next,
2116         .stop           = t_stop,
2117         .show           = t_show,
2118 };
2119
2120 static int show_traces_open(struct inode *inode, struct file *file)
2121 {
2122         int ret;
2123
2124         if (tracing_disabled)
2125                 return -ENODEV;
2126
2127         ret = seq_open(file, &show_traces_seq_ops);
2128         if (!ret) {
2129                 struct seq_file *m = file->private_data;
2130                 m->private = trace_types;
2131         }
2132
2133         return ret;
2134 }
2135
2136 static struct file_operations tracing_fops = {
2137         .open           = tracing_open,
2138         .read           = seq_read,
2139         .llseek         = seq_lseek,
2140         .release        = tracing_release,
2141 };
2142
2143 static struct file_operations tracing_lt_fops = {
2144         .open           = tracing_lt_open,
2145         .read           = seq_read,
2146         .llseek         = seq_lseek,
2147         .release        = tracing_release,
2148 };
2149
2150 static struct file_operations show_traces_fops = {
2151         .open           = show_traces_open,
2152         .read           = seq_read,
2153         .release        = seq_release,
2154 };
2155
2156 /*
2157  * Only trace on a CPU if the bitmask is set:
2158  */
2159 static cpumask_t tracing_cpumask = CPU_MASK_ALL;
2160
2161 /*
2162  * When tracing/tracing_cpumask is modified then this holds
2163  * the new bitmask we are about to install:
2164  */
2165 static cpumask_t tracing_cpumask_new;
2166
2167 /*
2168  * The tracer itself will not take this lock, but still we want
2169  * to provide a consistent cpumask to user-space:
2170  */
2171 static DEFINE_MUTEX(tracing_cpumask_update_lock);
2172
2173 /*
2174  * Temporary storage for the character representation of the
2175  * CPU bitmask (and one more byte for the newline):
2176  */
2177 static char mask_str[NR_CPUS + 1];
2178
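     /*
      * "tracing_cpumask" handlers. Writing a new mask (for example
      * "echo 3 > /debug/tracing/tracing_cpumask" to trace only CPUs 0
      * and 1) raises the disabled count of CPUs leaving the mask and
      * lowers it for CPUs entering it, under ftrace_max_lock with
      * interrupts off, so the switch appears atomic to the tracer.
      */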
2179 static ssize_t
2180 tracing_cpumask_read(struct file *filp, char __user *ubuf,
2181                      size_t count, loff_t *ppos)
2182 {
2183         int len;
2184
2185         mutex_lock(&tracing_cpumask_update_lock);
2186
2187         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
2188         if (count - len < 2) {
2189                 count = -EINVAL;
2190                 goto out_err;
2191         }
2192         len += sprintf(mask_str + len, "\n");
2193         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
2194
2195 out_err:
2196         mutex_unlock(&tracing_cpumask_update_lock);
2197
2198         return count;
2199 }
2200
2201 static ssize_t
2202 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
2203                       size_t count, loff_t *ppos)
2204 {
2205         int err, cpu;
2206
2207         mutex_lock(&tracing_cpumask_update_lock);
2208         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
2209         if (err)
2210                 goto err_unlock;
2211
2212         raw_local_irq_disable();
2213         __raw_spin_lock(&ftrace_max_lock);
2214         for_each_tracing_cpu(cpu) {
2215                 /*
2216                  * Increase/decrease the disabled counter if we are
2217                  * about to flip a bit in the cpumask:
2218                  */
2219                 if (cpu_isset(cpu, tracing_cpumask) &&
2220                                 !cpu_isset(cpu, tracing_cpumask_new)) {
2221                         atomic_inc(&global_trace.data[cpu]->disabled);
2222                 }
2223                 if (!cpu_isset(cpu, tracing_cpumask) &&
2224                                 cpu_isset(cpu, tracing_cpumask_new)) {
2225                         atomic_dec(&global_trace.data[cpu]->disabled);
2226                 }
2227         }
2228         __raw_spin_unlock(&ftrace_max_lock);
2229         raw_local_irq_enable();
2230
2231         tracing_cpumask = tracing_cpumask_new;
2232
2233         mutex_unlock(&tracing_cpumask_update_lock);
2234
2235         return count;
2236
2237 err_unlock:
2238         mutex_unlock(&tracing_cpumask_update_lock);
2239
2240         return err;
2241 }
2242
2243 static struct file_operations tracing_cpumask_fops = {
2244         .open           = tracing_open_generic,
2245         .read           = tracing_cpumask_read,
2246         .write          = tracing_cpumask_write,
2247 };
2248
2249 static ssize_t
2250 tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
2251                        size_t cnt, loff_t *ppos)
2252 {
2253         char *buf;
2254         int r = 0;
2255         int len = 0;
2256         int i;
2257
2258         /* calculate max size */
2259         for (i = 0; trace_options[i]; i++) {
2260                 len += strlen(trace_options[i]);
2261                 len += 3; /* "no" and space */
2262         }
2263
2264         /* +2 for \n and \0 */
2265         buf = kmalloc(len + 2, GFP_KERNEL);
2266         if (!buf)
2267                 return -ENOMEM;
2268
2269         for (i = 0; trace_options[i]; i++) {
2270                 if (trace_flags & (1 << i))
2271                         r += sprintf(buf + r, "%s ", trace_options[i]);
2272                 else
2273                         r += sprintf(buf + r, "no%s ", trace_options[i]);
2274         }
2275
2276         r += sprintf(buf + r, "\n");
2277         WARN_ON(r >= len + 2);
2278
2279         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2280
2281         kfree(buf);
2282
2283         return r;
2284 }
2285
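     /*
      * Writing an option name to iter_ctrl sets that flag and a "no"
      * prefix clears it, e.g. "echo sym-addr > iter_ctrl" versus
      * "echo nosym-addr > iter_ctrl".
      */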
2286 static ssize_t
2287 tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
2288                         size_t cnt, loff_t *ppos)
2289 {
2290         char buf[64];
2291         char *cmp = buf;
2292         int neg = 0;
2293         int i;
2294
2295         if (cnt >= sizeof(buf))
2296                 return -EINVAL;
2297
2298         if (copy_from_user(&buf, ubuf, cnt))
2299                 return -EFAULT;
2300
2301         buf[cnt] = 0;
2302
2303         if (strncmp(buf, "no", 2) == 0) {
2304                 neg = 1;
2305                 cmp += 2;
2306         }
2307
2308         for (i = 0; trace_options[i]; i++) {
2309                 int len = strlen(trace_options[i]);
2310
2311                 if (strncmp(cmp, trace_options[i], len) == 0) {
2312                         if (neg)
2313                                 trace_flags &= ~(1 << i);
2314                         else
2315                                 trace_flags |= (1 << i);
2316                         break;
2317                 }
2318         }
2319         /*
2320          * If no option could be set, return an error:
2321          */
2322         if (!trace_options[i])
2323                 return -EINVAL;
2324
2325         filp->f_pos += cnt;
2326
2327         return cnt;
2328 }
2329
2330 static struct file_operations tracing_iter_fops = {
2331         .open           = tracing_open_generic,
2332         .read           = tracing_iter_ctrl_read,
2333         .write          = tracing_iter_ctrl_write,
2334 };
2335
2336 static const char readme_msg[] =
2337         "tracing mini-HOWTO:\n\n"
2338         "# mkdir /debug\n"
2339         "# mount -t debugfs nodev /debug\n\n"
2340         "# cat /debug/tracing/available_tracers\n"
2341         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch nop\n\n"
2342         "# cat /debug/tracing/current_tracer\n"
2343         "nop\n"
2344         "# echo sched_switch > /debug/tracing/current_tracer\n"
2345         "# cat /debug/tracing/current_tracer\n"
2346         "sched_switch\n"
2347         "# cat /debug/tracing/iter_ctrl\n"
2348         "noprint-parent nosym-offset nosym-addr noverbose\n"
2349         "# echo print-parent > /debug/tracing/iter_ctrl\n"
2350         "# echo 1 > /debug/tracing/tracing_enabled\n"
2351         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
2352         "# echo 0 > /debug/tracing/tracing_enabled\n"
2353 ;
2354
2355 static ssize_t
2356 tracing_readme_read(struct file *filp, char __user *ubuf,
2357                        size_t cnt, loff_t *ppos)
2358 {
2359         return simple_read_from_buffer(ubuf, cnt, ppos,
2360                                         readme_msg, strlen(readme_msg));
2361 }
2362
2363 static struct file_operations tracing_readme_fops = {
2364         .open           = tracing_open_generic,
2365         .read           = tracing_readme_read,
2366 };
2367
2368 static ssize_t
2369 tracing_ctrl_read(struct file *filp, char __user *ubuf,
2370                   size_t cnt, loff_t *ppos)
2371 {
2372         struct trace_array *tr = filp->private_data;
2373         char buf[64];
2374         int r;
2375
2376         r = sprintf(buf, "%ld\n", tr->ctrl);
2377         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2378 }
2379
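     /*
      * "tracing_enabled": writing a non-zero value starts the current
      * tracer and writing 0 stops it; the tracer's ctrl_update()
      * callback is invoked on each transition.
      */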
2380 static ssize_t
2381 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2382                    size_t cnt, loff_t *ppos)
2383 {
2384         struct trace_array *tr = filp->private_data;
2385         char buf[64];
2386         long val;
2387         int ret;
2388
2389         if (cnt >= sizeof(buf))
2390                 return -EINVAL;
2391
2392         if (copy_from_user(&buf, ubuf, cnt))
2393                 return -EFAULT;
2394
2395         buf[cnt] = 0;
2396
2397         ret = strict_strtoul(buf, 10, &val);
2398         if (ret < 0)
2399                 return ret;
2400
2401         val = !!val;
2402
2403         mutex_lock(&trace_types_lock);
2404         if (tr->ctrl ^ val) {
2405                 if (val)
2406                         tracer_enabled = 1;
2407                 else
2408                         tracer_enabled = 0;
2409
2410                 tr->ctrl = val;
2411
2412                 if (current_trace && current_trace->ctrl_update)
2413                         current_trace->ctrl_update(tr);
2414         }
2415         mutex_unlock(&trace_types_lock);
2416
2417         filp->f_pos += cnt;
2418
2419         return cnt;
2420 }
2421
2422 static ssize_t
2423 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2424                        size_t cnt, loff_t *ppos)
2425 {
2426         char buf[max_tracer_type_len+2];
2427         int r;
2428
2429         mutex_lock(&trace_types_lock);
2430         if (current_trace)
2431                 r = sprintf(buf, "%s\n", current_trace->name);
2432         else
2433                 r = sprintf(buf, "\n");
2434         mutex_unlock(&trace_types_lock);
2435
2436         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2437 }
2438
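     /*
      * "current_tracer": writing a registered tracer name switches
      * tracers, resetting the old one and calling the new one's init().
      */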
2439 static ssize_t
2440 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2441                         size_t cnt, loff_t *ppos)
2442 {
2443         struct trace_array *tr = &global_trace;
2444         struct tracer *t;
2445         char buf[max_tracer_type_len+1];
2446         int i;
2447
2448         if (cnt > max_tracer_type_len)
2449                 cnt = max_tracer_type_len;
2450
2451         if (copy_from_user(&buf, ubuf, cnt))
2452                 return -EFAULT;
2453
2454         buf[cnt] = 0;
2455
2456         /* strip trailing whitespace. */
2457         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2458                 buf[i] = 0;
2459
2460         mutex_lock(&trace_types_lock);
2461         for (t = trace_types; t; t = t->next) {
2462                 if (strcmp(t->name, buf) == 0)
2463                         break;
2464         }
2465         if (!t || t == current_trace)
2466                 goto out;
2467
2468         if (current_trace && current_trace->reset)
2469                 current_trace->reset(tr);
2470
2471         current_trace = t;
2472         if (t->init)
2473                 t->init(tr);
2474
2475  out:
2476         mutex_unlock(&trace_types_lock);
2477
2478         filp->f_pos += cnt;
2479
2480         return cnt;
2481 }
2482
2483 static ssize_t
2484 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2485                      size_t cnt, loff_t *ppos)
2486 {
2487         unsigned long *ptr = filp->private_data;
2488         char buf[64];
2489         int r;
2490
2491         r = snprintf(buf, sizeof(buf), "%ld\n",
2492                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2493         if (r > sizeof(buf))
2494                 r = sizeof(buf);
2495         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2496 }
2497
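     /* Latencies are exposed in microseconds but stored in nanoseconds. */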
2498 static ssize_t
2499 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2500                       size_t cnt, loff_t *ppos)
2501 {
2502         long *ptr = filp->private_data;
2503         char buf[64];
2504         long val;
2505         int ret;
2506
2507         if (cnt >= sizeof(buf))
2508                 return -EINVAL;
2509
2510         if (copy_from_user(&buf, ubuf, cnt))
2511                 return -EFAULT;
2512
2513         buf[cnt] = 0;
2514
2515         ret = strict_strtoul(buf, 10, &val);
2516         if (ret < 0)
2517                 return ret;
2518
2519         *ptr = val * 1000;
2520
2521         return cnt;
2522 }
2523
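     /*
      * trace_pipe is a consuming reader: entries are removed from the
      * buffer as they are read, and only one reader may have the pipe
      * open at a time.
      */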
2524 static atomic_t tracing_reader;
2525
2526 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2527 {
2528         struct trace_iterator *iter;
2529
2530         if (tracing_disabled)
2531                 return -ENODEV;
2532
2533         /* We only allow one reader of the pipe */
2534         if (atomic_inc_return(&tracing_reader) != 1) {
2535                 atomic_dec(&tracing_reader);
2536                 return -EBUSY;
2537         }
2538
2539         /* create a buffer to store the information to pass to userspace */
2540         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2541         if (!iter)
2542                 return -ENOMEM;
2543
2544         mutex_lock(&trace_types_lock);
2545         iter->tr = &global_trace;
2546         iter->trace = current_trace;
2547         filp->private_data = iter;
2548
2549         if (iter->trace->pipe_open)
2550                 iter->trace->pipe_open(iter);
2551         mutex_unlock(&trace_types_lock);
2552
2553         return 0;
2554 }
2555
2556 static int tracing_release_pipe(struct inode *inode, struct file *file)
2557 {
2558         struct trace_iterator *iter = file->private_data;
2559
2560         kfree(iter);
2561         atomic_dec(&tracing_reader);
2562
2563         return 0;
2564 }
2565
2566 static unsigned int
2567 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2568 {
2569         struct trace_iterator *iter = filp->private_data;
2570
2571         if (trace_flags & TRACE_ITER_BLOCK) {
2572                 /*
2573                  * Always select as readable when in blocking mode
2574                  */
2575                 return POLLIN | POLLRDNORM;
2576         } else {
2577                 if (!trace_empty(iter))
2578                         return POLLIN | POLLRDNORM;
2579                 poll_wait(filp, &trace_wait, poll_table);
2580                 if (!trace_empty(iter))
2581                         return POLLIN | POLLRDNORM;
2582
2583                 return 0;
2584         }
2585 }
2586
2587 /*
2588  * Consumer reader.
2589  */
2590 static ssize_t
2591 tracing_read_pipe(struct file *filp, char __user *ubuf,
2592                   size_t cnt, loff_t *ppos)
2593 {
2594         struct trace_iterator *iter = filp->private_data;
2595         struct trace_array_cpu *data;
2596         static cpumask_t mask;
2597         unsigned long flags;
2598 #ifdef CONFIG_FTRACE
2599         int ftrace_save;
2600 #endif
2601         int cpu;
2602         ssize_t sret;
2603
2604         /* return any leftover data */
2605         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2606         if (sret != -EBUSY)
2607                 return sret;
2608         sret = 0;
2609
2610         trace_seq_reset(&iter->seq);
2611
2612         mutex_lock(&trace_types_lock);
2613         if (iter->trace->read) {
2614                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
2615                 if (sret)
2616                         goto out;
2617         }
2618
2619         while (trace_empty(iter)) {
2620
2621                 if ((filp->f_flags & O_NONBLOCK)) {
2622                         sret = -EAGAIN;
2623                         goto out;
2624                 }
2625
2626                 /*
2627                  * This is a makeshift waitqueue. The reasons we don't use
2628                  * an actual wait queue are:
2629                  *  1) we only ever have one waiter
2630                  *  2) the tracer traces all functions, and we don't want
2631                  *     the overhead of calling wake_up and friends
2632                  *     (and of tracing them too)
2633                  *  Anyway, this is a really primitive wakeup.
2634                  */
2635                 set_current_state(TASK_INTERRUPTIBLE);
2636                 iter->tr->waiter = current;
2637
2638                 mutex_unlock(&trace_types_lock);
2639
2640                 /* sleep for 100 msecs, and try again. */
2641                 schedule_timeout(HZ/10);
2642
2643                 mutex_lock(&trace_types_lock);
2644
2645                 iter->tr->waiter = NULL;
2646
2647                 if (signal_pending(current)) {
2648                         sret = -EINTR;
2649                         goto out;
2650                 }
2651
2652                 if (iter->trace != current_trace)
2653                         goto out;
2654
2655                 /*
2656                  * We break out once we have read something and tracing is
2657                  * disabled. We still block if tracing is disabled but we
2658                  * have never read anything. This allows a user to cat this
2659                  * file, and then enable tracing. But after we have read
2660                  * something, we give an EOF when tracing is again disabled.
2661                  *
2662                  * iter->pos will be 0 if we haven't read anything.
2663                  */
2664                 if (!tracer_enabled && iter->pos)
2665                         break;
2666
2667                 continue;
2668         }
2669
2670         /* stop when tracing is finished */
2671         if (trace_empty(iter))
2672                 goto out;
2673
2674         if (cnt >= PAGE_SIZE)
2675                 cnt = PAGE_SIZE - 1;
2676
2677         /* reset all but tr, trace, and overruns */
2678         memset(&iter->seq, 0,
2679                sizeof(struct trace_iterator) -
2680                offsetof(struct trace_iterator, seq));
2681         iter->pos = -1;
2682
2683         /*
2684          * We need to stop all tracing on all CPUs to read the next
2685          * buffer. This is a bit expensive, but it is not done often.
2686          * We read everything we can, and then release the locks
2687          * again.
2688          */
2689
2690         cpus_clear(mask);
2691         local_irq_save(flags);
2692 #ifdef CONFIG_FTRACE
2693         ftrace_save = ftrace_enabled;
2694         ftrace_enabled = 0;
2695 #endif
2696         smp_wmb();
2697         for_each_tracing_cpu(cpu) {
2698                 data = iter->tr->data[cpu];
2699
2700                 if (!head_page(data) || !data->trace_idx)
2701                         continue;
2702
2703                 atomic_inc(&data->disabled);
2704                 cpu_set(cpu, mask);
2705         }
2706
2707         for_each_cpu_mask(cpu, mask) {
2708                 data = iter->tr->data[cpu];
2709                 __raw_spin_lock(&data->lock);
2710
2711                 if (data->overrun > iter->last_overrun[cpu])
2712                         iter->overrun[cpu] +=
2713                                 data->overrun - iter->last_overrun[cpu];
2714                 iter->last_overrun[cpu] = data->overrun;
2715         }
2716
2717         while (find_next_entry_inc(iter) != NULL) {
2718                 int ret;
2719                 int len = iter->seq.len;
2720
2721                 ret = print_trace_line(iter);
2722                 if (!ret) {
2723                         /* don't print partial lines */
2724                         iter->seq.len = len;
2725                         break;
2726                 }
2727
2728                 trace_consume(iter);
2729
2730                 if (iter->seq.len >= cnt)
2731                         break;
2732         }
2733
2734         for_each_cpu_mask(cpu, mask) {
2735                 data = iter->tr->data[cpu];
2736                 __raw_spin_unlock(&data->lock);
2737         }
2738
2739         for_each_cpu_mask(cpu, mask) {
2740                 data = iter->tr->data[cpu];
2741                 atomic_dec(&data->disabled);
2742         }
2743 #ifdef CONFIG_FTRACE
2744         ftrace_enabled = ftrace_save;
2745 #endif
2746         local_irq_restore(flags);
2747
2748         /* Now copy what we have to the user */
2749         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
2750         if (iter->seq.readpos >= iter->seq.len)
2751                 trace_seq_reset(&iter->seq);
2752         if (sret == -EBUSY)
2753                 sret = 0;
2754
2755 out:
2756         mutex_unlock(&trace_types_lock);
2757
2758         return sret;
2759 }
2760
2761 static ssize_t
2762 tracing_entries_read(struct file *filp, char __user *ubuf,
2763                      size_t cnt, loff_t *ppos)
2764 {
2765         struct trace_array *tr = filp->private_data;
2766         char buf[64];
2767         int r;
2768
2769         r = sprintf(buf, "%lu\n", tr->entries);
2770         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2771 }
2772
2773 static ssize_t
2774 tracing_entries_write(struct file *filp, const char __user *ubuf,
2775                       size_t cnt, loff_t *ppos)
2776 {
2777         unsigned long val;
2778         char buf[64];
2779         int i, ret;
2780         struct trace_array *tr = filp->private_data;
2781
2782         if (cnt >= sizeof(buf))
2783                 return -EINVAL;
2784
2785         if (copy_from_user(&buf, ubuf, cnt))
2786                 return -EFAULT;
2787
2788         buf[cnt] = 0;
2789
2790         ret = strict_strtoul(buf, 10, &val);
2791         if (ret < 0)
2792                 return ret;
2793
2794         /* must have at least 1 entry */
2795         if (!val)
2796                 return -EINVAL;
2797
2798         mutex_lock(&trace_types_lock);
2799
2800         if (tr->ctrl) {
2801                 cnt = -EBUSY;
2802                 pr_info("ftrace: please disable tracing"
2803                         " before modifying buffer size\n");
2804                 goto out;
2805         }
2806
2807         if (val > global_trace.entries) {
2808                 long pages_requested;
2809                 unsigned long freeable_pages;
2810
2811                 /* make sure we have enough memory before mapping */
2812                 pages_requested =
2813                         (val + (ENTRIES_PER_PAGE-1)) / ENTRIES_PER_PAGE;
2814
2815                 /* account for each buffer (and max_tr) */
2816                 pages_requested *= tracing_nr_buffers * 2;
2817
2818                 /* Check for overflow */
2819                 if (pages_requested < 0) {
2820                         cnt = -ENOMEM;
2821                         goto out;
2822                 }
2823
2824                 freeable_pages = determine_dirtyable_memory();
2825
2826                 /* we only allow requests for up to 1/4 of usable memory */
2827                 if (pages_requested >
2828                     ((freeable_pages + tracing_pages_allocated) / 4)) {
2829                         cnt = -ENOMEM;
2830                         goto out;
2831                 }
2832
2833                 while (global_trace.entries < val) {
2834                         if (trace_alloc_page()) {
2835                                 cnt = -ENOMEM;
2836                                 goto out;
2837                         }
2838                         /* double check that we don't go over the known pages */
2839                         if (tracing_pages_allocated > pages_requested)
2840                                 break;
2841                 }
2842
2843         } else {
2844                 /* shrink, but never below val (rounded up to a full page of entries) */
2845                 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2846                         trace_free_page();
2847         }
2848
2849         /* check integrity */
2850         for_each_tracing_cpu(i)
2851                 check_pages(global_trace.data[i]);
2852
2853         filp->f_pos += cnt;
2854
2855         /* If check pages failed, return ENOMEM */
2856         if (tracing_disabled)
2857                 cnt = -ENOMEM;
2858  out:
2859         max_tr.entries = global_trace.entries;
2860         mutex_unlock(&trace_types_lock);
2861
2862         return cnt;
2863 }
2864
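     /*
      * "trace_marker": lets user-space inject a line into the trace,
      * e.g. "echo hello > /debug/tracing/trace_marker"; the text is
      * recorded as a TRACE_PRINT entry via trace_vprintk().
      */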
2865 static int mark_printk(const char *fmt, ...)
2866 {
2867         int ret;
2868         va_list args;
2869         va_start(args, fmt);
2870         ret = trace_vprintk(0, fmt, args);
2871         va_end(args);
2872         return ret;
2873 }
2874
2875 static ssize_t
2876 tracing_mark_write(struct file *filp, const char __user *ubuf,
2877                                         size_t cnt, loff_t *fpos)
2878 {
2879         char *buf;
2880         char *end;
2881         struct trace_array *tr = &global_trace;
2882
2883         if (!tr->ctrl || tracing_disabled)
2884                 return -EINVAL;
2885
2886         if (cnt > TRACE_BUF_SIZE)
2887                 cnt = TRACE_BUF_SIZE;
2888
2889         buf = kmalloc(cnt + 1, GFP_KERNEL);
2890         if (buf == NULL)
2891                 return -ENOMEM;
2892
2893         if (copy_from_user(buf, ubuf, cnt)) {
2894                 kfree(buf);
2895                 return -EFAULT;
2896         }
2897
2898         /* NUL-terminate the buffer and cut it at the first newline. */
2899         buf[cnt] = '\0';
2900         end = strchr(buf, '\n');
2901         if (end)
2902                 *end = '\0';
2903
2904         cnt = mark_printk("%s\n", buf);
2905         kfree(buf);
2906         *fpos += cnt;
2907
2908         return cnt;
2909 }
2910
2911 static struct file_operations tracing_max_lat_fops = {
2912         .open           = tracing_open_generic,
2913         .read           = tracing_max_lat_read,
2914         .write          = tracing_max_lat_write,
2915 };
2916
2917 static struct file_operations tracing_ctrl_fops = {
2918         .open           = tracing_open_generic,
2919         .read           = tracing_ctrl_read,
2920         .write          = tracing_ctrl_write,
2921 };
2922
2923 static struct file_operations set_tracer_fops = {
2924         .open           = tracing_open_generic,
2925         .read           = tracing_set_trace_read,
2926         .write          = tracing_set_trace_write,
2927 };
2928
2929 static struct file_operations tracing_pipe_fops = {
2930         .open           = tracing_open_pipe,
2931         .poll           = tracing_poll_pipe,
2932         .read           = tracing_read_pipe,
2933         .release        = tracing_release_pipe,
2934 };
2935
2936 static struct file_operations tracing_entries_fops = {
2937         .open           = tracing_open_generic,
2938         .read           = tracing_entries_read,
2939         .write          = tracing_entries_write,
2940 };
2941
2942 static struct file_operations tracing_mark_fops = {
2943         .open           = tracing_open_generic,
2944         .write          = tracing_mark_write,
2945 };
2946
2947 #ifdef CONFIG_DYNAMIC_FTRACE
2948
2949 static ssize_t
2950 tracing_read_long(struct file *filp, char __user *ubuf,
2951                   size_t cnt, loff_t *ppos)
2952 {
2953         unsigned long *p = filp->private_data;
2954         char buf[64];
2955         int r;
2956
2957         r = sprintf(buf, "%ld\n", *p);
2958
2959         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2960 }
2961
2962 static struct file_operations tracing_read_long_fops = {
2963         .open           = tracing_open_generic,
2964         .read           = tracing_read_long,
2965 };
2966 #endif
2967
2968 static struct dentry *d_tracer;
2969
2970 struct dentry *tracing_init_dentry(void)
2971 {
2972         static int once;
2973
2974         if (d_tracer)
2975                 return d_tracer;
2976
2977         d_tracer = debugfs_create_dir("tracing", NULL);
2978
2979         if (!d_tracer && !once) {
2980                 once = 1;
2981                 pr_warning("Could not create debugfs directory 'tracing'\n");
2982                 return NULL;
2983         }
2984
2985         return d_tracer;
2986 }
2987
2988 #ifdef CONFIG_FTRACE_SELFTEST
2989 /* Let selftest have access to static functions in this file */
2990 #include "trace_selftest.c"
2991 #endif
2992
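     /*
      * Create the control files under /debug/tracing (see the
      * mini-HOWTO above); a file that fails to appear only produces a
      * warning, so the remaining entries are still created.
      */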
2993 static __init void tracer_init_debugfs(void)
2994 {
2995         struct dentry *d_tracer;
2996         struct dentry *entry;
2997
2998         d_tracer = tracing_init_dentry();
2999
3000         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
3001                                     &global_trace, &tracing_ctrl_fops);
3002         if (!entry)
3003                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
3004
3005         entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
3006                                     NULL, &tracing_iter_fops);
3007         if (!entry)
3008                 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
3009
3010         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
3011                                     NULL, &tracing_cpumask_fops);
3012         if (!entry)
3013                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
3014
3015         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
3016                                     &global_trace, &tracing_lt_fops);
3017         if (!entry)
3018                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
3019
3020         entry = debugfs_create_file("trace", 0444, d_tracer,
3021                                     &global_trace, &tracing_fops);
3022         if (!entry)
3023                 pr_warning("Could not create debugfs 'trace' entry\n");
3024
3025         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
3026                                     &global_trace, &show_traces_fops);
3027         if (!entry)
3028                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
3029
3030         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
3031                                     &global_trace, &set_tracer_fops);
3032         if (!entry)
3033                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
3034
3035         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
3036                                     &tracing_max_latency,
3037                                     &tracing_max_lat_fops);
3038         if (!entry)
3039                 pr_warning("Could not create debugfs "
3040                            "'tracing_max_latency' entry\n");
3041
3042         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
3043                                     &tracing_thresh, &tracing_max_lat_fops);
3044         if (!entry)
3045                 pr_warning("Could not create debugfs "
3046                            "'tracing_thresh' entry\n");
3047         entry = debugfs_create_file("README", 0644, d_tracer,
3048                                     NULL, &tracing_readme_fops);
3049         if (!entry)
3050                 pr_warning("Could not create debugfs 'README' entry\n");
3051
3052         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
3053                                     NULL, &tracing_pipe_fops);
3054         if (!entry)
3055                 pr_warning("Could not create debugfs "
3056                            "'trace_pipe' entry\n");
3057
3058         entry = debugfs_create_file("trace_entries", 0644, d_tracer,
3059                                     &global_trace, &tracing_entries_fops);
3060         if (!entry)
3061                 pr_warning("Could not create debugfs "
3062                            "'trace_entries' entry\n");
3063
3064         entry = debugfs_create_file("trace_marker", 0220, d_tracer,
3065                                     NULL, &tracing_mark_fops);
3066         if (!entry)
3067                 pr_warning("Could not create debugfs "
3068                            "'trace_marker' entry\n");
3069
3070 #ifdef CONFIG_DYNAMIC_FTRACE
3071         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
3072                                     &ftrace_update_tot_cnt,
3073                                     &tracing_read_long_fops);
3074         if (!entry)
3075                 pr_warning("Could not create debugfs "
3076                            "'dyn_ftrace_total_info' entry\n");
3077 #endif
3078 #ifdef CONFIG_SYSPROF_TRACER
3079         init_tracer_sysprof_debugfs(d_tracer);
3080 #endif
3081 }
3082
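     /*
      * Format the message into a static buffer and record it as one
      * TRACE_PRINT entry, chaining TRACE_CONT entries when the text
      * does not fit in a single entry.
      */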
3083 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3084 {
3085         static DEFINE_SPINLOCK(trace_buf_lock);
3086         static char trace_buf[TRACE_BUF_SIZE];
3087
3088         struct trace_array *tr = &global_trace;
3089         struct trace_array_cpu *data;
3090         struct trace_entry *entry;
3091         unsigned long flags;
3092         long disabled;
3093         int cpu, len = 0, write, written = 0;
3094
3095         if (!tr->ctrl || tracing_disabled)
3096                 return 0;
3097
3098         local_irq_save(flags);
3099         cpu = raw_smp_processor_id();
3100         data = tr->data[cpu];
3101         disabled = atomic_inc_return(&data->disabled);
3102
3103         if (unlikely(disabled != 1))
3104                 goto out;
3105
3106         spin_lock(&trace_buf_lock);
3107         len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args);
3108
3109         len = min(len, TRACE_BUF_SIZE-1);
3110         trace_buf[len] = 0;
3111
3112         __raw_spin_lock(&data->lock);
3113         entry                           = tracing_get_trace_entry(tr, data);
3114         tracing_generic_entry_update(entry, flags);
3115         entry->type                     = TRACE_PRINT;
3116         entry->field.print.ip           = ip;
3117
3118         write = min(len, (int)(TRACE_PRINT_BUF_SIZE-1));
3119
3120         memcpy(&entry->field.print.buf, trace_buf, write);
3121         entry->field.print.buf[write] = 0;
3122         written = write;
3123
3124         if (written != len)
3125                 entry->field.flags |= TRACE_FLAG_CONT;
3126
3127         while (written != len) {
3128                 entry = tracing_get_trace_entry(tr, data);
3129
3130                 entry->type = TRACE_CONT;
3131                 write = min(len - written, (int)(TRACE_CONT_BUF_SIZE-1));
3132                 memcpy(&entry->cont.buf, trace_buf+written, write);
3133                 entry->cont.buf[write] = 0;
3134                 written += write;
3135         }
3136         __raw_spin_unlock(&data->lock);
3137
3138         spin_unlock(&trace_buf_lock);
3139
3140  out:
3141         atomic_dec(&data->disabled);
3142         local_irq_restore(flags);
3143
3144         return len;
3145 }
3146 EXPORT_SYMBOL_GPL(trace_vprintk);
3147
3148 int __ftrace_printk(unsigned long ip, const char *fmt, ...)
3149 {
3150         int ret;
3151         va_list ap;
3152
3153         if (!(trace_flags & TRACE_ITER_PRINTK))
3154                 return 0;
3155
3156         va_start(ap, fmt);
3157         ret = trace_vprintk(ip, fmt, ap);
3158         va_end(ap);
3159         return ret;
3160 }
3161 EXPORT_SYMBOL_GPL(__ftrace_printk);
3162
3163 static int trace_panic_handler(struct notifier_block *this,
3164                                unsigned long event, void *unused)
3165 {
3166         ftrace_dump();
3167         return NOTIFY_OK;
3168 }
3169
3170 static struct notifier_block trace_panic_notifier = {
3171         .notifier_call  = trace_panic_handler,
3172         .next           = NULL,
3173         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
3174 };
3175
3176 static int trace_die_handler(struct notifier_block *self,
3177                              unsigned long val,
3178                              void *data)
3179 {
3180         switch (val) {
3181         case DIE_OOPS:
3182                 ftrace_dump();
3183                 break;
3184         default:
3185                 break;
3186         }
3187         return NOTIFY_OK;
3188 }
3189
3190 static struct notifier_block trace_die_notifier = {
3191         .notifier_call = trace_die_handler,
3192         .priority = 200
3193 };
3194
3195 /*
3196  * printk is limited to a maximum of 1024 characters, and we really
3197  * don't need it that big. Nothing should be printing 1000 characters anyway.
3198  */
3199 #define TRACE_MAX_PRINT         1000
3200
3201 /*
3202  * Define here KERN_TRACE so that we have one place to modify
3203  * it if we decide to change what log level the ftrace dump
3204  * should be at.
3205  */
3206 #define KERN_TRACE              KERN_INFO
3207
3208 static void
3209 trace_printk_seq(struct trace_seq *s)
3210 {
3211         /* Probably should print a warning here. */
3212         if (s->len >= TRACE_MAX_PRINT)
3213                 s->len = TRACE_MAX_PRINT;
3214
3215         /* should be zero terminated, but we are paranoid. */
3216         s->buffer[s->len] = 0;
3217
3218         printk(KERN_TRACE "%s", s->buffer);
3219
3220         trace_seq_reset(s);
3221 }
3222
3223
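     /*
      * Used by the panic and die notifiers above: disable ftrace for
      * good and print every buffered entry to the console in the
      * latency format.
      */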
3224 void ftrace_dump(void)
3225 {
3226         static DEFINE_SPINLOCK(ftrace_dump_lock);
3227         /* use static because iter can be a bit big for the stack */
3228         static struct trace_iterator iter;
3229         struct trace_array_cpu *data;
3230         static cpumask_t mask;
3231         static int dump_ran;
3232         unsigned long flags;
3233         int cnt = 0;
3234         int cpu;
3235
3236         /* only one dump */
3237         spin_lock_irqsave(&ftrace_dump_lock, flags);
3238         if (dump_ran)
3239                 goto out;
3240
3241         dump_ran = 1;
3242
3243         /* No turning back! */
3244         ftrace_kill_atomic();
3245
3246         printk(KERN_TRACE "Dumping ftrace buffer:\n");
3247
3248         iter.tr = &global_trace;
3249         iter.trace = current_trace;
3250
3251         /*
3252          * We need to stop all tracing on all CPUs to read the next
3253          * buffer. This is a bit expensive, but it is not done often.
3254          * We read everything we can, and then release the locks
3255          * again.
3256          */
3257
3258         cpus_clear(mask);
3259
3260         for_each_tracing_cpu(cpu) {
3261                 data = iter.tr->data[cpu];
3262
3263                 if (!head_page(data) || !data->trace_idx)
3264                         continue;
3265
3266                 atomic_inc(&data->disabled);
3267                 cpu_set(cpu, mask);
3268         }
3269
3270         for_each_cpu_mask(cpu, mask) {
3271                 data = iter.tr->data[cpu];
3272                 __raw_spin_lock(&data->lock);
3273
3274                 if (data->overrun > iter.last_overrun[cpu])
3275                         iter.overrun[cpu] +=
3276                                 data->overrun - iter.last_overrun[cpu];
3277                 iter.last_overrun[cpu] = data->overrun;
3278         }
3279
3280         while (!trace_empty(&iter)) {
3281
3282                 if (!cnt)
3283                         printk(KERN_TRACE "---------------------------------\n");
3284
3285                 cnt++;
3286
3287                 /* zero everything from 'seq' onward; tr, trace and the overrun counts stay */
3288                 memset(&iter.seq, 0,
3289                        sizeof(struct trace_iterator) -
3290                        offsetof(struct trace_iterator, seq));
3291                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
3292                 iter.pos = -1;
3293
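                /*
                 * find_next_entry_inc() returns the oldest remaining entry
                 * across all CPUs; trace_consume() removes it from its
                 * buffer, so trace_empty() eventually becomes true.
                 */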
3294                 if (find_next_entry_inc(&iter) != NULL) {
3295                         print_trace_line(&iter);
3296                         trace_consume(&iter);
3297                 }
3298
3299                 trace_printk_seq(&iter.seq);
3300         }
3301
3302         if (!cnt)
3303                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
3304         else
3305                 printk(KERN_TRACE "---------------------------------\n");
3306
3307         for_each_cpu_mask(cpu, mask) {
3308                 data = iter.tr->data[cpu];
3309                 __raw_spin_unlock(&data->lock);
3310         }
3311
3312         for_each_cpu_mask(cpu, mask) {
3313                 data = iter.tr->data[cpu];
3314                 atomic_dec(&data->disabled);
3315         }
3316
3317
3318  out:
3319         spin_unlock_irqrestore(&ftrace_dump_lock, flags);
3320 }
3321
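/*
 * Grow each per-CPU buffer (and the max_tr buffer when it is
 * configured) by one page.  Pages for every CPU are allocated up
 * front; if any allocation fails, all pages allocated here are freed
 * again and -ENOMEM is returned, so the per-CPU buffers always stay
 * the same size as each other.
 */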
3322 static int trace_alloc_page(void)
3323 {
3324         struct trace_array_cpu *data;
3325         struct page *page, *tmp;
3326         LIST_HEAD(pages);
3327         void *array;
3328         unsigned pages_allocated = 0;
3329         int i;
3330
3331         /* first allocate a page for each CPU */
3332         for_each_tracing_cpu(i) {
3333                 array = (void *)__get_free_page(GFP_KERNEL);
3334                 if (array == NULL) {
3335                         printk(KERN_ERR "tracer: failed to allocate page "
3336                                "for trace buffer!\n");
3337                         goto free_pages;
3338                 }
3339
3340                 pages_allocated++;
3341                 page = virt_to_page(array);
3342                 list_add(&page->lru, &pages);
3343
3344 /* Only allocate if we are actually using the max trace */
3345 #ifdef CONFIG_TRACER_MAX_TRACE
3346                 array = (void *)__get_free_page(GFP_KERNEL);
3347                 if (array == NULL) {
3348                         printk(KERN_ERR "tracer: failed to allocate page "
3349                                "for trace buffer!\n");
3350                         goto free_pages;
3351                 }
3352                 pages_allocated++;
3353                 page = virt_to_page(array);
3354                 list_add(&page->lru, &pages);
3355 #endif
3356         }
3357
3358         /* Now that we have successfully allocated a page per CPU, add them */
3359         for_each_tracing_cpu(i) {
3360                 data = global_trace.data[i];
3361                 page = list_entry(pages.next, struct page, lru);
3362                 list_del_init(&page->lru);
3363                 list_add_tail(&page->lru, &data->trace_pages);
3364                 ClearPageLRU(page);
3365
3366 #ifdef CONFIG_TRACER_MAX_TRACE
3367                 data = max_tr.data[i];
3368                 page = list_entry(pages.next, struct page, lru);
3369                 list_del_init(&page->lru);
3370                 list_add_tail(&page->lru, &data->trace_pages);
3371                 SetPageLRU(page);
3372 #endif
3373         }
3374         tracing_pages_allocated += pages_allocated;
3375         global_trace.entries += ENTRIES_PER_PAGE;
3376
3377         return 0;
3378
3379  free_pages:
3380         list_for_each_entry_safe(page, tmp, &pages, lru) {
3381                 list_del_init(&page->lru);
3382                 __free_page(page);
3383         }
3384         return -ENOMEM;
3385 }
3386
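/*
 * Shrink each per-CPU buffer (and the max_tr buffer when it is
 * configured) by one page, then reset the buffers since entries may
 * have lived on the pages just freed.
 */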
3387 static int trace_free_page(void)
3388 {
3389         struct trace_array_cpu *data;
3390         struct page *page;
3391         struct list_head *p;
3392         int i;
3393         int ret = 0;
3394
3395         /* free one page from each buffer */
3396         for_each_tracing_cpu(i) {
3397                 data = global_trace.data[i];
3398                 p = data->trace_pages.next;
3399                 if (p == &data->trace_pages) {
3400                         /* should never happen */
3401                         WARN_ON(1);
3402                         tracing_disabled = 1;
3403                         ret = -1;
3404                         break;
3405                 }
3406                 page = list_entry(p, struct page, lru);
3407                 ClearPageLRU(page);
3408                 list_del(&page->lru);
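                /*
                 * Two pages are accounted for here: this global_trace page
                 * and the max_tr page freed below.  This assumes
                 * CONFIG_TRACER_MAX_TRACE is enabled.
                 */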
3409                 tracing_pages_allocated--;
3410                 tracing_pages_allocated--;
3411                 __free_page(page);
3412
3413                 tracing_reset(data);
3414
3415 #ifdef CONFIG_TRACER_MAX_TRACE
3416                 data = max_tr.data[i];
3417                 p = data->trace_pages.next;
3418                 if (p == &data->trace_pages) {
3419                         /* should never happen */
3420                         WARN_ON(1);
3421                         tracing_disabled = 1;
3422                         ret = -1;
3423                         break;
3424                 }
3425                 page = list_entry(p, struct page, lru);
3426                 ClearPageLRU(page);
3427                 list_del(&page->lru);
3428                 __free_page(page);
3429
3430                 tracing_reset(data);
3431 #endif
3432         }
3433         global_trace.entries -= ENTRIES_PER_PAGE;
3434
3435         return ret;
3436 }
3437
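/*
 * Boot-time initialization: allocate the first page of every per-CPU
 * buffer, grow the buffers up to trace_nr_entries entries, set up the
 * debugfs files and the cmdline recorder, install the nop tracer as
 * the default, and register the panic/die notifiers so the buffers can
 * be dumped on a crash.
 */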
3438 __init static int tracer_alloc_buffers(void)
3439 {
3440         struct trace_array_cpu *data;
3441         void *array;
3442         struct page *page;
3443         int pages = 0;
3444         int ret = -ENOMEM;
3445         int i;
3446
3447         /* TODO: make the number of buffers hot pluggable with CPUs */
3448         tracing_nr_buffers = num_possible_cpus();
3449         tracing_buffer_mask = cpu_possible_map;
3450
3451         /* Allocate the first page for all buffers */
3452         for_each_tracing_cpu(i) {
3453                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
3454                 max_tr.data[i] = &per_cpu(max_data, i);
3455
3456                 array = (void *)__get_free_page(GFP_KERNEL);
3457                 if (array == NULL) {
3458                         printk(KERN_ERR "tracer: failed to allocate page "
3459                                "for trace buffer!\n");
3460                         goto free_buffers;
3461                 }
3462
3463                 /* add the page backing this array to the CPU's trace page list */
3464                 INIT_LIST_HEAD(&data->trace_pages);
3465                 page = virt_to_page(array);
3466                 list_add(&page->lru, &data->trace_pages);
3467                 /* use the LRU flag to differentiate the two buffers */
3468                 ClearPageLRU(page);
3469
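                /*
                 * The buffer locks are raw spinlocks, presumably so that
                 * taking them from inside the tracer does not recurse back
                 * into instrumented locking code.
                 */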
3470                 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3471                 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
3472
3473 /* Only allocate if we are actually using the max trace */
3474 #ifdef CONFIG_TRACER_MAX_TRACE
3475                 array = (void *)__get_free_page(GFP_KERNEL);
3476                 if (array == NULL) {
3477                         printk(KERN_ERR "tracer: failed to allocate page "
3478                                "for trace buffer!\n");
                        /*
                         * The page already queued for global_trace above
                         * would be missed by the cleanup loop (it starts
                         * at the previous CPU), so drop it here first.
                         */
                        list_del_init(&page->lru);
                        __free_page(page);
3479                         goto free_buffers;
3480                 }
3481
3482                 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
3483                 page = virt_to_page(array);
3484                 list_add(&page->lru, &max_tr.data[i]->trace_pages);
3485                 SetPageLRU(page);
3486 #endif
3487         }
3488
3489         /*
3490          * Since we allocate whole pages of entries, the actual number
3491          * of entries may be rounded up a bit past trace_nr_entries.
3492          */
3493         global_trace.entries = ENTRIES_PER_PAGE;
3494         pages++;
3495
3496         while (global_trace.entries < trace_nr_entries) {
3497                 if (trace_alloc_page())
3498                         break;
3499                 pages++;
3500         }
3501         max_tr.entries = global_trace.entries;
3502
3503         pr_info("tracer: %d pages allocated for %ld entries of %ld bytes\n",
3504                 pages, trace_nr_entries, (long)TRACE_ENTRY_SIZE);
3505         pr_info("   actual entries %ld\n", global_trace.entries);
3506
3507         tracer_init_debugfs();
3508
3509         trace_init_cmdlines();
3510
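        /* register the nop tracer and install it as the default */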
3511         register_tracer(&nop_trace);
3512         current_trace = &nop_trace;
3513
3514         /* All seems OK, enable tracing */
3515         global_trace.ctrl = tracer_enabled;
3516         tracing_disabled = 0;
3517
3518         atomic_notifier_chain_register(&panic_notifier_list,
3519                                        &trace_panic_notifier);
3520
3521         register_die_notifier(&trace_die_notifier);
3522
3523         return 0;
3524
3525  free_buffers:
3526         for (i-- ; i >= 0; i--) {
3527                 struct page *page, *tmp;
3528                 struct trace_array_cpu *data = global_trace.data[i];
3529
3530                 if (data) {
3531                         list_for_each_entry_safe(page, tmp,
3532                                                  &data->trace_pages, lru) {
3533                                 list_del_init(&page->lru);
3534                                 __free_page(page);
3535                         }
3536                 }
3537
3538 #ifdef CONFIG_TRACER_MAX_TRACE
3539                 data = max_tr.data[i];
3540                 if (data) {
3541                         list_for_each_entry_safe(page, tmp,
3542                                                  &data->trace_pages, lru) {
3543                                 list_del_init(&page->lru);
3544                                 __free_page(page);
3545                         }
3546                 }
3547 #endif
3548         }
3549         return ret;
3550 }
3551 fs_initcall(tracer_alloc_buffers);