ftrace: trace next state
kernel/trace/trace.c [linux-2.6]
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/debugfs.h>
18 #include <linux/pagemap.h>
19 #include <linux/hardirq.h>
20 #include <linux/linkage.h>
21 #include <linux/uaccess.h>
22 #include <linux/ftrace.h>
23 #include <linux/module.h>
24 #include <linux/percpu.h>
25 #include <linux/ctype.h>
26 #include <linux/init.h>
27 #include <linux/poll.h>
28 #include <linux/gfp.h>
29 #include <linux/fs.h>
30
31 #include <linux/stacktrace.h>
32
33 #include "trace.h"
34
35 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
36 unsigned long __read_mostly     tracing_thresh;
37
38 static int tracing_disabled = 1;
39
40 static long
41 ns2usecs(cycle_t nsec)
42 {
43         nsec += 500;
44         do_div(nsec, 1000);
45         return nsec;
46 }
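/*
 * Worked example: the +500 above rounds to the nearest microsecond,
 * so ns2usecs(1499) == 1 while ns2usecs(1500) == 2.
 */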
47
48 cycle_t ftrace_now(int cpu)
49 {
50         return cpu_clock(cpu);
51 }
52
53 static struct trace_array       global_trace;
54
55 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
56
57 static struct trace_array       max_tr;
58
59 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
60
61 static int                      tracer_enabled = 1;
62 static unsigned long            trace_nr_entries = 65536UL;
63
64 static struct tracer            *trace_types __read_mostly;
65 static struct tracer            *current_trace __read_mostly;
66 static int                      max_tracer_type_len;
67
68 static DEFINE_MUTEX(trace_types_lock);
69 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
70
71 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
72
73 void trace_wake_up(void)
74 {
75         /*
76          * The runqueue_is_locked() check can fail, but this is the best we
77          * have for now:
78          */
79         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
80                 wake_up(&trace_wait);
81 }
82
83 #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
84
85 static int __init set_nr_entries(char *str)
86 {
87         if (!str)
88                 return 0;
89         trace_nr_entries = simple_strtoul(str, &str, 0);
90         return 1;
91 }
92 __setup("trace_entries=", set_nr_entries);
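/*
 * Usage sketch: booting with "trace_entries=131072" on the kernel
 * command line makes early setup call set_nr_entries("131072"), so
 * trace_nr_entries becomes 131072 instead of the 65536 default.
 */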
93
94 unsigned long nsecs_to_usecs(unsigned long nsecs)
95 {
96         return nsecs / 1000;
97 }
98
99 enum trace_type {
100         __TRACE_FIRST_TYPE = 0,
101
102         TRACE_FN,
103         TRACE_CTX,
104         TRACE_WAKE,
105         TRACE_STACK,
106         TRACE_SPECIAL,
107
108         __TRACE_LAST_TYPE
109 };
110
111 enum trace_flag_type {
112         TRACE_FLAG_IRQS_OFF             = 0x01,
113         TRACE_FLAG_NEED_RESCHED         = 0x02,
114         TRACE_FLAG_HARDIRQ              = 0x04,
115         TRACE_FLAG_SOFTIRQ              = 0x08,
116 };
117
118 #define TRACE_ITER_SYM_MASK \
119         (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
120
121 /* These must match the bit positions above */
122 static const char *trace_options[] = {
123         "print-parent",
124         "sym-offset",
125         "sym-addr",
126         "verbose",
127         "raw",
128         "hex",
129         "bin",
130         "block",
131         "stacktrace",
132         "sched-tree",
133         NULL
134 };
135
136 static DEFINE_SPINLOCK(ftrace_max_lock);
137
138 /*
139  * Copy the new maximum trace into the separate maximum-trace
140  * structure. (this way the maximum trace is permanently saved,
141  * for later retrieval via /debugfs/tracing/latency_trace)
142  */
143 static void
144 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
145 {
146         struct trace_array_cpu *data = tr->data[cpu];
147
148         max_tr.cpu = cpu;
149         max_tr.time_start = data->preempt_timestamp;
150
151         data = max_tr.data[cpu];
152         data->saved_latency = tracing_max_latency;
153
154         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
155         data->pid = tsk->pid;
156         data->uid = tsk->uid;
157         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
158         data->policy = tsk->policy;
159         data->rt_priority = tsk->rt_priority;
160
161         /* record this task's comm */
162         tracing_record_cmdline(current);
163 }
164
165 void check_pages(struct trace_array_cpu *data)
166 {
167         struct page *page, *tmp;
168
169         BUG_ON(data->trace_pages.next->prev != &data->trace_pages);
170         BUG_ON(data->trace_pages.prev->next != &data->trace_pages);
171
172         list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
173                 BUG_ON(page->lru.next->prev != &page->lru);
174                 BUG_ON(page->lru.prev->next != &page->lru);
175         }
176 }
177
178 void *head_page(struct trace_array_cpu *data)
179 {
180         struct page *page;
181
182         check_pages(data);
183         if (list_empty(&data->trace_pages))
184                 return NULL;
185
186         page = list_entry(data->trace_pages.next, struct page, lru);
187         BUG_ON(&page->lru == &data->trace_pages);
188
189         return page_address(page);
190 }
191
192 static int
193 trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
194 {
195         int len = (PAGE_SIZE - 1) - s->len;
196         va_list ap;
197         int ret;
198
199         if (!len)
200                 return 0;
201
202         va_start(ap, fmt);
203         ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
204         va_end(ap);
205
206         /* If we can't write it all, don't bother writing anything */
207         if (ret >= len)
208                 return 0;
209
210         s->len += ret;
211
212         return len;
213 }
214
215 static int
216 trace_seq_puts(struct trace_seq *s, const char *str)
217 {
218         int len = strlen(str);
219
220         if (len > ((PAGE_SIZE - 1) - s->len))
221                 return 0;
222
223         memcpy(s->buffer + s->len, str, len);
224         s->len += len;
225
226         return len;
227 }
228
229 static int
230 trace_seq_putc(struct trace_seq *s, unsigned char c)
231 {
232         if (s->len >= (PAGE_SIZE - 1))
233                 return 0;
234
235         s->buffer[s->len++] = c;
236
237         return 1;
238 }
239
240 static int
241 trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
242 {
243         if (len > ((PAGE_SIZE - 1) - s->len))
244                 return 0;
245
246         memcpy(s->buffer + s->len, mem, len);
247         s->len += len;
248
249         return len;
250 }
251
252 #define HEX_CHARS 17
253
254 static int
255 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
256 {
257         unsigned char hex[HEX_CHARS];
258         unsigned char *data;
259         unsigned char byte;
260         int i, j;
261
262         BUG_ON(len >= HEX_CHARS);
263
264         data = mem;
265
266 #ifdef __BIG_ENDIAN
267         for (i = 0, j = 0; i < len; i++) {
268 #else
269         for (i = len-1, j = 0; i >= 0; i--) {
270 #endif
271                 byte = data[i];
272
273                 hex[j]   = byte & 0x0f;
274                 if (hex[j] >= 10)
275                         hex[j] += 'a' - 10;
276                 else
277                         hex[j] += '0';
278                 j++;
279
280                 hex[j] = byte >> 4;
281                 if (hex[j] >= 10)
282                         hex[j] += 'a' - 10;
283                 else
284                         hex[j] += '0';
285                 j++;
286         }
287         hex[j] = ' ';
288         j++;
289
290         return trace_seq_putmem(s, hex, j);
291 }
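/*
 * Worked example (little-endian): a u16 holding 0x1234 sits in memory
 * as the bytes 34 12, so the loop above walks the bytes back to front
 * and emits the low nibble of each byte first, producing "2143 "
 * (including the trailing space) in the trace_seq.
 */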
292
293 static void
294 trace_seq_reset(struct trace_seq *s)
295 {
296         s->len = 0;
297 }
298
299 static void
300 trace_print_seq(struct seq_file *m, struct trace_seq *s)
301 {
302         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
303
304         s->buffer[len] = 0;
305         seq_puts(m, s->buffer);
306
307         trace_seq_reset(s);
308 }
309
310 static void
311 flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
312 {
313         struct list_head flip_pages;
314
315         INIT_LIST_HEAD(&flip_pages);
316
317         memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
318                 sizeof(struct trace_array_cpu) -
319                 offsetof(struct trace_array_cpu, trace_head_idx));
320
321         check_pages(tr1);
322         check_pages(tr2);
323         list_splice_init(&tr1->trace_pages, &flip_pages);
324         list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
325         list_splice_init(&flip_pages, &tr2->trace_pages);
326         BUG_ON(!list_empty(&flip_pages));
327         check_pages(tr1);
328         check_pages(tr2);
329 }
330
331 void
332 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
333 {
334         struct trace_array_cpu *data;
335         int i;
336
337         WARN_ON_ONCE(!irqs_disabled());
338         spin_lock(&ftrace_max_lock);
339         /* clear out all the previous traces */
340         for_each_possible_cpu(i) {
341                 data = tr->data[i];
342                 flip_trace(max_tr.data[i], data);
343                 tracing_reset(data);
344         }
345
346         __update_max_tr(tr, tsk, cpu);
347         spin_unlock(&ftrace_max_lock);
348 }
349
350 /**
351  * update_max_tr_single - only copy one trace over, and reset the rest
352  * @tr: tracer
353  * @tsk: task with the latency
354  * @cpu: the cpu of the buffer to copy.
355  */
356 void
357 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
358 {
359         struct trace_array_cpu *data = tr->data[cpu];
360         int i;
361
362         WARN_ON_ONCE(!irqs_disabled());
363         spin_lock(&ftrace_max_lock);
364         for_each_possible_cpu(i)
365                 tracing_reset(max_tr.data[i]);
366
367         flip_trace(max_tr.data[cpu], data);
368         tracing_reset(data);
369
370         __update_max_tr(tr, tsk, cpu);
371         spin_unlock(&ftrace_max_lock);
372 }
373
374 int register_tracer(struct tracer *type)
375 {
376         struct tracer *t;
377         int len;
378         int ret = 0;
379
380         if (!type->name) {
381                 pr_info("Tracer must have a name\n");
382                 return -1;
383         }
384
385         mutex_lock(&trace_types_lock);
386         for (t = trace_types; t; t = t->next) {
387                 if (strcmp(type->name, t->name) == 0) {
388                         /* already found */
389                         pr_info("Trace %s already registered\n",
390                                 type->name);
391                         ret = -1;
392                         goto out;
393                 }
394         }
395
396 #ifdef CONFIG_FTRACE_STARTUP_TEST
397         if (type->selftest) {
398                 struct tracer *saved_tracer = current_trace;
399                 struct trace_array_cpu *data;
400                 struct trace_array *tr = &global_trace;
401                 int saved_ctrl = tr->ctrl;
402                 int i;
403                 /*
404                  * Run a selftest on this tracer.
405                  * Here we reset the trace buffer, and set the current
406                  * tracer to be this tracer. The tracer can then run some
407                  * internal tracing to verify that everything is in order.
408                  * If we fail, we do not register this tracer.
409                  */
410                 for_each_possible_cpu(i) {
411                         data = tr->data[i];
412                         if (!head_page(data))
413                                 continue;
414                         tracing_reset(data);
415                 }
416                 current_trace = type;
417                 tr->ctrl = 0;
418                 /* the test is responsible for initializing and enabling */
419                 pr_info("Testing tracer %s: ", type->name);
420                 ret = type->selftest(type, tr);
421                 /* the test is responsible for resetting too */
422                 current_trace = saved_tracer;
423                 tr->ctrl = saved_ctrl;
424                 if (ret) {
425                         printk(KERN_CONT "FAILED!\n");
426                         goto out;
427                 }
428                 /* Only reset on passing, to avoid touching corrupted buffers */
429                 for_each_possible_cpu(i) {
430                         data = tr->data[i];
431                         if (!head_page(data))
432                                 continue;
433                         tracing_reset(data);
434                 }
435                 printk(KERN_CONT "PASSED\n");
436         }
437 #endif
438
439         type->next = trace_types;
440         trace_types = type;
441         len = strlen(type->name);
442         if (len > max_tracer_type_len)
443                 max_tracer_type_len = len;
444
445  out:
446         mutex_unlock(&trace_types_lock);
447
448         return ret;
449 }
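/*
 * A minimal registration sketch; "my_tracer" and its callbacks are
 * hypothetical and not part of this file.  A plugin fills in a
 * struct tracer and hands it to register_tracer(), typically from
 * an initcall:
 */
static void my_tracer_init(struct trace_array *tr)
{
}

static void my_tracer_reset(struct trace_array *tr)
{
}

static struct tracer my_tracer __read_mostly = {
        .name   = "my_tracer",
        .init   = my_tracer_init,
        .reset  = my_tracer_reset,
};

static int __init my_tracer_register(void)
{
        return register_tracer(&my_tracer);
}
device_initcall(my_tracer_register);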
450
451 void unregister_tracer(struct tracer *type)
452 {
453         struct tracer **t;
454         int len;
455
456         mutex_lock(&trace_types_lock);
457         for (t = &trace_types; *t; t = &(*t)->next) {
458                 if (*t == type)
459                         goto found;
460         }
461         pr_info("Trace %s not registered\n", type->name);
462         goto out;
463
464  found:
465         *t = (*t)->next;
466         if (strlen(type->name) != max_tracer_type_len)
467                 goto out;
468
469         max_tracer_type_len = 0;
470         for (t = &trace_types; *t; t = &(*t)->next) {
471                 len = strlen((*t)->name);
472                 if (len > max_tracer_type_len)
473                         max_tracer_type_len = len;
474         }
475  out:
476         mutex_unlock(&trace_types_lock);
477 }
478
479 void tracing_reset(struct trace_array_cpu *data)
480 {
481         data->trace_idx = 0;
482         data->trace_head = data->trace_tail = head_page(data);
483         data->trace_head_idx = 0;
484         data->trace_tail_idx = 0;
485 }
486
487 #define SAVED_CMDLINES 128
488 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
489 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
490 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
491 static int cmdline_idx;
492 static DEFINE_SPINLOCK(trace_cmdline_lock);
493 atomic_t trace_record_cmdline_disabled;
494
495 static void trace_init_cmdlines(void)
496 {
497         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
498         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
499         cmdline_idx = 0;
500 }
501
502 void trace_stop_cmdline_recording(void);
503
504 static void trace_save_cmdline(struct task_struct *tsk)
505 {
506         unsigned map;
507         unsigned idx;
508
509         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
510                 return;
511
512         /*
513          * It's not the end of the world if we don't get
514          * the lock, but we also don't want to spin
515          * nor do we want to disable interrupts,
516          * so if we miss here, then better luck next time.
517          */
518         if (!spin_trylock(&trace_cmdline_lock))
519                 return;
520
521         idx = map_pid_to_cmdline[tsk->pid];
522         if (idx >= SAVED_CMDLINES) {
523                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
524
525                 map = map_cmdline_to_pid[idx];
526                 if (map <= PID_MAX_DEFAULT)
527                         map_pid_to_cmdline[map] = (unsigned)-1;
528
529                 map_pid_to_cmdline[tsk->pid] = idx;
530
531                 cmdline_idx = idx;
532         }
533
534         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
535
536         spin_unlock(&trace_cmdline_lock);
537 }
538
539 static char *trace_find_cmdline(int pid)
540 {
541         char *cmdline = "<...>";
542         unsigned map;
543
544         if (!pid)
545                 return "<idle>";
546
547         if (pid > PID_MAX_DEFAULT)
548                 goto out;
549
550         map = map_pid_to_cmdline[pid];
551         if (map >= SAVED_CMDLINES)
552                 goto out;
553
554         cmdline = saved_cmdlines[map];
555
556  out:
557         return cmdline;
558 }
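/*
 * e.g. trace_find_cmdline(0) always yields "<idle>", while a pid whose
 * saved_cmdlines slot was recycled (or never recorded) shows up as
 * "<...>" in the trace output.
 */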
559
560 void tracing_record_cmdline(struct task_struct *tsk)
561 {
562         if (atomic_read(&trace_record_cmdline_disabled))
563                 return;
564
565         trace_save_cmdline(tsk);
566 }
567
568 static inline struct list_head *
569 trace_next_list(struct trace_array_cpu *data, struct list_head *next)
570 {
571         /*
572          * Round-robin - but skip the head (which is not a real page):
573          */
574         next = next->next;
575         if (unlikely(next == &data->trace_pages))
576                 next = next->next;
577         BUG_ON(next == &data->trace_pages);
578
579         return next;
580 }
581
582 static inline void *
583 trace_next_page(struct trace_array_cpu *data, void *addr)
584 {
585         struct list_head *next;
586         struct page *page;
587
588         page = virt_to_page(addr);
589
590         next = trace_next_list(data, &page->lru);
591         page = list_entry(next, struct page, lru);
592
593         return page_address(page);
594 }
595
596 static inline struct trace_entry *
597 tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
598 {
599         unsigned long idx, idx_next;
600         struct trace_entry *entry;
601
602         data->trace_idx++;
603         idx = data->trace_head_idx;
604         idx_next = idx + 1;
605
606         BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
607
608         entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
609
610         if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
611                 data->trace_head = trace_next_page(data, data->trace_head);
612                 idx_next = 0;
613         }
614
615         if (data->trace_head == data->trace_tail &&
616             idx_next == data->trace_tail_idx) {
617                 /* overrun */
618                 data->trace_tail_idx++;
619                 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
620                         data->trace_tail =
621                                 trace_next_page(data, data->trace_tail);
622                         data->trace_tail_idx = 0;
623                 }
624         }
625
626         data->trace_head_idx = idx_next;
627
628         return entry;
629 }
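/*
 * Note on the overrun path above: when the write head catches up with
 * the read tail, the oldest entry is discarded by advancing the tail,
 * so the ring always keeps the most recently written entries.
 */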
630
631 static inline void
632 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
633 {
634         struct task_struct *tsk = current;
635         unsigned long pc;
636
637         pc = preempt_count();
638
639         entry->preempt_count    = pc & 0xff;
640         entry->pid              = tsk->pid;
641         entry->t                = ftrace_now(raw_smp_processor_id());
642         entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
643                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
644                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
645                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
646 }
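/*
 * e.g. an entry recorded from hard-irq context with interrupts disabled
 * and TIF_NEED_RESCHED set ends up with
 * flags == TRACE_FLAG_IRQS_OFF | TRACE_FLAG_HARDIRQ | TRACE_FLAG_NEED_RESCHED,
 * which the latency output later renders as a "dNh" column.
 */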
647
648 void
649 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
650                unsigned long ip, unsigned long parent_ip, unsigned long flags)
651 {
652         struct trace_entry *entry;
653         unsigned long irq_flags;
654
655         spin_lock_irqsave(&data->lock, irq_flags);
656         entry                   = tracing_get_trace_entry(tr, data);
657         tracing_generic_entry_update(entry, flags);
658         entry->type             = TRACE_FN;
659         entry->fn.ip            = ip;
660         entry->fn.parent_ip     = parent_ip;
661         spin_unlock_irqrestore(&data->lock, irq_flags);
662
663         trace_wake_up();
664 }
665
666 void
667 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
668        unsigned long ip, unsigned long parent_ip, unsigned long flags)
669 {
670         if (likely(!atomic_read(&data->disabled)))
671                 trace_function(tr, data, ip, parent_ip, flags);
672 }
673
674 void
675 __trace_special(void *__tr, void *__data,
676                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
677 {
678         struct trace_array_cpu *data = __data;
679         struct trace_array *tr = __tr;
680         struct trace_entry *entry;
681         unsigned long irq_flags;
682
683         spin_lock_irqsave(&data->lock, irq_flags);
684         entry                   = tracing_get_trace_entry(tr, data);
685         tracing_generic_entry_update(entry, 0);
686         entry->type             = TRACE_SPECIAL;
687         entry->special.arg1     = arg1;
688         entry->special.arg2     = arg2;
689         entry->special.arg3     = arg3;
690         spin_unlock_irqrestore(&data->lock, irq_flags);
691
692         trace_wake_up();
693 }
694
695 void __trace_stack(struct trace_array *tr,
696                    struct trace_array_cpu *data,
697                    unsigned long flags,
698                    int skip)
699 {
700         struct trace_entry *entry;
701         struct stack_trace trace;
702
703         if (!(trace_flags & TRACE_ITER_STACKTRACE))
704                 return;
705
706         entry                   = tracing_get_trace_entry(tr, data);
707         tracing_generic_entry_update(entry, flags);
708         entry->type             = TRACE_STACK;
709
710         memset(&entry->stack, 0, sizeof(entry->stack));
711
712         trace.nr_entries        = 0;
713         trace.max_entries       = FTRACE_STACK_ENTRIES;
714         trace.skip              = skip;
715         trace.entries           = entry->stack.caller;
716
717         save_stack_trace(&trace);
718 }
719
720 void
721 tracing_sched_switch_trace(struct trace_array *tr,
722                            struct trace_array_cpu *data,
723                            struct task_struct *prev,
724                            struct task_struct *next,
725                            unsigned long flags)
726 {
727         struct trace_entry *entry;
728         unsigned long irq_flags;
729
730         spin_lock_irqsave(&data->lock, irq_flags);
731         entry                   = tracing_get_trace_entry(tr, data);
732         tracing_generic_entry_update(entry, flags);
733         entry->type             = TRACE_CTX;
734         entry->ctx.prev_pid     = prev->pid;
735         entry->ctx.prev_prio    = prev->prio;
736         entry->ctx.prev_state   = prev->state;
737         entry->ctx.next_pid     = next->pid;
738         entry->ctx.next_prio    = next->prio;
739         entry->ctx.next_state   = next->state;
740         __trace_stack(tr, data, flags, 4);
741         spin_unlock_irqrestore(&data->lock, irq_flags);
742 }
743
744 void
745 tracing_sched_wakeup_trace(struct trace_array *tr,
746                            struct trace_array_cpu *data,
747                            struct task_struct *wakee,
748                            struct task_struct *curr,
749                            unsigned long flags)
750 {
751         struct trace_entry *entry;
752         unsigned long irq_flags;
753
754         spin_lock_irqsave(&data->lock, irq_flags);
755         entry                   = tracing_get_trace_entry(tr, data);
756         tracing_generic_entry_update(entry, flags);
757         entry->type             = TRACE_WAKE;
758         entry->ctx.prev_pid     = curr->pid;
759         entry->ctx.prev_prio    = curr->prio;
760         entry->ctx.prev_state   = curr->state;
761         entry->ctx.next_pid     = wakee->pid;
762         entry->ctx.next_prio    = wakee->prio;
763         entry->ctx.next_state   = wakee->state;
764         __trace_stack(tr, data, flags, 5);
765         spin_unlock_irqrestore(&data->lock, irq_flags);
766
767         trace_wake_up();
768 }
769
770 #ifdef CONFIG_FTRACE
771 static void
772 function_trace_call(unsigned long ip, unsigned long parent_ip)
773 {
774         struct trace_array *tr = &global_trace;
775         struct trace_array_cpu *data;
776         unsigned long flags;
777         long disabled;
778         int cpu;
779
780         if (unlikely(!tracer_enabled))
781                 return;
782
783         local_irq_save(flags);
784         cpu = raw_smp_processor_id();
785         data = tr->data[cpu];
786         disabled = atomic_inc_return(&data->disabled);
787
788         if (likely(disabled == 1))
789                 trace_function(tr, data, ip, parent_ip, flags);
790
791         atomic_dec(&data->disabled);
792         local_irq_restore(flags);
793 }
794
795 static struct ftrace_ops trace_ops __read_mostly =
796 {
797         .func = function_trace_call,
798 };
799
800 void tracing_start_function_trace(void)
801 {
802         register_ftrace_function(&trace_ops);
803 }
804
805 void tracing_stop_function_trace(void)
806 {
807         unregister_ftrace_function(&trace_ops);
808 }
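/*
 * A sketch of how a tracer plugin might toggle function tracing from
 * its ctrl_update callback; "example_ctrl_update" is hypothetical and
 * not part of this file:
 */
static void example_ctrl_update(struct trace_array *tr)
{
        if (tr->ctrl)
                tracing_start_function_trace();
        else
                tracing_stop_function_trace();
}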
809 #endif
810
811 enum trace_file_type {
812         TRACE_FILE_LAT_FMT      = 1,
813 };
814
815 static struct trace_entry *
816 trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
817                 struct trace_iterator *iter, int cpu)
818 {
819         struct page *page;
820         struct trace_entry *array;
821
822         if (iter->next_idx[cpu] >= tr->entries ||
823             iter->next_idx[cpu] >= data->trace_idx ||
824             (data->trace_head == data->trace_tail &&
825              data->trace_head_idx == data->trace_tail_idx))
826                 return NULL;
827
828         if (!iter->next_page[cpu]) {
829                 /* Initialize the iterator for this cpu trace buffer */
830                 WARN_ON(!data->trace_tail);
831                 page = virt_to_page(data->trace_tail);
832                 iter->next_page[cpu] = &page->lru;
833                 iter->next_page_idx[cpu] = data->trace_tail_idx;
834         }
835
836         page = list_entry(iter->next_page[cpu], struct page, lru);
837         BUG_ON(&data->trace_pages == &page->lru);
838
839         array = page_address(page);
840
841         WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
842         return &array[iter->next_page_idx[cpu]];
843 }
844
845 static struct trace_entry *
846 find_next_entry(struct trace_iterator *iter, int *ent_cpu)
847 {
848         struct trace_array *tr = iter->tr;
849         struct trace_entry *ent, *next = NULL;
850         int next_cpu = -1;
851         int cpu;
852
853         for_each_possible_cpu(cpu) {
854                 if (!head_page(tr->data[cpu]))
855                         continue;
856                 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
857                 /*
858                  * Pick the entry with the smallest timestamp:
859                  */
860                 if (ent && (!next || ent->t < next->t)) {
861                         next = ent;
862                         next_cpu = cpu;
863                 }
864         }
865
866         if (ent_cpu)
867                 *ent_cpu = next_cpu;
868
869         return next;
870 }
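/*
 * In effect the iterator does a k-way merge of the per-cpu buffers:
 * each call scans every cpu's next pending entry and returns the one
 * with the smallest timestamp.
 */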
871
872 static void trace_iterator_increment(struct trace_iterator *iter)
873 {
874         iter->idx++;
875         iter->next_idx[iter->cpu]++;
876         iter->next_page_idx[iter->cpu]++;
877
878         if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
879                 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
880
881                 iter->next_page_idx[iter->cpu] = 0;
882                 iter->next_page[iter->cpu] =
883                         trace_next_list(data, iter->next_page[iter->cpu]);
884         }
885 }
886
887 static void trace_consume(struct trace_iterator *iter)
888 {
889         struct trace_array_cpu *data = iter->tr->data[iter->cpu];
890
891         data->trace_tail_idx++;
892         if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
893                 data->trace_tail = trace_next_page(data, data->trace_tail);
894                 data->trace_tail_idx = 0;
895         }
896
897         /* Check if we emptied the buffer; if so, reset the index */
898         if (data->trace_head == data->trace_tail &&
899             data->trace_head_idx == data->trace_tail_idx)
900                 data->trace_idx = 0;
901 }
902
903 static void *find_next_entry_inc(struct trace_iterator *iter)
904 {
905         struct trace_entry *next;
906         int next_cpu = -1;
907
908         next = find_next_entry(iter, &next_cpu);
909
910         iter->prev_ent = iter->ent;
911         iter->prev_cpu = iter->cpu;
912
913         iter->ent = next;
914         iter->cpu = next_cpu;
915
916         if (next)
917                 trace_iterator_increment(iter);
918
919         return next ? iter : NULL;
920 }
921
922 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
923 {
924         struct trace_iterator *iter = m->private;
925         void *last_ent = iter->ent;
926         int i = (int)*pos;
927         void *ent;
928
929         (*pos)++;
930
931         /* can't go backwards */
932         if (iter->idx > i)
933                 return NULL;
934
935         if (iter->idx < 0)
936                 ent = find_next_entry_inc(iter);
937         else
938                 ent = iter;
939
940         while (ent && iter->idx < i)
941                 ent = find_next_entry_inc(iter);
942
943         iter->pos = *pos;
944
945         if (last_ent && !ent)
946                 seq_puts(m, "\n\nvim:ft=help\n");
947
948         return ent;
949 }
950
951 static void *s_start(struct seq_file *m, loff_t *pos)
952 {
953         struct trace_iterator *iter = m->private;
954         void *p = NULL;
955         loff_t l = 0;
956         int i;
957
958         mutex_lock(&trace_types_lock);
959
960         if (!current_trace || current_trace != iter->trace)
961                 return NULL;
962
963         atomic_inc(&trace_record_cmdline_disabled);
964
965         /* let the tracer grab locks here if needed */
966         if (current_trace->start)
967                 current_trace->start(iter);
968
969         if (*pos != iter->pos) {
970                 iter->ent = NULL;
971                 iter->cpu = 0;
972                 iter->idx = -1;
973                 iter->prev_ent = NULL;
974                 iter->prev_cpu = -1;
975
976                 for_each_possible_cpu(i) {
977                         iter->next_idx[i] = 0;
978                         iter->next_page[i] = NULL;
979                 }
980
981                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
982                         ;
983
984         } else {
985                 l = *pos - 1;
986                 p = s_next(m, p, &l);
987         }
988
989         return p;
990 }
991
992 static void s_stop(struct seq_file *m, void *p)
993 {
994         struct trace_iterator *iter = m->private;
995
996         atomic_dec(&trace_record_cmdline_disabled);
997
998         /* let the tracer release locks here if needed */
999         if (current_trace && current_trace == iter->trace && iter->trace->stop)
1000                 iter->trace->stop(iter);
1001
1002         mutex_unlock(&trace_types_lock);
1003 }
1004
1005 static int
1006 seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1007 {
1008 #ifdef CONFIG_KALLSYMS
1009         char str[KSYM_SYMBOL_LEN];
1010
1011         kallsyms_lookup(address, NULL, NULL, NULL, str);
1012
1013         return trace_seq_printf(s, fmt, str);
1014 #endif
1015         return 1;
1016 }
1017
1018 static int
1019 seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1020                      unsigned long address)
1021 {
1022 #ifdef CONFIG_KALLSYMS
1023         char str[KSYM_SYMBOL_LEN];
1024
1025         sprint_symbol(str, address);
1026         return trace_seq_printf(s, fmt, str);
1027 #endif
1028         return 1;
1029 }
1030
1031 #ifndef CONFIG_64BIT
1032 # define IP_FMT "%08lx"
1033 #else
1034 # define IP_FMT "%016lx"
1035 #endif
1036
1037 static int
1038 seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1039 {
1040         int ret;
1041
1042         if (!ip)
1043                 return trace_seq_printf(s, "0");
1044
1045         if (sym_flags & TRACE_ITER_SYM_OFFSET)
1046                 ret = seq_print_sym_offset(s, "%s", ip);
1047         else
1048                 ret = seq_print_sym_short(s, "%s", ip);
1049
1050         if (!ret)
1051                 return 0;
1052
1053         if (sym_flags & TRACE_ITER_SYM_ADDR)
1054                 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1055         return ret;
1056 }
1057
1058 static void print_lat_help_header(struct seq_file *m)
1059 {
1060         seq_puts(m, "#                _------=> CPU#            \n");
1061         seq_puts(m, "#               / _-----=> irqs-off        \n");
1062         seq_puts(m, "#              | / _----=> need-resched    \n");
1063         seq_puts(m, "#              || / _---=> hardirq/softirq \n");
1064         seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
1065         seq_puts(m, "#              |||| /                      \n");
1066         seq_puts(m, "#              |||||     delay             \n");
1067         seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
1068         seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
1069 }
1070
1071 static void print_func_help_header(struct seq_file *m)
1072 {
1073         seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
1074         seq_puts(m, "#              | |      |          |         |\n");
1075 }
1076
1077
1078 static void
1079 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1080 {
1081         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1082         struct trace_array *tr = iter->tr;
1083         struct trace_array_cpu *data = tr->data[tr->cpu];
1084         struct tracer *type = current_trace;
1085         unsigned long total   = 0;
1086         unsigned long entries = 0;
1087         int cpu;
1088         const char *name = "preemption";
1089
1090         if (type)
1091                 name = type->name;
1092
1093         for_each_possible_cpu(cpu) {
1094                 if (head_page(tr->data[cpu])) {
1095                         total += tr->data[cpu]->trace_idx;
1096                         if (tr->data[cpu]->trace_idx > tr->entries)
1097                                 entries += tr->entries;
1098                         else
1099                                 entries += tr->data[cpu]->trace_idx;
1100                 }
1101         }
1102
1103         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1104                    name, UTS_RELEASE);
1105         seq_puts(m, "-----------------------------------"
1106                  "---------------------------------\n");
1107         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1108                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1109                    nsecs_to_usecs(data->saved_latency),
1110                    entries,
1111                    total,
1112                    tr->cpu,
1113 #if defined(CONFIG_PREEMPT_NONE)
1114                    "server",
1115 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1116                    "desktop",
1117 #elif defined(CONFIG_PREEMPT_DESKTOP)
1118                    "preempt",
1119 #else
1120                    "unknown",
1121 #endif
1122                    /* These are reserved for later use */
1123                    0, 0, 0, 0);
1124 #ifdef CONFIG_SMP
1125         seq_printf(m, " #P:%d)\n", num_online_cpus());
1126 #else
1127         seq_puts(m, ")\n");
1128 #endif
1129         seq_puts(m, "    -----------------\n");
1130         seq_printf(m, "    | task: %.16s-%d "
1131                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1132                    data->comm, data->pid, data->uid, data->nice,
1133                    data->policy, data->rt_priority);
1134         seq_puts(m, "    -----------------\n");
1135
1136         if (data->critical_start) {
1137                 seq_puts(m, " => started at: ");
1138                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1139                 trace_print_seq(m, &iter->seq);
1140                 seq_puts(m, "\n => ended at:   ");
1141                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1142                 trace_print_seq(m, &iter->seq);
1143                 seq_puts(m, "\n");
1144         }
1145
1146         seq_puts(m, "\n");
1147 }
1148
1149 static void
1150 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1151 {
1152         int hardirq, softirq;
1153         char *comm;
1154
1155         comm = trace_find_cmdline(entry->pid);
1156
1157         trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1158         trace_seq_printf(s, "%d", cpu);
1159         trace_seq_printf(s, "%c%c",
1160                         (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1161                         ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1162
1163         hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1164         softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1165         if (hardirq && softirq)
1166                 trace_seq_putc(s, 'H');
1167         else {
1168                 if (hardirq)
1169                         trace_seq_putc(s, 'h');
1170                 else {
1171                         if (softirq)
1172                                 trace_seq_putc(s, 's');
1173                         else
1174                                 trace_seq_putc(s, '.');
1175                 }
1176         }
1177
1178         if (entry->preempt_count)
1179                 trace_seq_printf(s, "%x", entry->preempt_count);
1180         else
1181                 trace_seq_puts(s, ".");
1182 }
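/*
 * Example of the prefix produced above: a line starting with
 * "bash-4321  1dNs2" means the entry came from "bash" (pid 4321) on
 * CPU 1, with interrupts disabled ('d'), need-resched set ('N'), in
 * softirq context ('s'), at preempt depth 2.
 */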
1183
1184 unsigned long preempt_mark_thresh = 100;
1185
1186 static void
1187 lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1188                     unsigned long rel_usecs)
1189 {
1190         trace_seq_printf(s, " %4lldus", abs_usecs);
1191         if (rel_usecs > preempt_mark_thresh)
1192                 trace_seq_puts(s, "!: ");
1193         else if (rel_usecs > 1)
1194                 trace_seq_puts(s, "+: ");
1195         else
1196                 trace_seq_puts(s, " : ");
1197 }
1198
1199 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1200
1201 static int
1202 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1203 {
1204         struct trace_seq *s = &iter->seq;
1205         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1206         struct trace_entry *next_entry = find_next_entry(iter, NULL);
1207         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1208         struct trace_entry *entry = iter->ent;
1209         unsigned long abs_usecs;
1210         unsigned long rel_usecs;
1211         char *comm;
1212         int S, T;
1213         int i;
1214
1215         if (!next_entry)
1216                 next_entry = entry;
1217         rel_usecs = ns2usecs(next_entry->t - entry->t);
1218         abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1219
1220         if (verbose) {
1221                 comm = trace_find_cmdline(entry->pid);
1222                 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1223                                  " %ld.%03ldms (+%ld.%03ldms): ",
1224                                  comm,
1225                                  entry->pid, cpu, entry->flags,
1226                                  entry->preempt_count, trace_idx,
1227                                  ns2usecs(entry->t),
1228                                  abs_usecs/1000,
1229                                  abs_usecs % 1000, rel_usecs/1000,
1230                                  rel_usecs % 1000);
1231         } else {
1232                 lat_print_generic(s, entry, cpu);
1233                 lat_print_timestamp(s, abs_usecs, rel_usecs);
1234         }
1235         switch (entry->type) {
1236         case TRACE_FN:
1237                 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1238                 trace_seq_puts(s, " (");
1239                 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1240                 trace_seq_puts(s, ")\n");
1241                 break;
1242         case TRACE_CTX:
1243         case TRACE_WAKE:
1244                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1245                         state_to_char[entry->ctx.prev_state] : 'X';
1246                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1247                         state_to_char[entry->ctx.next_state] : 'X';
1248
1249                 comm = trace_find_cmdline(entry->ctx.next_pid);
1250                 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1251                                  entry->ctx.prev_pid,
1252                                  entry->ctx.prev_prio,
1253                                  S, entry->type == TRACE_CTX ? "==>" : "  +",
1254                                  entry->ctx.next_pid,
1255                                  entry->ctx.next_prio,
1256                                  T, comm);
1257                 break;
1258         case TRACE_SPECIAL:
1259                 trace_seq_printf(s, "# %ld %ld %ld\n",
1260                                  entry->special.arg1,
1261                                  entry->special.arg2,
1262                                  entry->special.arg3);
1263                 break;
1264         case TRACE_STACK:
1265                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1266                         if (i)
1267                                 trace_seq_puts(s, " <= ");
1268                         seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1269                 }
1270                 trace_seq_puts(s, "\n");
1271                 break;
1272         default:
1273                 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1274         }
1275         return 1;
1276 }
1277
1278 static int print_trace_fmt(struct trace_iterator *iter)
1279 {
1280         struct trace_seq *s = &iter->seq;
1281         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1282         struct trace_entry *entry;
1283         unsigned long usec_rem;
1284         unsigned long long t;
1285         unsigned long secs;
1286         char *comm;
1287         int ret;
1288         int S, T;
1289         int i;
1290
1291         entry = iter->ent;
1292
1293         comm = trace_find_cmdline(iter->ent->pid);
1294
1295         t = ns2usecs(entry->t);
1296         usec_rem = do_div(t, 1000000ULL);
1297         secs = (unsigned long)t;
1298
1299         ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1300         if (!ret)
1301                 return 0;
1302         ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1303         if (!ret)
1304                 return 0;
1305         ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1306         if (!ret)
1307                 return 0;
1308
1309         switch (entry->type) {
1310         case TRACE_FN:
1311                 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1312                 if (!ret)
1313                         return 0;
1314                 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1315                                                 entry->fn.parent_ip) {
1316                         ret = trace_seq_printf(s, " <-");
1317                         if (!ret)
1318                                 return 0;
1319                         ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1320                                                sym_flags);
1321                         if (!ret)
1322                                 return 0;
1323                 }
1324                 ret = trace_seq_printf(s, "\n");
1325                 if (!ret)
1326                         return 0;
1327                 break;
1328         case TRACE_CTX:
1329         case TRACE_WAKE:
1330                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1331                         state_to_char[entry->ctx.prev_state] : 'X';
1332                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1333                         state_to_char[entry->ctx.next_state] : 'X';
1334                 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1335                                        entry->ctx.prev_pid,
1336                                        entry->ctx.prev_prio,
1337                                        S,
1338                                        entry->type == TRACE_CTX ? "==>" : "  +",
1339                                        entry->ctx.next_pid,
1340                                        entry->ctx.next_prio,
1341                                        T);
1342                 if (!ret)
1343                         return 0;
1344                 break;
1345         case TRACE_SPECIAL:
1346                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1347                                  entry->special.arg1,
1348                                  entry->special.arg2,
1349                                  entry->special.arg3);
1350                 if (!ret)
1351                         return 0;
1352                 break;
1353         case TRACE_STACK:
1354                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1355                         if (i) {
1356                                 ret = trace_seq_puts(s, " <= ");
1357                                 if (!ret)
1358                                         return 0;
1359                         }
1360                         ret = seq_print_ip_sym(s, entry->stack.caller[i],
1361                                                sym_flags);
1362                         if (!ret)
1363                                 return 0;
1364                 }
1365                 ret = trace_seq_puts(s, "\n");
1366                 if (!ret)
1367                         return 0;
1368                 break;
1369         }
1370         return 1;
1371 }
1372
1373 static int print_raw_fmt(struct trace_iterator *iter)
1374 {
1375         struct trace_seq *s = &iter->seq;
1376         struct trace_entry *entry;
1377         int ret;
1378         int S, T;
1379
1380         entry = iter->ent;
1381
1382         ret = trace_seq_printf(s, "%d %d %llu ",
1383                 entry->pid, iter->cpu, entry->t);
1384         if (!ret)
1385                 return 0;
1386
1387         switch (entry->type) {
1388         case TRACE_FN:
1389                 ret = trace_seq_printf(s, "%lx %lx\n",
1390                                         entry->fn.ip, entry->fn.parent_ip);
1391                 if (!ret)
1392                         return 0;
1393                 break;
1394         case TRACE_CTX:
1395         case TRACE_WAKE:
1396                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1397                         state_to_char[entry->ctx.prev_state] : 'X';
1398                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1399                         state_to_char[entry->ctx.next_state] : 'X';
1400                 if (entry->type == TRACE_WAKE)
1401                         S = '+';
1402                 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1403                                        entry->ctx.prev_pid,
1404                                        entry->ctx.prev_prio,
1405                                        S,
1406                                        entry->ctx.next_pid,
1407                                        entry->ctx.next_prio,
1408                                        T);
1409                 if (!ret)
1410                         return 0;
1411                 break;
1412         case TRACE_SPECIAL:
1413         case TRACE_STACK:
1414                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1415                                  entry->special.arg1,
1416                                  entry->special.arg2,
1417                                  entry->special.arg3);
1418                 if (!ret)
1419                         return 0;
1420                 break;
1421         }
1422         return 1;
1423 }
1424
1425 #define SEQ_PUT_FIELD_RET(s, x)                         \
1426 do {                                                    \
1427         if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
1428                 return 0;                               \
1429 } while (0)
1430
1431 #define SEQ_PUT_HEX_FIELD_RET(s, x)                     \
1432 do {                                                    \
1433         if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
1434                 return 0;                               \
1435 } while (0)
1436
1437 static int print_hex_fmt(struct trace_iterator *iter)
1438 {
1439         struct trace_seq *s = &iter->seq;
1440         unsigned char newline = '\n';
1441         struct trace_entry *entry;
1442         int S, T;
1443
1444         entry = iter->ent;
1445
1446         SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1447         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1448         SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1449
1450         switch (entry->type) {
1451         case TRACE_FN:
1452                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1453                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1454                 break;
1455         case TRACE_CTX:
1456         case TRACE_WAKE:
1457                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1458                         state_to_char[entry->ctx.prev_state] : 'X';
1459                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1460                         state_to_char[entry->ctx.next_state] : 'X';
1461                 if (entry->type == TRACE_WAKE)
1462                         S = '+';
1463                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1464                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1465                 SEQ_PUT_HEX_FIELD_RET(s, S);
1466                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1467                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1468                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1469                 SEQ_PUT_HEX_FIELD_RET(s, T);
1470                 break;
1471         case TRACE_SPECIAL:
1472         case TRACE_STACK:
1473                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1474                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1475                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1476                 break;
1477         }
1478         SEQ_PUT_FIELD_RET(s, newline);
1479
1480         return 1;
1481 }
1482
1483 static int print_bin_fmt(struct trace_iterator *iter)
1484 {
1485         struct trace_seq *s = &iter->seq;
1486         struct trace_entry *entry;
1487
1488         entry = iter->ent;
1489
1490         SEQ_PUT_FIELD_RET(s, entry->pid);
1491         SEQ_PUT_FIELD_RET(s, entry->cpu);
1492         SEQ_PUT_FIELD_RET(s, entry->t);
1493
1494         switch (entry->type) {
1495         case TRACE_FN:
1496                 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1497                 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1498                 break;
1499         case TRACE_CTX:
1500                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1501                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1502                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1503                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1504                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1505                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1506                 break;
1507         case TRACE_SPECIAL:
1508         case TRACE_STACK:
1509                 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1510                 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1511                 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1512                 break;
1513         }
1514         return 1;
1515 }
1516
1517 static int trace_empty(struct trace_iterator *iter)
1518 {
1519         struct trace_array_cpu *data;
1520         int cpu;
1521
1522         for_each_possible_cpu(cpu) {
1523                 data = iter->tr->data[cpu];
1524
1525                 if (head_page(data) && data->trace_idx &&
1526                     (data->trace_tail != data->trace_head ||
1527                      data->trace_tail_idx != data->trace_head_idx))
1528                         return 0;
1529         }
1530         return 1;
1531 }
1532
1533 static int print_trace_line(struct trace_iterator *iter)
1534 {
1535         if (trace_flags & TRACE_ITER_BIN)
1536                 return print_bin_fmt(iter);
1537
1538         if (trace_flags & TRACE_ITER_HEX)
1539                 return print_hex_fmt(iter);
1540
1541         if (trace_flags & TRACE_ITER_RAW)
1542                 return print_raw_fmt(iter);
1543
1544         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1545                 return print_lat_fmt(iter, iter->idx, iter->cpu);
1546
1547         return print_trace_fmt(iter);
1548 }
1549
1550 static int s_show(struct seq_file *m, void *v)
1551 {
1552         struct trace_iterator *iter = v;
1553
1554         if (iter->ent == NULL) {
1555                 if (iter->tr) {
1556                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1557                         seq_puts(m, "#\n");
1558                 }
1559                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1560                         /* print nothing if the buffers are empty */
1561                         if (trace_empty(iter))
1562                                 return 0;
1563                         print_trace_header(m, iter);
1564                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1565                                 print_lat_help_header(m);
1566                 } else {
1567                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1568                                 print_func_help_header(m);
1569                 }
1570         } else {
1571                 print_trace_line(iter);
1572                 trace_print_seq(m, &iter->seq);
1573         }
1574
1575         return 0;
1576 }
1577
1578 static struct seq_operations tracer_seq_ops = {
1579         .start          = s_start,
1580         .next           = s_next,
1581         .stop           = s_stop,
1582         .show           = s_show,
1583 };
1584
1585 static struct trace_iterator *
1586 __tracing_open(struct inode *inode, struct file *file, int *ret)
1587 {
1588         struct trace_iterator *iter;
1589
1590         if (tracing_disabled) {
1591                 *ret = -ENODEV;
1592                 return NULL;
1593         }
1594
1595         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1596         if (!iter) {
1597                 *ret = -ENOMEM;
1598                 goto out;
1599         }
1600
1601         mutex_lock(&trace_types_lock);
1602         if (current_trace && current_trace->print_max)
1603                 iter->tr = &max_tr;
1604         else
1605                 iter->tr = inode->i_private;
1606         iter->trace = current_trace;
1607         iter->pos = -1;
1608
1609         /* TODO stop tracer */
1610         *ret = seq_open(file, &tracer_seq_ops);
1611         if (!*ret) {
1612                 struct seq_file *m = file->private_data;
1613                 m->private = iter;
1614
1615                 /* stop the trace while dumping */
1616                 if (iter->tr->ctrl)
1617                         tracer_enabled = 0;
1618
1619                 if (iter->trace && iter->trace->open)
1620                         iter->trace->open(iter);
1621         } else {
1622                 kfree(iter);
1623                 iter = NULL;
1624         }
1625         mutex_unlock(&trace_types_lock);
1626
1627  out:
1628         return iter;
1629 }
1630
1631 int tracing_open_generic(struct inode *inode, struct file *filp)
1632 {
1633         if (tracing_disabled)
1634                 return -ENODEV;
1635
1636         filp->private_data = inode->i_private;
1637         return 0;
1638 }
1639
1640 int tracing_release(struct inode *inode, struct file *file)
1641 {
1642         struct seq_file *m = (struct seq_file *)file->private_data;
1643         struct trace_iterator *iter = m->private;
1644
1645         mutex_lock(&trace_types_lock);
1646         if (iter->trace && iter->trace->close)
1647                 iter->trace->close(iter);
1648
1649         /* reenable tracing if it was previously enabled */
1650         if (iter->tr->ctrl)
1651                 tracer_enabled = 1;
1652         mutex_unlock(&trace_types_lock);
1653
1654         seq_release(inode, file);
1655         kfree(iter);
1656         return 0;
1657 }
1658
1659 static int tracing_open(struct inode *inode, struct file *file)
1660 {
1661         int ret;
1662
1663         __tracing_open(inode, file, &ret);
1664
1665         return ret;
1666 }
1667
1668 static int tracing_lt_open(struct inode *inode, struct file *file)
1669 {
1670         struct trace_iterator *iter;
1671         int ret;
1672
1673         iter = __tracing_open(inode, file, &ret);
1674
1675         if (!ret)
1676                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1677
1678         return ret;
1679 }
1680
1681
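/*
 * t_start/t_next/t_stop/t_show walk the list of registered tracers
 * under trace_types_lock; they back the "available_tracers" file,
 * which lists the tracer names space-separated on one line.
 */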
1682 static void *
1683 t_next(struct seq_file *m, void *v, loff_t *pos)
1684 {
1685         struct tracer *t = m->private;
1686
1687         (*pos)++;
1688
1689         if (t)
1690                 t = t->next;
1691
1692         m->private = t;
1693
1694         return t;
1695 }
1696
1697 static void *t_start(struct seq_file *m, loff_t *pos)
1698 {
1699         struct tracer *t = m->private;
1700         loff_t l = 0;
1701
1702         mutex_lock(&trace_types_lock);
1703         for (; t && l < *pos; t = t_next(m, t, &l))
1704                 ;
1705
1706         return t;
1707 }
1708
1709 static void t_stop(struct seq_file *m, void *p)
1710 {
1711         mutex_unlock(&trace_types_lock);
1712 }
1713
1714 static int t_show(struct seq_file *m, void *v)
1715 {
1716         struct tracer *t = v;
1717
1718         if (!t)
1719                 return 0;
1720
1721         seq_printf(m, "%s", t->name);
1722         if (t->next)
1723                 seq_putc(m, ' ');
1724         else
1725                 seq_putc(m, '\n');
1726
1727         return 0;
1728 }
1729
1730 static struct seq_operations show_traces_seq_ops = {
1731         .start          = t_start,
1732         .next           = t_next,
1733         .stop           = t_stop,
1734         .show           = t_show,
1735 };
1736
1737 static int show_traces_open(struct inode *inode, struct file *file)
1738 {
1739         int ret;
1740
1741         if (tracing_disabled)
1742                 return -ENODEV;
1743
1744         ret = seq_open(file, &show_traces_seq_ops);
1745         if (!ret) {
1746                 struct seq_file *m = file->private_data;
1747                 m->private = trace_types;
1748         }
1749
1750         return ret;
1751 }
1752
1753 static struct file_operations tracing_fops = {
1754         .open           = tracing_open,
1755         .read           = seq_read,
1756         .llseek         = seq_lseek,
1757         .release        = tracing_release,
1758 };
1759
1760 static struct file_operations tracing_lt_fops = {
1761         .open           = tracing_lt_open,
1762         .read           = seq_read,
1763         .llseek         = seq_lseek,
1764         .release        = tracing_release,
1765 };
1766
1767 static struct file_operations show_traces_fops = {
1768         .open           = show_traces_open,
1769         .read           = seq_read,
1770         .release        = seq_release,
1771 };
1772
1773 /*
1774  * Only trace on a CPU if the bitmask is set:
1775  */
1776 static cpumask_t tracing_cpumask = CPU_MASK_ALL;
1777
1778 /*
1779  * When tracing/tracing_cpu_mask is modified then this holds
1780  * the new bitmask we are about to install:
1781  */
1782 static cpumask_t tracing_cpumask_new;
1783
1784 /*
1785  * The tracer itself will not take this lock, but still we want
1786  * to provide a consistent cpumask to user-space:
1787  */
1788 static DEFINE_MUTEX(tracing_cpumask_update_lock);
1789
1790 /*
1791  * Temporary storage for the character representation of the
1792  * CPU bitmask (and one more byte for the newline):
1793  */
1794 static char mask_str[NR_CPUS + 1];
1795
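/*
 * tracing_cpumask_read - show the current tracing cpumask as a hex
 * bitmask string followed by a newline.
 */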
1796 static ssize_t
1797 tracing_cpumask_read(struct file *filp, char __user *ubuf,
1798                      size_t count, loff_t *ppos)
1799 {
1800         int len;
1801
1802         mutex_lock(&tracing_cpumask_update_lock);
1803
1804         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
1805         if (count - len < 2) {
1806                 count = -EINVAL;
1807                 goto out_err;
1808         }
1809         len += sprintf(mask_str + len, "\n");
1810         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
1811
1812 out_err:
1813         mutex_unlock(&tracing_cpumask_update_lock);
1814
1815         return count;
1816 }
1817
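/*
 * tracing_cpumask_write - install a new tracing cpumask.  Under
 * ftrace_max_lock with interrupts off, the per-cpu disabled counter
 * is raised for each CPU being masked out and dropped for each CPU
 * being re-enabled, so tracing sees a consistent transition.
 */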
1818 static ssize_t
1819 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
1820                       size_t count, loff_t *ppos)
1821 {
1822         int err, cpu;
1823
1824         mutex_lock(&tracing_cpumask_update_lock);
1825         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
1826         if (err)
1827                 goto err_unlock;
1828
1829         spin_lock_irq(&ftrace_max_lock);
1830         for_each_possible_cpu(cpu) {
1831                 /*
1832                  * Increase/decrease the disabled counter if we are
1833                  * about to flip a bit in the cpumask:
1834                  */
1835                 if (cpu_isset(cpu, tracing_cpumask) &&
1836                                 !cpu_isset(cpu, tracing_cpumask_new)) {
1837                         atomic_inc(&global_trace.data[cpu]->disabled);
1838                 }
1839                 if (!cpu_isset(cpu, tracing_cpumask) &&
1840                                 cpu_isset(cpu, tracing_cpumask_new)) {
1841                         atomic_dec(&global_trace.data[cpu]->disabled);
1842                 }
1843         }
1844         spin_unlock_irq(&ftrace_max_lock);
1845
1846         tracing_cpumask = tracing_cpumask_new;
1847
1848         mutex_unlock(&tracing_cpumask_update_lock);
1849
1850         return count;
1851
1852 err_unlock:
1853         mutex_unlock(&tracing_cpumask_update_lock);
1854
1855         return err;
1856 }
1857
1858 static struct file_operations tracing_cpumask_fops = {
1859         .open           = tracing_open_generic,
1860         .read           = tracing_cpumask_read,
1861         .write          = tracing_cpumask_write,
1862 };
1863
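/*
 * tracing_iter_ctrl_read - list every trace option, prefixing the
 * currently cleared ones with "no".
 */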
1864 static ssize_t
1865 tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
1866                        size_t cnt, loff_t *ppos)
1867 {
1868         char *buf;
1869         int r = 0;
1870         int len = 0;
1871         int i;
1872
1873         /* calculate max size */
1874         for (i = 0; trace_options[i]; i++) {
1875                 len += strlen(trace_options[i]);
1876                 len += 3; /* "no" and space */
1877         }
1878
1879         /* +2 for \n and \0 */
1880         buf = kmalloc(len + 2, GFP_KERNEL);
1881         if (!buf)
1882                 return -ENOMEM;
1883
1884         for (i = 0; trace_options[i]; i++) {
1885                 if (trace_flags & (1 << i))
1886                         r += sprintf(buf + r, "%s ", trace_options[i]);
1887                 else
1888                         r += sprintf(buf + r, "no%s ", trace_options[i]);
1889         }
1890
1891         r += sprintf(buf + r, "\n");
1892         WARN_ON(r >= len + 2);
1893
1894         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1895
1896         kfree(buf);
1897
1898         return r;
1899 }
1900
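/*
 * tracing_iter_ctrl_write - set or clear a single trace option:
 * writing "<option>" sets it, "no<option>" clears it, and anything
 * unrecognized returns -EINVAL.
 */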
1901 static ssize_t
1902 tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
1903                         size_t cnt, loff_t *ppos)
1904 {
1905         char buf[64];
1906         char *cmp = buf;
1907         int neg = 0;
1908         int i;
1909
1910         if (cnt > 63)
1911                 cnt = 63;
1912
1913         if (copy_from_user(&buf, ubuf, cnt))
1914                 return -EFAULT;
1915
1916         buf[cnt] = 0;
1917
1918         if (strncmp(buf, "no", 2) == 0) {
1919                 neg = 1;
1920                 cmp += 2;
1921         }
1922
1923         for (i = 0; trace_options[i]; i++) {
1924                 int len = strlen(trace_options[i]);
1925
1926                 if (strncmp(cmp, trace_options[i], len) == 0) {
1927                         if (neg)
1928                                 trace_flags &= ~(1 << i);
1929                         else
1930                                 trace_flags |= (1 << i);
1931                         break;
1932                 }
1933         }
1934         /*
1935          * If no option could be set, return an error:
1936          */
1937         if (!trace_options[i])
1938                 return -EINVAL;
1939
1940         filp->f_pos += cnt;
1941
1942         return cnt;
1943 }
1944
1945 static struct file_operations tracing_iter_fops = {
1946         .open           = tracing_open_generic,
1947         .read           = tracing_iter_ctrl_read,
1948         .write          = tracing_iter_ctrl_write,
1949 };
1950
1951 static const char readme_msg[] =
1952         "tracing mini-HOWTO:\n\n"
1953         "# mkdir /debug\n"
1954         "# mount -t debugfs nodev /debug\n\n"
1955         "# cat /debug/tracing/available_tracers\n"
1956         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
1957         "# cat /debug/tracing/current_tracer\n"
1958         "none\n"
1959         "# echo sched_switch > /debug/tracing/current_tracer\n"
1960         "# cat /debug/tracing/current_tracer\n"
1961         "sched_switch\n"
1962         "# cat /debug/tracing/iter_ctrl\n"
1963         "noprint-parent nosym-offset nosym-addr noverbose\n"
1964         "# echo print-parent > /debug/tracing/iter_ctrl\n"
1965         "# echo 1 > /debug/tracing/tracing_enabled\n"
1966         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
1967         "# echo 0 > /debug/tracing/tracing_enabled\n"
1968 ;
1969
1970 static ssize_t
1971 tracing_readme_read(struct file *filp, char __user *ubuf,
1972                        size_t cnt, loff_t *ppos)
1973 {
1974         return simple_read_from_buffer(ubuf, cnt, ppos,
1975                                         readme_msg, strlen(readme_msg));
1976 }
1977
1978 static struct file_operations tracing_readme_fops = {
1979         .open           = tracing_open_generic,
1980         .read           = tracing_readme_read,
1981 };
1982
1983 static ssize_t
1984 tracing_ctrl_read(struct file *filp, char __user *ubuf,
1985                   size_t cnt, loff_t *ppos)
1986 {
1987         struct trace_array *tr = filp->private_data;
1988         char buf[64];
1989         int r;
1990
1991         r = sprintf(buf, "%ld\n", tr->ctrl);
1992         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1993 }
1994
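/*
 * tracing_ctrl_write - switch tracing on or off through the
 * "tracing_enabled" file.  Any non-zero value enables it, and a change
 * is forwarded to the current tracer via its ->ctrl_update() callback.
 */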
1995 static ssize_t
1996 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
1997                    size_t cnt, loff_t *ppos)
1998 {
1999         struct trace_array *tr = filp->private_data;
2000         long val;
2001         char buf[64];
2002
2003         if (cnt > 63)
2004                 cnt = 63;
2005
2006         if (copy_from_user(&buf, ubuf, cnt))
2007                 return -EFAULT;
2008
2009         buf[cnt] = 0;
2010
2011         val = simple_strtoul(buf, NULL, 10);
2012
2013         val = !!val;
2014
2015         mutex_lock(&trace_types_lock);
2016         if (tr->ctrl ^ val) {
2017                 if (val)
2018                         tracer_enabled = 1;
2019                 else
2020                         tracer_enabled = 0;
2021
2022                 tr->ctrl = val;
2023
2024                 if (current_trace && current_trace->ctrl_update)
2025                         current_trace->ctrl_update(tr);
2026         }
2027         mutex_unlock(&trace_types_lock);
2028
2029         filp->f_pos += cnt;
2030
2031         return cnt;
2032 }
2033
2034 static ssize_t
2035 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2036                        size_t cnt, loff_t *ppos)
2037 {
2038         char buf[max_tracer_type_len+2];
2039         int r;
2040
2041         mutex_lock(&trace_types_lock);
2042         if (current_trace)
2043                 r = sprintf(buf, "%s\n", current_trace->name);
2044         else
2045                 r = sprintf(buf, "\n");
2046         mutex_unlock(&trace_types_lock);
2047
2048         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2049 }
2050
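/*
 * tracing_set_trace_write - select a new current tracer by name:
 * reset the old tracer, then let the new one initialize itself
 * against the global trace array.
 */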
2051 static ssize_t
2052 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2053                         size_t cnt, loff_t *ppos)
2054 {
2055         struct trace_array *tr = &global_trace;
2056         struct tracer *t;
2057         char buf[max_tracer_type_len+1];
2058         int i;
2059
2060         if (cnt > max_tracer_type_len)
2061                 cnt = max_tracer_type_len;
2062
2063         if (copy_from_user(&buf, ubuf, cnt))
2064                 return -EFAULT;
2065
2066         buf[cnt] = 0;
2067
2068         /* strip ending whitespace. */
2069         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2070                 buf[i] = 0;
2071
2072         mutex_lock(&trace_types_lock);
2073         for (t = trace_types; t; t = t->next) {
2074                 if (strcmp(t->name, buf) == 0)
2075                         break;
2076         }
2077         if (!t || t == current_trace)
2078                 goto out;
2079
2080         if (current_trace && current_trace->reset)
2081                 current_trace->reset(tr);
2082
2083         current_trace = t;
2084         if (t->init)
2085                 t->init(tr);
2086
2087  out:
2088         mutex_unlock(&trace_types_lock);
2089
2090         filp->f_pos += cnt;
2091
2092         return cnt;
2093 }
2094
2095 static ssize_t
2096 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2097                      size_t cnt, loff_t *ppos)
2098 {
2099         unsigned long *ptr = filp->private_data;
2100         char buf[64];
2101         int r;
2102
2103         r = snprintf(buf, 64, "%ld\n",
2104                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2105         if (r > 64)
2106                 r = 64;
2107         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2108 }
2109
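/*
 * tracing_max_lat_write - the value is taken in microseconds and
 * stored in nanoseconds, matching nsecs_to_usecs() on the read side.
 */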
2110 static ssize_t
2111 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2112                       size_t cnt, loff_t *ppos)
2113 {
2114         long *ptr = filp->private_data;
2115         long val;
2116         char buf[64];
2117
2118         if (cnt > 63)
2119                 cnt = 63;
2120
2121         if (copy_from_user(&buf, ubuf, cnt))
2122                 return -EFAULT;
2123
2124         buf[cnt] = 0;
2125
2126         val = simple_strtoul(buf, NULL, 10);
2127
2128         *ptr = val * 1000;
2129
2130         return cnt;
2131 }
2132
2133 static atomic_t tracing_reader;
2134
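/*
 * tracing_open_pipe - open the consuming "trace_pipe" interface.
 * Only one reader is allowed at a time, enforced by the
 * tracing_reader counter.
 */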
2135 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2136 {
2137         struct trace_iterator *iter;
2138
2139         if (tracing_disabled)
2140                 return -ENODEV;
2141
2142         /* We only allow one reader of the pipe */
2143         if (atomic_inc_return(&tracing_reader) != 1) {
2144                 atomic_dec(&tracing_reader);
2145                 return -EBUSY;
2146         }
2147
2148         /* create a buffer to store the information to pass to userspace */
2149         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2150         if (!iter)
2151                 return -ENOMEM;
2152
2153         iter->tr = &global_trace;
2154
2155         filp->private_data = iter;
2156
2157         return 0;
2158 }
2159
2160 static int tracing_release_pipe(struct inode *inode, struct file *file)
2161 {
2162         struct trace_iterator *iter = file->private_data;
2163
2164         kfree(iter);
2165         atomic_dec(&tracing_reader);
2166
2167         return 0;
2168 }
2169
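/*
 * tracing_poll_pipe - in blocking mode the pipe always reports
 * readable; otherwise register on trace_wait and report readable
 * only when the buffer actually holds entries.
 */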
2170 static unsigned int
2171 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2172 {
2173         struct trace_iterator *iter = filp->private_data;
2174
2175         if (trace_flags & TRACE_ITER_BLOCK) {
2176                 /*
2177                  * Always select as readable when in blocking mode
2178                  */
2179                 return POLLIN | POLLRDNORM;
2180         }
2181         else {
2182                 if (!trace_empty(iter))
2183                         return POLLIN | POLLRDNORM;
2184                 poll_wait(filp, &trace_wait, poll_table);
2185                 if (!trace_empty(iter))
2186                         return POLLIN | POLLRDNORM;
2187
2188                 return 0;
2189         }
2190 }
2191
2192 /*
2193  * Consumer reader: reading the pipe consumes entries from the buffer.
2194  */
2195 static ssize_t
2196 tracing_read_pipe(struct file *filp, char __user *ubuf,
2197                   size_t cnt, loff_t *ppos)
2198 {
2199         struct trace_iterator *iter = filp->private_data;
2200         struct trace_array_cpu *data;
2201         static cpumask_t mask;
2202         static int start;
2203         unsigned long flags;
2204 #ifdef CONFIG_FTRACE
2205         int ftrace_save;
2206 #endif
2207         int read = 0;
2208         int cpu;
2209         int len;
2210         int ret;
2211
2212         /* return any leftover data */
2213         if (iter->seq.len > start) {
2214                 len = iter->seq.len - start;
2215                 if (cnt > len)
2216                         cnt = len;
2217                 ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt);
2218                 if (ret)
2219                         cnt = -EFAULT;
2220
2221                 start += len;
2222
2223                 return cnt;
2224         }
2225
2226         trace_seq_reset(&iter->seq);
2227         start = 0;
2228
2229         while (trace_empty(iter)) {
2230                 if (!(trace_flags & TRACE_ITER_BLOCK))
2231                         return -EWOULDBLOCK;
2232                 /*
2233                  * This is a makeshift waitqueue; we don't use a real
2234                  * wait queue because:
2235                  *  1) we only ever have one waiter
2236                  *  2) the tracer traces all functions, so we don't want
2237                  *     the overhead of calling wake_up() and friends
2238                  *     (and of tracing them too).
2239                  * Anyway, this really is a very primitive wakeup.
2240                  */
2241                 set_current_state(TASK_INTERRUPTIBLE);
2242                 iter->tr->waiter = current;
2243
2244                 /* sleep for one second, and try again. */
2245                 schedule_timeout(HZ);
2246
2247                 iter->tr->waiter = NULL;
2248
2249                 if (signal_pending(current))
2250                         return -EINTR;
2251
2252                 /*
2253                  * We keep blocking while tracing is disabled as long as
2254                  * nothing has ever been read. This lets a user cat this
2255                  * file first and enable tracing afterwards. Once something
2256                  * has been read, we return EOF as soon as tracing is
2257                  * disabled again.
2258                  *
2259                  * iter->pos will be 0 if we haven't read anything.
2260                  */
2261                 if (!tracer_enabled && iter->pos)
2262                         break;
2263
2264                 continue;
2265         }
2266
2267         /* stop when tracing is finished */
2268         if (trace_empty(iter))
2269                 return 0;
2270
2271         if (cnt >= PAGE_SIZE)
2272                 cnt = PAGE_SIZE - 1;
2273
2274         memset(iter, 0, sizeof(*iter));
2275         iter->tr = &global_trace;
2276         iter->pos = -1;
2277
2278         /*
2279          * We need to stop tracing on all CPUs to read the
2280          * next buffer. This is a bit expensive, but is
2281          * not done often. We fill in as much as we can read,
2282          * and then release the locks again.
2283          */
2284
2285         cpus_clear(mask);
2286         local_irq_save(flags);
2287 #ifdef CONFIG_FTRACE
2288         ftrace_save = ftrace_enabled;
2289         ftrace_enabled = 0;
2290 #endif
2291         smp_wmb();
2292         for_each_possible_cpu(cpu) {
2293                 data = iter->tr->data[cpu];
2294
2295                 if (!head_page(data) || !data->trace_idx)
2296                         continue;
2297
2298                 atomic_inc(&data->disabled);
2299                 cpu_set(cpu, mask);
2300         }
2301
2302         for_each_cpu_mask(cpu, mask) {
2303                 data = iter->tr->data[cpu];
2304                 spin_lock(&data->lock);
2305         }
2306
2307         while (find_next_entry_inc(iter) != NULL) {
2308                 int len = iter->seq.len;
2309
2310                 ret = print_trace_line(iter);
2311                 if (!ret) {
2312                         /* don't print partial lines */
2313                         iter->seq.len = len;
2314                         break;
2315                 }
2316
2317                 trace_consume(iter);
2318
2319                 if (iter->seq.len >= cnt)
2320                         break;
2321         }
2322
2323         for_each_cpu_mask(cpu, mask) {
2324                 data = iter->tr->data[cpu];
2325                 spin_unlock(&data->lock);
2326         }
2327
2328         for_each_cpu_mask(cpu, mask) {
2329                 data = iter->tr->data[cpu];
2330                 atomic_dec(&data->disabled);
2331         }
2332 #ifdef CONFIG_FTRACE
2333         ftrace_enabled = ftrace_save;
2334 #endif
2335         local_irq_restore(flags);
2336
2337         /* Now copy what we have to the user */
2338         read = iter->seq.len;
2339         if (read > cnt)
2340                 read = cnt;
2341
2342         ret = copy_to_user(ubuf, iter->seq.buffer, read);
2343
2344         if (read < iter->seq.len)
2345                 start = read;
2346         else
2347                 trace_seq_reset(&iter->seq);
2348
2349         if (ret)
2350                 read = -EFAULT;
2351
2352         return read;
2353 }
2354
2355 static struct file_operations tracing_max_lat_fops = {
2356         .open           = tracing_open_generic,
2357         .read           = tracing_max_lat_read,
2358         .write          = tracing_max_lat_write,
2359 };
2360
2361 static struct file_operations tracing_ctrl_fops = {
2362         .open           = tracing_open_generic,
2363         .read           = tracing_ctrl_read,
2364         .write          = tracing_ctrl_write,
2365 };
2366
2367 static struct file_operations set_tracer_fops = {
2368         .open           = tracing_open_generic,
2369         .read           = tracing_set_trace_read,
2370         .write          = tracing_set_trace_write,
2371 };
2372
2373 static struct file_operations tracing_pipe_fops = {
2374         .open           = tracing_open_pipe,
2375         .poll           = tracing_poll_pipe,
2376         .read           = tracing_read_pipe,
2377         .release        = tracing_release_pipe,
2378 };
2379
2380 #ifdef CONFIG_DYNAMIC_FTRACE
2381
2382 static ssize_t
2383 tracing_read_long(struct file *filp, char __user *ubuf,
2384                   size_t cnt, loff_t *ppos)
2385 {
2386         unsigned long *p = filp->private_data;
2387         char buf[64];
2388         int r;
2389
2390         r = sprintf(buf, "%ld\n", *p);
2391
2392         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2393 }
2394
2395 static struct file_operations tracing_read_long_fops = {
2396         .open           = tracing_open_generic,
2397         .read           = tracing_read_long,
2398 };
2399 #endif
2400
2401 static struct dentry *d_tracer;
2402
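/*
 * tracing_init_dentry - create (once) and return the debugfs
 * "tracing" directory, warning only on the first failure.
 */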
2403 struct dentry *tracing_init_dentry(void)
2404 {
2405         static int once;
2406
2407         if (d_tracer)
2408                 return d_tracer;
2409
2410         d_tracer = debugfs_create_dir("tracing", NULL);
2411
2412         if (!d_tracer && !once) {
2413                 once = 1;
2414                 pr_warning("Could not create debugfs directory 'tracing'\n");
2415                 return NULL;
2416         }
2417
2418         return d_tracer;
2419 }
2420
2421 #ifdef CONFIG_FTRACE_SELFTEST
2422 /* Let selftest have access to static functions in this file */
2423 #include "trace_selftest.c"
2424 #endif
2425
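/*
 * tracer_init_debugfs - populate the "tracing" directory with the
 * control and output files defined above.
 */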
2426 static __init void tracer_init_debugfs(void)
2427 {
2428         struct dentry *d_tracer;
2429         struct dentry *entry;
2430
2431         d_tracer = tracing_init_dentry();
2432
2433         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2434                                     &global_trace, &tracing_ctrl_fops);
2435         if (!entry)
2436                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2437
2438         entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2439                                     NULL, &tracing_iter_fops);
2440         if (!entry)
2441                 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2442
2443         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2444                                     NULL, &tracing_cpumask_fops);
2445         if (!entry)
2446                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2447
2448         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2449                                     &global_trace, &tracing_lt_fops);
2450         if (!entry)
2451                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2452
2453         entry = debugfs_create_file("trace", 0444, d_tracer,
2454                                     &global_trace, &tracing_fops);
2455         if (!entry)
2456                 pr_warning("Could not create debugfs 'trace' entry\n");
2457
2458         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2459                                     &global_trace, &show_traces_fops);
2460         if (!entry)
2461                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2462
2463         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2464                                     &global_trace, &set_tracer_fops);
2465         if (!entry)
2466                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2467
2468         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2469                                     &tracing_max_latency,
2470                                     &tracing_max_lat_fops);
2471         if (!entry)
2472                 pr_warning("Could not create debugfs "
2473                            "'tracing_max_latency' entry\n");
2474
2475         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2476                                     &tracing_thresh, &tracing_max_lat_fops);
2477         if (!entry)
2478                 pr_warning("Could not create debugfs "
2479                            "'tracing_thresh' entry\n");
2480         entry = debugfs_create_file("README", 0644, d_tracer,
2481                                     NULL, &tracing_readme_fops);
2482         if (!entry)
2483                 pr_warning("Could not create debugfs 'README' entry\n");
2484
2485         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2486                                     NULL, &tracing_pipe_fops);
2487         if (!entry)
2488                 pr_warning("Could not create debugfs "
2489                            "'trace_pipe' entry\n");
2490
2491 #ifdef CONFIG_DYNAMIC_FTRACE
2492         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2493                                     &ftrace_update_tot_cnt,
2494                                     &tracing_read_long_fops);
2495         if (!entry)
2496                 pr_warning("Could not create debugfs "
2497                            "'dyn_ftrace_total_info' entry\n");
2498 #endif
2499 }
2500
2501 /* dummy trace to disable tracing */
2502 static struct tracer no_tracer __read_mostly =
2503 {
2504         .name           = "none",
2505 };
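/*
 * Illustrative sketch (not part of the original file): a minimal tracer
 * plugin would fill in a struct tracer using the hooks this file already
 * relies on (->name, ->init, ->reset) and hand it to register_tracer().
 * The "example" names below are invented for the illustration only:
 *
 *	static void example_init(struct trace_array *tr) { ... }
 *	static void example_reset(struct trace_array *tr) { ... }
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_init,
 *		.reset	= example_reset,
 *	};
 *
 *	register_tracer(&example_tracer);
 */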
2506
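/*
 * trace_alloc_page - grow every per-CPU trace buffer by one page (and,
 * with CONFIG_TRACER_MAX_TRACE, the max snapshot too).  All pages are
 * allocated up front so the operation either succeeds for every CPU or
 * is rolled back completely.
 */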
2507 static int trace_alloc_page(void)
2508 {
2509         struct trace_array_cpu *data;
2510         struct page *page, *tmp;
2511         LIST_HEAD(pages);
2512         void *array;
2513         int i;
2514
2515         /* first allocate a page for each CPU */
2516         for_each_possible_cpu(i) {
2517                 array = (void *)__get_free_page(GFP_KERNEL);
2518                 if (array == NULL) {
2519                         printk(KERN_ERR "tracer: failed to allocate page "
2520                                "for trace buffer!\n");
2521                         goto free_pages;
2522                 }
2523
2524                 page = virt_to_page(array);
2525                 list_add(&page->lru, &pages);
2526
2527 /* Only allocate if we are actually using the max trace */
2528 #ifdef CONFIG_TRACER_MAX_TRACE
2529                 array = (void *)__get_free_page(GFP_KERNEL);
2530                 if (array == NULL) {
2531                         printk(KERN_ERR "tracer: failed to allocate page "
2532                                "for trace buffer!\n");
2533                         goto free_pages;
2534                 }
2535                 page = virt_to_page(array);
2536                 list_add(&page->lru, &pages);
2537 #endif
2538         }
2539
2540         /* Now that we have successfully allocated a page per CPU, add them */
2541         for_each_possible_cpu(i) {
2542                 data = global_trace.data[i];
2543                 spin_lock_init(&data->lock);
2544                 lockdep_set_class(&data->lock, &data->lock_key);
2545                 page = list_entry(pages.next, struct page, lru);
2546                 list_del_init(&page->lru);
2547                 list_add_tail(&page->lru, &data->trace_pages);
2548                 ClearPageLRU(page);
2549
2550 #ifdef CONFIG_TRACER_MAX_TRACE
2551                 data = max_tr.data[i];
2552                 spin_lock_init(&data->lock);
2553                 lockdep_set_class(&data->lock, &data->lock_key);
2554                 page = list_entry(pages.next, struct page, lru);
2555                 list_del_init(&page->lru);
2556                 list_add_tail(&page->lru, &data->trace_pages);
2557                 SetPageLRU(page);
2558 #endif
2559         }
2560         global_trace.entries += ENTRIES_PER_PAGE;
2561
2562         return 0;
2563
2564  free_pages:
2565         list_for_each_entry_safe(page, tmp, &pages, lru) {
2566                 list_del_init(&page->lru);
2567                 __free_page(page);
2568         }
2569         return -ENOMEM;
2570 }
2571
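/*
 * tracer_alloc_buffers - boot-time setup: allocate the first page of
 * each per-CPU buffer, grow the buffers until trace_nr_entries fit,
 * create the debugfs files and register the dummy "none" tracer.
 */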
2572 __init static int tracer_alloc_buffers(void)
2573 {
2574         struct trace_array_cpu *data;
2575         void *array;
2576         struct page *page;
2577         int pages = 0;
2578         int ret = -ENOMEM;
2579         int i;
2580
2581         global_trace.ctrl = tracer_enabled;
2582
2583         /* Allocate the first page for all buffers */
2584         for_each_possible_cpu(i) {
2585                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
2586                 max_tr.data[i] = &per_cpu(max_data, i);
2587
2588                 array = (void *)__get_free_page(GFP_KERNEL);
2589                 if (array == NULL) {
2590                         printk(KERN_ERR "tracer: failed to allocate page "
2591                                "for trace buffer!\n");
2592                         goto free_buffers;
2593                 }
2594
2595                 /* link this page into the CPU's list of trace pages */
2596                 INIT_LIST_HEAD(&data->trace_pages);
2597                 page = virt_to_page(array);
2598                 list_add(&page->lru, &data->trace_pages);
2599                 /* use the LRU flag to differentiate the two buffers */
2600                 ClearPageLRU(page);
2601
2602 /* Only allocate if we are actually using the max trace */
2603 #ifdef CONFIG_TRACER_MAX_TRACE
2604                 array = (void *)__get_free_page(GFP_KERNEL);
2605                 if (array == NULL) {
2606                         printk(KERN_ERR "tracer: failed to allocate page "
2607                                "for trace buffer!\n");
2608                         goto free_buffers;
2609                 }
2610
2611                 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
2612                 page = virt_to_page(array);
2613                 list_add(&page->lru, &max_tr.data[i]->trace_pages);
2614                 SetPageLRU(page);
2615 #endif
2616         }
2617
2618         /*
2619          * Since we allocate by orders of pages, we may be able to
2620          * round up a bit.
2621          */
2622         global_trace.entries = ENTRIES_PER_PAGE;
2623         pages++;
2624
2625         while (global_trace.entries < trace_nr_entries) {
2626                 if (trace_alloc_page())
2627                         break;
2628                 pages++;
2629         }
2630         max_tr.entries = global_trace.entries;
2631
2632         pr_info("tracer: %d pages allocated for %ld",
2633                 pages, trace_nr_entries);
2634         pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE);
2635         pr_info("   actual entries %ld\n", global_trace.entries);
2636
2637         tracer_init_debugfs();
2638
2639         trace_init_cmdlines();
2640
2641         register_tracer(&no_tracer);
2642         current_trace = &no_tracer;
2643
2644         /* All seems OK, enable tracing */
2645         tracing_disabled = 0;
2646
2647         return 0;
2648
2649  free_buffers:
2650         for (i-- ; i >= 0; i--) {
2651                 struct page *page, *tmp;
2652                 struct trace_array_cpu *data = global_trace.data[i];
2653
2654                 if (data) {
2655                         list_for_each_entry_safe(page, tmp,
2656                                                  &data->trace_pages, lru) {
2657                                 list_del_init(&page->lru);
2658                                 __free_page(page);
2659                         }
2660                 }
2661
2662 #ifdef CONFIG_TRACER_MAX_TRACE
2663                 data = max_tr.data[i];
2664                 if (data) {
2665                         list_for_each_entry_safe(page, tmp,
2666                                                  &data->trace_pages, lru) {
2667                                 list_del_init(&page->lru);
2668                                 __free_page(page);
2669                         }
2670                 }
2671 #endif
2672         }
2673         return ret;
2674 }
2675 fs_initcall(tracer_alloc_buffers);