trace: branch profiling should not print percent without data
linux-2.6: kernel/trace/ring_buffer.c
1 /*
2  * Generic ring buffer
3  *
4  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
5  */
6 #include <linux/ring_buffer.h>
7 #include <linux/spinlock.h>
8 #include <linux/debugfs.h>
9 #include <linux/uaccess.h>
10 #include <linux/module.h>
11 #include <linux/percpu.h>
12 #include <linux/mutex.h>
13 #include <linux/sched.h>        /* used for sched_clock() (for now) */
14 #include <linux/init.h>
15 #include <linux/hash.h>
16 #include <linux/list.h>
17 #include <linux/fs.h>
18
19 #include "trace.h"
20
21 /* Global flag to disable all recording to ring buffers */
22 static int ring_buffers_off __read_mostly;
23
24 /**
25  * tracing_on - enable all tracing buffers
26  *
27  * This function enables all tracing buffers that may have been
28  * disabled with tracing_off.
29  */
30 void tracing_on(void)
31 {
32         ring_buffers_off = 0;
33 }
34
35 /**
36  * tracing_off - turn off all tracing buffers
37  *
38  * This function stops all tracing buffers from recording data.
39  * It does not disable any overhead the tracers themselves may
40  * be causing. This function simply causes all recording to
41  * the ring buffers to fail.
42  */
43 void tracing_off(void)
44 {
45         ring_buffers_off = 1;
46 }
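/*
 * Illustrative usage, not part of the original file: a debugging hook can
 * freeze the buffers once a suspect condition is seen, so the trace leading
 * up to it is preserved, e.g.:
 *
 *	if (detected_bad_state())
 *		tracing_off();
 *
 * and recording is resumed later with tracing_on().
 * "detected_bad_state()" is a made-up helper used only for this sketch.
 */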
47
50 /* Up this if you want to test the TIME_EXTENTS and normalization */
51 #define DEBUG_SHIFT 0
52
53 /* FIXME!!! */
54 u64 ring_buffer_time_stamp(int cpu)
55 {
56         u64 time;
57
58         preempt_disable_notrace();
59         /* shift to debug/test normalization and TIME_EXTENTS */
60         time = sched_clock() << DEBUG_SHIFT;
61         preempt_enable_notrace();
62
63         return time;
64 }
65
66 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts)
67 {
68         /* Just stupid testing the normalize function and deltas */
69         *ts >>= DEBUG_SHIFT;
70 }
71
72 #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event))
73 #define RB_ALIGNMENT_SHIFT      2
74 #define RB_ALIGNMENT            (1 << RB_ALIGNMENT_SHIFT)
75 #define RB_MAX_SMALL_DATA       28
76
77 enum {
78         RB_LEN_TIME_EXTEND = 8,
79         RB_LEN_TIME_STAMP = 16,
80 };
81
82 /* inline for ring buffer fast paths */
83 static inline unsigned
84 rb_event_length(struct ring_buffer_event *event)
85 {
86         unsigned length;
87
88         switch (event->type) {
89         case RINGBUF_TYPE_PADDING:
90                 /* undefined */
91                 return -1;
92
93         case RINGBUF_TYPE_TIME_EXTEND:
94                 return RB_LEN_TIME_EXTEND;
95
96         case RINGBUF_TYPE_TIME_STAMP:
97                 return RB_LEN_TIME_STAMP;
98
99         case RINGBUF_TYPE_DATA:
100                 if (event->len)
101                         length = event->len << RB_ALIGNMENT_SHIFT;
102                 else
103                         length = event->array[0];
104                 return length + RB_EVNT_HDR_SIZE;
105         default:
106                 BUG();
107         }
108         /* not hit */
109         return 0;
110 }
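/*
 * Worked example of the length encoding above (illustrative, assuming a
 * 4-byte event header, i.e. RB_EVNT_HDR_SIZE == 4): data is 4-byte aligned,
 * so a 10-byte payload is rounded up to 12 and stored with event->len = 3,
 * and rb_event_length() returns (3 << 2) + 4 = 16.  A payload larger than
 * RB_MAX_SMALL_DATA (28) sets event->len = 0 and keeps the rounded length
 * in event->array[0], so rb_event_length() returns array[0] +
 * RB_EVNT_HDR_SIZE.
 */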
111
112 /**
113  * ring_buffer_event_length - return the length of the event
114  * @event: the event to get the length of
115  */
116 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
117 {
118         return rb_event_length(event);
119 }
120
121 /* inline for ring buffer fast paths */
122 static inline void *
123 rb_event_data(struct ring_buffer_event *event)
124 {
125         BUG_ON(event->type != RINGBUF_TYPE_DATA);
126         /* If length is in len field, then array[0] has the data */
127         if (event->len)
128                 return (void *)&event->array[0];
129         /* Otherwise length is in array[0] and array[1] has the data */
130         return (void *)&event->array[1];
131 }
132
133 /**
134  * ring_buffer_event_data - return the data of the event
135  * @event: the event to get the data from
136  */
137 void *ring_buffer_event_data(struct ring_buffer_event *event)
138 {
139         return rb_event_data(event);
140 }
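/*
 * Illustrative consumer-side sketch, not part of the original file: once an
 * event has been obtained from the buffer, the payload is reached through
 * the helper above, which hides whether the data starts at array[0] or
 * array[1]:
 *
 *	void *body = ring_buffer_event_data(event);
 *	unsigned size = ring_buffer_event_length(event);
 *
 * Note that the length helper reports the event length, including the
 * header, not just the payload.
 */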
141
142 #define for_each_buffer_cpu(buffer, cpu)                \
143         for_each_cpu_mask(cpu, buffer->cpumask)
144
145 #define TS_SHIFT        27
146 #define TS_MASK         ((1ULL << TS_SHIFT) - 1)
147 #define TS_DELTA_TEST   (~TS_MASK)
148
149 /*
150  * This hack stolen from mm/slob.c.
151  * We can store per page timing information in the page frame of the page.
152  * Thanks to Peter Zijlstra for suggesting this idea.
153  */
154 struct buffer_page {
155         u64              time_stamp;    /* page time stamp */
156         local_t          write;         /* index for next write */
157         local_t          commit;        /* write committed index */
158         unsigned         read;          /* index for next read */
159         struct list_head list;          /* list of free pages */
160         void *page;                     /* Actual data page */
161 };
162
163 /*
164  * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
165  * this issue out.
166  */
167 static inline void free_buffer_page(struct buffer_page *bpage)
168 {
169         if (bpage->page)
170                 free_page((unsigned long)bpage->page);
171         kfree(bpage);
172 }
173
174 /*
175  * We need to fit the time_stamp delta into 27 bits.
176  */
177 static inline int test_time_stamp(u64 delta)
178 {
179         if (delta & TS_DELTA_TEST)
180                 return 1;
181         return 0;
182 }
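/*
 * Worked example (illustrative): with sched_clock() returning nanoseconds,
 * the 27-bit delta field covers up to 2^27 ns, roughly 134 ms.  A delta of
 * 200 ms (200,000,000 ns) has bits set above TS_MASK, so test_time_stamp()
 * returns 1 and the writer must emit a TIME_EXTEND event carrying the upper
 * bits (see rb_add_time_stamp() below).
 */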
183
184 #define BUF_PAGE_SIZE PAGE_SIZE
185
186 /*
187  * head_page == tail_page && head == tail then buffer is empty.
188  */
189 struct ring_buffer_per_cpu {
190         int                             cpu;
191         struct ring_buffer              *buffer;
192         spinlock_t                      reader_lock; /* serialize readers */
193         raw_spinlock_t                  lock;
194         struct lock_class_key           lock_key;
195         struct list_head                pages;
196         struct buffer_page              *head_page;     /* read from head */
197         struct buffer_page              *tail_page;     /* write to tail */
198         struct buffer_page              *commit_page;   /* committed pages */
199         struct buffer_page              *reader_page;
200         unsigned long                   overrun;
201         unsigned long                   entries;
202         u64                             write_stamp;
203         u64                             read_stamp;
204         atomic_t                        record_disabled;
205 };
206
207 struct ring_buffer {
208         unsigned long                   size;
209         unsigned                        pages;
210         unsigned                        flags;
211         int                             cpus;
212         cpumask_t                       cpumask;
213         atomic_t                        record_disabled;
214
215         struct mutex                    mutex;
216
217         struct ring_buffer_per_cpu      **buffers;
218 };
219
220 struct ring_buffer_iter {
221         struct ring_buffer_per_cpu      *cpu_buffer;
222         unsigned long                   head;
223         struct buffer_page              *head_page;
224         u64                             read_stamp;
225 };
226
227 /* buffer may be either ring_buffer or ring_buffer_per_cpu */
228 #define RB_WARN_ON(buffer, cond)                                \
229         ({                                                      \
230                 int _____ret = unlikely(cond);                  \
231                 if (_____ret) {                                 \
232                         atomic_inc(&buffer->record_disabled);   \
233                         WARN_ON(1);                             \
234                 }                                               \
235                 _____ret;                                       \
236         })
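/*
 * Note: besides warning, the macro above permanently disables recording on
 * the affected buffer (the record_disabled count is never decremented here),
 * so a detected inconsistency stops further writes instead of compounding
 * the damage.  It evaluates to the condition itself, so callers can bail
 * out, as in "if (RB_WARN_ON(cpu_buffer, cond)) return -1;".
 */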
237
238 /**
239  * rb_check_pages - integrity check of buffer pages
240  * @cpu_buffer: CPU buffer with pages to test
241  *
242  * As a safety measure we check to make sure the data pages have not
243  * been corrupted.
244  */
245 static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer)
246 {
247         struct list_head *head = &cpu_buffer->pages;
248         struct buffer_page *page, *tmp;
249
250         if (RB_WARN_ON(cpu_buffer, head->next->prev != head))
251                 return -1;
252         if (RB_WARN_ON(cpu_buffer, head->prev->next != head))
253                 return -1;
254
255         list_for_each_entry_safe(page, tmp, head, list) {
256                 if (RB_WARN_ON(cpu_buffer,
257                                page->list.next->prev != &page->list))
258                         return -1;
259                 if (RB_WARN_ON(cpu_buffer,
260                                page->list.prev->next != &page->list))
261                         return -1;
262         }
263
264         return 0;
265 }
266
267 static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
268                              unsigned nr_pages)
269 {
270         struct list_head *head = &cpu_buffer->pages;
271         struct buffer_page *page, *tmp;
272         unsigned long addr;
273         LIST_HEAD(pages);
274         unsigned i;
275
276         for (i = 0; i < nr_pages; i++) {
277                 page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
278                                     GFP_KERNEL, cpu_to_node(cpu_buffer->cpu));
279                 if (!page)
280                         goto free_pages;
281                 list_add(&page->list, &pages);
282
283                 addr = __get_free_page(GFP_KERNEL);
284                 if (!addr)
285                         goto free_pages;
286                 page->page = (void *)addr;
287         }
288
289         list_splice(&pages, head);
290
291         rb_check_pages(cpu_buffer);
292
293         return 0;
294
295  free_pages:
296         list_for_each_entry_safe(page, tmp, &pages, list) {
297                 list_del_init(&page->list);
298                 free_buffer_page(page);
299         }
300         return -ENOMEM;
301 }
302
303 static struct ring_buffer_per_cpu *
304 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
305 {
306         struct ring_buffer_per_cpu *cpu_buffer;
307         struct buffer_page *page;
308         unsigned long addr;
309         int ret;
310
311         cpu_buffer = kzalloc_node(ALIGN(sizeof(*cpu_buffer), cache_line_size()),
312                                   GFP_KERNEL, cpu_to_node(cpu));
313         if (!cpu_buffer)
314                 return NULL;
315
316         cpu_buffer->cpu = cpu;
317         cpu_buffer->buffer = buffer;
318         spin_lock_init(&cpu_buffer->reader_lock);
319         cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
320         INIT_LIST_HEAD(&cpu_buffer->pages);
321
322         page = kzalloc_node(ALIGN(sizeof(*page), cache_line_size()),
323                             GFP_KERNEL, cpu_to_node(cpu));
324         if (!page)
325                 goto fail_free_buffer;
326
327         cpu_buffer->reader_page = page;
328         addr = __get_free_page(GFP_KERNEL);
329         if (!addr)
330                 goto fail_free_reader;
331         page->page = (void *)addr;
332
333         INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
334
335         ret = rb_allocate_pages(cpu_buffer, buffer->pages);
336         if (ret < 0)
337                 goto fail_free_reader;
338
339         cpu_buffer->head_page
340                 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
341         cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page;
342
343         return cpu_buffer;
344
345  fail_free_reader:
346         free_buffer_page(cpu_buffer->reader_page);
347
348  fail_free_buffer:
349         kfree(cpu_buffer);
350         return NULL;
351 }
352
353 static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
354 {
355         struct list_head *head = &cpu_buffer->pages;
356         struct buffer_page *page, *tmp;
357
358         list_del_init(&cpu_buffer->reader_page->list);
359         free_buffer_page(cpu_buffer->reader_page);
360
361         list_for_each_entry_safe(page, tmp, head, list) {
362                 list_del_init(&page->list);
363                 free_buffer_page(page);
364         }
365         kfree(cpu_buffer);
366 }
367
368 /*
369  * Causes compile errors if the struct buffer_page gets bigger
370  * than the struct page.
371  */
372 extern int ring_buffer_page_too_big(void);
373
374 /**
375  * ring_buffer_alloc - allocate a new ring_buffer
376  * @size: the size in bytes that is needed.
377  * @flags: attributes to set for the ring buffer.
378  *
379  * Currently the only flag that is available is the RB_FL_OVERWRITE
380  * flag. This flag means that the buffer will overwrite old data
381  * when the buffer wraps. If this flag is not set, the buffer will
382  * drop data when the tail hits the head.
383  */
384 struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
385 {
386         struct ring_buffer *buffer;
387         int bsize;
388         int cpu;
389
390         /* Paranoid! Optimizes out when all is well */
391         if (sizeof(struct buffer_page) > sizeof(struct page))
392                 ring_buffer_page_too_big();
393
394
395         /* keep it in its own cache line */
396         buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()),
397                          GFP_KERNEL);
398         if (!buffer)
399                 return NULL;
400
401         buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
402         buffer->flags = flags;
403
404         /* need at least two pages */
405         if (buffer->pages == 1)
406                 buffer->pages++;
407
408         buffer->cpumask = cpu_possible_map;
409         buffer->cpus = nr_cpu_ids;
410
411         bsize = sizeof(void *) * nr_cpu_ids;
412         buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
413                                   GFP_KERNEL);
414         if (!buffer->buffers)
415                 goto fail_free_buffer;
416
417         for_each_buffer_cpu(buffer, cpu) {
418                 buffer->buffers[cpu] =
419                         rb_allocate_cpu_buffer(buffer, cpu);
420                 if (!buffer->buffers[cpu])
421                         goto fail_free_buffers;
422         }
423
424         mutex_init(&buffer->mutex);
425
426         return buffer;
427
428  fail_free_buffers:
429         for_each_buffer_cpu(buffer, cpu) {
430                 if (buffer->buffers[cpu])
431                         rb_free_cpu_buffer(buffer->buffers[cpu]);
432         }
433         kfree(buffer->buffers);
434
435  fail_free_buffer:
436         kfree(buffer);
437         return NULL;
438 }
439
440 /**
441  * ring_buffer_free - free a ring buffer.
442  * @buffer: the buffer to free.
443  */
444 void
445 ring_buffer_free(struct ring_buffer *buffer)
446 {
447         int cpu;
448
449         for_each_buffer_cpu(buffer, cpu)
450                 rb_free_cpu_buffer(buffer->buffers[cpu]);
451
452         kfree(buffer);
453 }
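/*
 * Illustrative usage, not part of the original file: allocating a buffer of
 * roughly 1 MB per CPU (rounded up to whole pages) in overwrite mode and
 * freeing it again.
 *
 *	struct ring_buffer *buf;
 *
 *	buf = ring_buffer_alloc(1 << 20, RB_FL_OVERWRITE);
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	ring_buffer_free(buf);
 */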
454
455 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer);
456
457 static void
458 rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
459 {
460         struct buffer_page *page;
461         struct list_head *p;
462         unsigned i;
463
464         atomic_inc(&cpu_buffer->record_disabled);
465         synchronize_sched();
466
467         for (i = 0; i < nr_pages; i++) {
468                 if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
469                         return;
470                 p = cpu_buffer->pages.next;
471                 page = list_entry(p, struct buffer_page, list);
472                 list_del_init(&page->list);
473                 free_buffer_page(page);
474         }
475         if (RB_WARN_ON(cpu_buffer, list_empty(&cpu_buffer->pages)))
476                 return;
477
478         rb_reset_cpu(cpu_buffer);
479
480         rb_check_pages(cpu_buffer);
481
482         atomic_dec(&cpu_buffer->record_disabled);
483
484 }
485
486 static void
487 rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
488                 struct list_head *pages, unsigned nr_pages)
489 {
490         struct buffer_page *page;
491         struct list_head *p;
492         unsigned i;
493
494         atomic_inc(&cpu_buffer->record_disabled);
495         synchronize_sched();
496
497         for (i = 0; i < nr_pages; i++) {
498                 if (RB_WARN_ON(cpu_buffer, list_empty(pages)))
499                         return;
500                 p = pages->next;
501                 page = list_entry(p, struct buffer_page, list);
502                 list_del_init(&page->list);
503                 list_add_tail(&page->list, &cpu_buffer->pages);
504         }
505         rb_reset_cpu(cpu_buffer);
506
507         rb_check_pages(cpu_buffer);
508
509         atomic_dec(&cpu_buffer->record_disabled);
510 }
511
512 /**
513  * ring_buffer_resize - resize the ring buffer
514  * @buffer: the buffer to resize.
515  * @size: the new size.
516  *
517  * The tracer is responsible for making sure that the buffer is
518  * not being used while changing the size.
519  * Note: We may be able to change the above requirement by using
520  *  RCU synchronizations.
521  *
522  * Minimum size is 2 * BUF_PAGE_SIZE.
523  *
524  * Returns -1 on failure.
525  */
526 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
527 {
528         struct ring_buffer_per_cpu *cpu_buffer;
529         unsigned nr_pages, rm_pages, new_pages;
530         struct buffer_page *page, *tmp;
531         unsigned long buffer_size;
532         unsigned long addr;
533         LIST_HEAD(pages);
534         int i, cpu;
535
536         /*
537          * Always succeed at resizing a non-existent buffer:
538          */
539         if (!buffer)
540                 return size;
541
542         size = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
543         size *= BUF_PAGE_SIZE;
544         buffer_size = buffer->pages * BUF_PAGE_SIZE;
545
546         /* we need a minimum of two pages */
547         if (size < BUF_PAGE_SIZE * 2)
548                 size = BUF_PAGE_SIZE * 2;
549
550         if (size == buffer_size)
551                 return size;
552
553         mutex_lock(&buffer->mutex);
554
555         nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
556
557         if (size < buffer_size) {
558
559                 /* easy case, just free pages */
560                 if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) {
561                         mutex_unlock(&buffer->mutex);
562                         return -1;
563                 }
564
565                 rm_pages = buffer->pages - nr_pages;
566
567                 for_each_buffer_cpu(buffer, cpu) {
568                         cpu_buffer = buffer->buffers[cpu];
569                         rb_remove_pages(cpu_buffer, rm_pages);
570                 }
571                 goto out;
572         }
573
574         /*
575          * This is a bit more difficult. We only want to add pages
576          * when we can allocate enough for all CPUs. We do this
577          * by allocating all the pages and storing them on a local
578          * link list. If we succeed in our allocation, then we
579          * add these pages to the cpu_buffers. Otherwise we just free
580          * them all and return -ENOMEM;
581          */
582         if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) {
583                 mutex_unlock(&buffer->mutex);
584                 return -1;
585         }
586
587         new_pages = nr_pages - buffer->pages;
588
589         for_each_buffer_cpu(buffer, cpu) {
590                 for (i = 0; i < new_pages; i++) {
591                         page = kzalloc_node(ALIGN(sizeof(*page),
592                                                   cache_line_size()),
593                                             GFP_KERNEL, cpu_to_node(cpu));
594                         if (!page)
595                                 goto free_pages;
596                         list_add(&page->list, &pages);
597                         addr = __get_free_page(GFP_KERNEL);
598                         if (!addr)
599                                 goto free_pages;
600                         page->page = (void *)addr;
601                 }
602         }
603
604         for_each_buffer_cpu(buffer, cpu) {
605                 cpu_buffer = buffer->buffers[cpu];
606                 rb_insert_pages(cpu_buffer, &pages, new_pages);
607         }
608
609         if (RB_WARN_ON(buffer, !list_empty(&pages))) {
610                 mutex_unlock(&buffer->mutex);
611                 return -1;
612         }
613
614  out:
615         buffer->pages = nr_pages;
616         mutex_unlock(&buffer->mutex);
617
618         return size;
619
620  free_pages:
621         list_for_each_entry_safe(page, tmp, &pages, list) {
622                 list_del_init(&page->list);
623                 free_buffer_page(page);
624         }
625         mutex_unlock(&buffer->mutex);
626         return -ENOMEM;
627 }
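/*
 * Illustrative usage, not part of the original file: on success the new size
 * (rounded up to whole pages) is returned, on failure a negative value, so a
 * caller can do:
 *
 *	ret = ring_buffer_resize(buf, 64 * PAGE_SIZE);
 *	if (ret < 0)
 *		goto fail;
 */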
628
629 static inline int rb_null_event(struct ring_buffer_event *event)
630 {
631         return event->type == RINGBUF_TYPE_PADDING;
632 }
633
634 static inline void *__rb_page_index(struct buffer_page *page, unsigned index)
635 {
636         return page->page + index;
637 }
638
639 static inline struct ring_buffer_event *
640 rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer)
641 {
642         return __rb_page_index(cpu_buffer->reader_page,
643                                cpu_buffer->reader_page->read);
644 }
645
646 static inline struct ring_buffer_event *
647 rb_head_event(struct ring_buffer_per_cpu *cpu_buffer)
648 {
649         return __rb_page_index(cpu_buffer->head_page,
650                                cpu_buffer->head_page->read);
651 }
652
653 static inline struct ring_buffer_event *
654 rb_iter_head_event(struct ring_buffer_iter *iter)
655 {
656         return __rb_page_index(iter->head_page, iter->head);
657 }
658
659 static inline unsigned rb_page_write(struct buffer_page *bpage)
660 {
661         return local_read(&bpage->write);
662 }
663
664 static inline unsigned rb_page_commit(struct buffer_page *bpage)
665 {
666         return local_read(&bpage->commit);
667 }
668
669 /* Size is determined by what has been committed */
670 static inline unsigned rb_page_size(struct buffer_page *bpage)
671 {
672         return rb_page_commit(bpage);
673 }
674
675 static inline unsigned
676 rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer)
677 {
678         return rb_page_commit(cpu_buffer->commit_page);
679 }
680
681 static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
682 {
683         return rb_page_commit(cpu_buffer->head_page);
684 }
685
686 /*
687  * When the tail hits the head and the buffer is in overwrite mode,
688  * the head jumps to the next page and all content on the previous
689  * page is discarded. But before doing so, we update the overrun
690  * variable of the buffer.
691  */
692 static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
693 {
694         struct ring_buffer_event *event;
695         unsigned long head;
696
697         for (head = 0; head < rb_head_size(cpu_buffer);
698              head += rb_event_length(event)) {
699
700                 event = __rb_page_index(cpu_buffer->head_page, head);
701                 if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
702                         return;
703                 /* Only count data entries */
704                 if (event->type != RINGBUF_TYPE_DATA)
705                         continue;
706                 cpu_buffer->overrun++;
707                 cpu_buffer->entries--;
708         }
709 }
710
711 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
712                                struct buffer_page **page)
713 {
714         struct list_head *p = (*page)->list.next;
715
716         if (p == &cpu_buffer->pages)
717                 p = p->next;
718
719         *page = list_entry(p, struct buffer_page, list);
720 }
721
722 static inline unsigned
723 rb_event_index(struct ring_buffer_event *event)
724 {
725         unsigned long addr = (unsigned long)event;
726
727         return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
728 }
729
730 static inline int
731 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
732              struct ring_buffer_event *event)
733 {
734         unsigned long addr = (unsigned long)event;
735         unsigned long index;
736
737         index = rb_event_index(event);
738         addr &= PAGE_MASK;
739
740         return cpu_buffer->commit_page->page == (void *)addr &&
741                 rb_commit_index(cpu_buffer) == index;
742 }
743
744 static inline void
745 rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer,
746                     struct ring_buffer_event *event)
747 {
748         unsigned long addr = (unsigned long)event;
749         unsigned long index;
750
751         index = rb_event_index(event);
752         addr &= PAGE_MASK;
753
754         while (cpu_buffer->commit_page->page != (void *)addr) {
755                 if (RB_WARN_ON(cpu_buffer,
756                           cpu_buffer->commit_page == cpu_buffer->tail_page))
757                         return;
758                 cpu_buffer->commit_page->commit =
759                         cpu_buffer->commit_page->write;
760                 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
761                 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
762         }
763
764         /* Now set the commit to the event's index */
765         local_set(&cpu_buffer->commit_page->commit, index);
766 }
767
768 static inline void
769 rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer)
770 {
771         /*
772          * We only race with interrupts and NMIs on this CPU.
773          * If we own the commit event, then we can commit
774          * all others that interrupted us, since the interruptions
775          * are in stack format (they finish before they come
776          * back to us). This allows us to do a simple loop to
777          * assign the commit to the tail.
778          */
779         while (cpu_buffer->commit_page != cpu_buffer->tail_page) {
780                 cpu_buffer->commit_page->commit =
781                         cpu_buffer->commit_page->write;
782                 rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
783                 cpu_buffer->write_stamp = cpu_buffer->commit_page->time_stamp;
784                 /* add barrier to keep gcc from optimizing too much */
785                 barrier();
786         }
787         while (rb_commit_index(cpu_buffer) !=
788                rb_page_write(cpu_buffer->commit_page)) {
789                 cpu_buffer->commit_page->commit =
790                         cpu_buffer->commit_page->write;
791                 barrier();
792         }
793 }
794
795 static void rb_reset_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
796 {
797         cpu_buffer->read_stamp = cpu_buffer->reader_page->time_stamp;
798         cpu_buffer->reader_page->read = 0;
799 }
800
801 static inline void rb_inc_iter(struct ring_buffer_iter *iter)
802 {
803         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
804
805         /*
806          * The iterator could be on the reader page (it starts there).
807          * But the head could have moved, since the reader was
808          * found. Check for this case and assign the iterator
809          * to the head page instead of next.
810          */
811         if (iter->head_page == cpu_buffer->reader_page)
812                 iter->head_page = cpu_buffer->head_page;
813         else
814                 rb_inc_page(cpu_buffer, &iter->head_page);
815
816         iter->read_stamp = iter->head_page->time_stamp;
817         iter->head = 0;
818 }
819
820 /**
821  * ring_buffer_update_event - update event type and data
822  * @event: the event to update
823  * @type: the type of event
824  * @length: the size of the event field in the ring buffer
825  *
826  * Update the type and data fields of the event. The length
827  * is the actual size that is written to the ring buffer,
828  * and with this, we can determine what to place into the
829  * data field.
830  */
831 static inline void
832 rb_update_event(struct ring_buffer_event *event,
833                          unsigned type, unsigned length)
834 {
835         event->type = type;
836
837         switch (type) {
838
839         case RINGBUF_TYPE_PADDING:
840                 break;
841
842         case RINGBUF_TYPE_TIME_EXTEND:
843                 event->len =
844                         (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1))
845                         >> RB_ALIGNMENT_SHIFT;
846                 break;
847
848         case RINGBUF_TYPE_TIME_STAMP:
849                 event->len =
850                         (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1))
851                         >> RB_ALIGNMENT_SHIFT;
852                 break;
853
854         case RINGBUF_TYPE_DATA:
855                 length -= RB_EVNT_HDR_SIZE;
856                 if (length > RB_MAX_SMALL_DATA) {
857                         event->len = 0;
858                         event->array[0] = length;
859                 } else
860                         event->len =
861                                 (length + (RB_ALIGNMENT-1))
862                                 >> RB_ALIGNMENT_SHIFT;
863                 break;
864         default:
865                 BUG();
866         }
867 }
868
869 static inline unsigned rb_calculate_event_length(unsigned length)
870 {
871         struct ring_buffer_event event; /* Used only for sizeof array */
872
873         /* zero length can cause confusion */
874         if (!length)
875                 length = 1;
876
877         if (length > RB_MAX_SMALL_DATA)
878                 length += sizeof(event.array[0]);
879
880         length += RB_EVNT_HDR_SIZE;
881         length = ALIGN(length, RB_ALIGNMENT);
882
883         return length;
884 }
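/*
 * Worked examples for the helper above (illustrative, assuming a 4-byte
 * event header): a 10-byte payload becomes 10 + 4 = 14, aligned up to 16; a
 * zero-length request is bumped to 1 byte and ends up as 8; a 100-byte
 * payload exceeds RB_MAX_SMALL_DATA, gains an extra length word, and becomes
 * 100 + 4 + 4 = 108 (already a multiple of 4).
 */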
885
886 static struct ring_buffer_event *
887 __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
888                   unsigned type, unsigned long length, u64 *ts)
889 {
890         struct buffer_page *tail_page, *head_page, *reader_page;
891         unsigned long tail, write;
892         struct ring_buffer *buffer = cpu_buffer->buffer;
893         struct ring_buffer_event *event;
894         unsigned long flags;
895
896         tail_page = cpu_buffer->tail_page;
897         write = local_add_return(length, &tail_page->write);
898         tail = write - length;
899
900         /* See if we shot past the end of this buffer page */
901         if (write > BUF_PAGE_SIZE) {
902                 struct buffer_page *next_page = tail_page;
903
904                 local_irq_save(flags);
905                 __raw_spin_lock(&cpu_buffer->lock);
906
907                 rb_inc_page(cpu_buffer, &next_page);
908
909                 head_page = cpu_buffer->head_page;
910                 reader_page = cpu_buffer->reader_page;
911
912                 /* we grabbed the lock before incrementing */
913                 if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
914                         goto out_unlock;
915
916                 /*
917                  * If for some reason, we had an interrupt storm that made
918                  * it all the way around the buffer, bail, and warn
919                  * about it.
920                  */
921                 if (unlikely(next_page == cpu_buffer->commit_page)) {
922                         WARN_ON_ONCE(1);
923                         goto out_unlock;
924                 }
925
926                 if (next_page == head_page) {
927                         if (!(buffer->flags & RB_FL_OVERWRITE)) {
928                                 /* reset write */
929                                 if (tail <= BUF_PAGE_SIZE)
930                                         local_set(&tail_page->write, tail);
931                                 goto out_unlock;
932                         }
933
934                         /* tail_page has not moved yet? */
935                         if (tail_page == cpu_buffer->tail_page) {
936                                 /* count overflows */
937                                 rb_update_overflow(cpu_buffer);
938
939                                 rb_inc_page(cpu_buffer, &head_page);
940                                 cpu_buffer->head_page = head_page;
941                                 cpu_buffer->head_page->read = 0;
942                         }
943                 }
944
945                 /*
946                  * If the tail page is still the same as what we think
947                  * it is, then it is up to us to update the tail
948                  * pointer.
949                  */
950                 if (tail_page == cpu_buffer->tail_page) {
951                         local_set(&next_page->write, 0);
952                         local_set(&next_page->commit, 0);
953                         cpu_buffer->tail_page = next_page;
954
955                         /* reread the time stamp */
956                         *ts = ring_buffer_time_stamp(cpu_buffer->cpu);
957                         cpu_buffer->tail_page->time_stamp = *ts;
958                 }
959
960                 /*
961                  * The actual tail page has moved forward.
962                  */
963                 if (tail < BUF_PAGE_SIZE) {
964                         /* Mark the rest of the page with padding */
965                         event = __rb_page_index(tail_page, tail);
966                         event->type = RINGBUF_TYPE_PADDING;
967                 }
968
969                 if (tail <= BUF_PAGE_SIZE)
970                         /* Set the write back to the previous setting */
971                         local_set(&tail_page->write, tail);
972
973                 /*
974                  * If this was a commit entry that failed,
975                  * increment that too
976                  */
977                 if (tail_page == cpu_buffer->commit_page &&
978                     tail == rb_commit_index(cpu_buffer)) {
979                         rb_set_commit_to_write(cpu_buffer);
980                 }
981
982                 __raw_spin_unlock(&cpu_buffer->lock);
983                 local_irq_restore(flags);
984
985                 /* fail and let the caller try again */
986                 return ERR_PTR(-EAGAIN);
987         }
988
989         /* We reserved something on the buffer */
990
991         if (RB_WARN_ON(cpu_buffer, write > BUF_PAGE_SIZE))
992                 return NULL;
993
994         event = __rb_page_index(tail_page, tail);
995         rb_update_event(event, type, length);
996
997         /*
998          * If this is a commit and the tail is zero, then update
999          * this page's time stamp.
1000          */
1001         if (!tail && rb_is_commit(cpu_buffer, event))
1002                 cpu_buffer->commit_page->time_stamp = *ts;
1003
1004         return event;
1005
1006  out_unlock:
1007         __raw_spin_unlock(&cpu_buffer->lock);
1008         local_irq_restore(flags);
1009         return NULL;
1010 }
1011
1012 static int
1013 rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1014                   u64 *ts, u64 *delta)
1015 {
1016         struct ring_buffer_event *event;
1017         static int once;
1018         int ret;
1019
1020         if (unlikely(*delta > (1ULL << 59) && !once++)) {
1021                 printk(KERN_WARNING "Delta way too big! %llu"
1022                        " ts=%llu write stamp = %llu\n",
1023                        (unsigned long long)*delta,
1024                        (unsigned long long)*ts,
1025                        (unsigned long long)cpu_buffer->write_stamp);
1026                 WARN_ON(1);
1027         }
1028
1029         /*
1030          * The delta is too big, we need to add a
1031          * new timestamp.
1032          */
1033         event = __rb_reserve_next(cpu_buffer,
1034                                   RINGBUF_TYPE_TIME_EXTEND,
1035                                   RB_LEN_TIME_EXTEND,
1036                                   ts);
1037         if (!event)
1038                 return -EBUSY;
1039
1040         if (PTR_ERR(event) == -EAGAIN)
1041                 return -EAGAIN;
1042
1043         /* Only a committed time event can update the write stamp */
1044         if (rb_is_commit(cpu_buffer, event)) {
1045                 /*
1046                  * If this is the first on the page, then we need to
1047                  * update the page itself, and just put in a zero.
1048                  */
1049                 if (rb_event_index(event)) {
1050                         event->time_delta = *delta & TS_MASK;
1051                         event->array[0] = *delta >> TS_SHIFT;
1052                 } else {
1053                         cpu_buffer->commit_page->time_stamp = *ts;
1054                         event->time_delta = 0;
1055                         event->array[0] = 0;
1056                 }
1057                 cpu_buffer->write_stamp = *ts;
1058                 /* let the caller know this was the commit */
1059                 ret = 1;
1060         } else {
1061                 /* Darn, this is just wasted space */
1062                 event->time_delta = 0;
1063                 event->array[0] = 0;
1064                 ret = 0;
1065         }
1066
1067         *delta = 0;
1068
1069         return ret;
1070 }
1071
1072 static struct ring_buffer_event *
1073 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1074                       unsigned type, unsigned long length)
1075 {
1076         struct ring_buffer_event *event;
1077         u64 ts, delta;
1078         int commit = 0;
1079         int nr_loops = 0;
1080
1081  again:
1082         /*
1083          * We allow for interrupts to reenter here and do a trace.
1084          * If one does, it will cause this original code to loop
1085          * back here. Even with heavy interrupts happening, this
1086          * should only happen a few times in a row. If this happens
1087          * 1000 times in a row, there must be either an interrupt
1088          * storm or we have something buggy.
1089          * Bail!
1090          */
1091         if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
1092                 return NULL;
1093
1094         ts = ring_buffer_time_stamp(cpu_buffer->cpu);
1095
1096         /*
1097          * Only the first commit can update the timestamp.
1098          * Yes there is a race here. If an interrupt comes in
1099          * just after the conditional and it traces too, then it
1100          * will also check the deltas. More than one timestamp may
1101          * also be made. But only the entry that did the actual
1102          * commit will be something other than zero.
1103          */
1104         if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
1105             rb_page_write(cpu_buffer->tail_page) ==
1106             rb_commit_index(cpu_buffer)) {
1107
1108                 delta = ts - cpu_buffer->write_stamp;
1109
1110                 /* make sure this delta is calculated here */
1111                 barrier();
1112
1113                 /* Did the write stamp get updated already? */
1114                 if (unlikely(ts < cpu_buffer->write_stamp))
1115                         delta = 0;
1116
1117                 if (test_time_stamp(delta)) {
1118
1119                         commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
1120
1121                         if (commit == -EBUSY)
1122                                 return NULL;
1123
1124                         if (commit == -EAGAIN)
1125                                 goto again;
1126
1127                         RB_WARN_ON(cpu_buffer, commit < 0);
1128                 }
1129         } else
1130                 /* Non commits have zero deltas */
1131                 delta = 0;
1132
1133         event = __rb_reserve_next(cpu_buffer, type, length, &ts);
1134         if (PTR_ERR(event) == -EAGAIN)
1135                 goto again;
1136
1137         if (!event) {
1138                 if (unlikely(commit))
1139                         /*
1140          * Ouch! We needed a timestamp and it was committed. But
1141                          * we didn't get our event reserved.
1142                          */
1143                         rb_set_commit_to_write(cpu_buffer);
1144                 return NULL;
1145         }
1146
1147         /*
1148          * If the timestamp was committed, make the commit our entry
1149          * now so that we will update it when needed.
1150          */
1151         if (commit)
1152                 rb_set_commit_event(cpu_buffer, event);
1153         else if (!rb_is_commit(cpu_buffer, event))
1154                 delta = 0;
1155
1156         event->time_delta = delta;
1157
1158         return event;
1159 }
1160
1161 static DEFINE_PER_CPU(int, rb_need_resched);
1162
1163 /**
1164  * ring_buffer_lock_reserve - reserve a part of the buffer
1165  * @buffer: the ring buffer to reserve from
1166  * @length: the length of the data to reserve (excluding event header)
1167  * @flags: a pointer to save the interrupt flags
1168  *
1169  * Returns a reserved event on the ring buffer to copy data directly into.
1170  * The user of this interface will need to get the body to write into
1171  * and can use the ring_buffer_event_data() interface.
1172  *
1173  * The length is the length of the data needed, not the event length
1174  * which also includes the event header.
1175  *
1176  * Must be paired with ring_buffer_unlock_commit, unless NULL is returned.
1177  * If NULL is returned, then nothing has been allocated or locked.
1178  */
1179 struct ring_buffer_event *
1180 ring_buffer_lock_reserve(struct ring_buffer *buffer,
1181                          unsigned long length,
1182                          unsigned long *flags)
1183 {
1184         struct ring_buffer_per_cpu *cpu_buffer;
1185         struct ring_buffer_event *event;
1186         int cpu, resched;
1187
1188         if (ring_buffers_off)
1189                 return NULL;
1190
1191         if (atomic_read(&buffer->record_disabled))
1192                 return NULL;
1193
1194         /* If we are tracing schedule, we don't want to recurse */
1195         resched = ftrace_preempt_disable();
1196
1197         cpu = raw_smp_processor_id();
1198
1199         if (!cpu_isset(cpu, buffer->cpumask))
1200                 goto out;
1201
1202         cpu_buffer = buffer->buffers[cpu];
1203
1204         if (atomic_read(&cpu_buffer->record_disabled))
1205                 goto out;
1206
1207         length = rb_calculate_event_length(length);
1208         if (length > BUF_PAGE_SIZE)
1209                 goto out;
1210
1211         event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
1212         if (!event)
1213                 goto out;
1214
1215         /*
1216          * Need to store resched state on this cpu.
1217          * Only the first needs to.
1218          */
1219
1220         if (preempt_count() == 1)
1221                 per_cpu(rb_need_resched, cpu) = resched;
1222
1223         return event;
1224
1225  out:
1226         ftrace_preempt_enable(resched);
1227         return NULL;
1228 }
1229
1230 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
1231                       struct ring_buffer_event *event)
1232 {
1233         cpu_buffer->entries++;
1234
1235         /* Only process further if we own the commit */
1236         if (!rb_is_commit(cpu_buffer, event))
1237                 return;
1238
1239         cpu_buffer->write_stamp += event->time_delta;
1240
1241         rb_set_commit_to_write(cpu_buffer);
1242 }
1243
1244 /**
1245  * ring_buffer_unlock_commit - commit a reserved event
1246  * @buffer: The buffer to commit to
1247  * @event: The event pointer to commit.
1248  * @flags: the interrupt flags received from ring_buffer_lock_reserve.
1249  *
1250  * This commits the data to the ring buffer, and releases any locks held.
1251  *
1252  * Must be paired with ring_buffer_lock_reserve.
1253  */
1254 int ring_buffer_unlock_commit(struct ring_buffer *buffer,
1255                               struct ring_buffer_event *event,
1256                               unsigned long flags)
1257 {
1258         struct ring_buffer_per_cpu *cpu_buffer;
1259         int cpu = raw_smp_processor_id();
1260
1261         cpu_buffer = buffer->buffers[cpu];
1262
1263         rb_commit(cpu_buffer, event);
1264
1265         /*
1266          * Only the last preempt count needs to restore preemption.
1267          */
1268         if (preempt_count() == 1)
1269                 ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
1270         else
1271                 preempt_enable_no_resched_notrace();
1272
1273         return 0;
1274 }
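/*
 * Illustrative usage of the reserve/commit pair, not part of the original
 * file ("struct my_entry" is a made-up payload type):
 *
 *	struct ring_buffer_event *event;
 *	unsigned long irq_flags;
 *	struct my_entry *entry;
 *
 *	event = ring_buffer_lock_reserve(buffer, sizeof(*entry), &irq_flags);
 *	if (!event)
 *		return;			(buffer full or recording disabled)
 *	entry = ring_buffer_event_data(event);
 *	entry->value = 42;
 *	ring_buffer_unlock_commit(buffer, event, irq_flags);
 */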
1275
1276 /**
1277  * ring_buffer_write - write data to the buffer without reserving
1278  * @buffer: The ring buffer to write to.
1279  * @length: The length of the data being written (excluding the event header)
1280  * @data: The data to write to the buffer.
1281  *
1282  * This is like ring_buffer_lock_reserve and ring_buffer_unlock_commit as
1283  * one function. If you already have the data to write to the buffer, it
1284  * may be easier to simply call this function.
1285  *
1286  * Note, like ring_buffer_lock_reserve, the length is the length of the data
1287  * and not the length of the event which would hold the header.
1288  */
1289 int ring_buffer_write(struct ring_buffer *buffer,
1290                         unsigned long length,
1291                         void *data)
1292 {
1293         struct ring_buffer_per_cpu *cpu_buffer;
1294         struct ring_buffer_event *event;
1295         unsigned long event_length;
1296         void *body;
1297         int ret = -EBUSY;
1298         int cpu, resched;
1299
1300         if (ring_buffers_off)
1301                 return -EBUSY;
1302
1303         if (atomic_read(&buffer->record_disabled))
1304                 return -EBUSY;
1305
1306         resched = ftrace_preempt_disable();
1307
1308         cpu = raw_smp_processor_id();
1309
1310         if (!cpu_isset(cpu, buffer->cpumask))
1311                 goto out;
1312
1313         cpu_buffer = buffer->buffers[cpu];
1314
1315         if (atomic_read(&cpu_buffer->record_disabled))
1316                 goto out;
1317
1318         event_length = rb_calculate_event_length(length);
1319         event = rb_reserve_next_event(cpu_buffer,
1320                                       RINGBUF_TYPE_DATA, event_length);
1321         if (!event)
1322                 goto out;
1323
1324         body = rb_event_data(event);
1325
1326         memcpy(body, data, length);
1327
1328         rb_commit(cpu_buffer, event);
1329
1330         ret = 0;
1331  out:
1332         ftrace_preempt_enable(resched);
1333
1334         return ret;
1335 }
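/*
 * Illustrative usage, not part of the original file: when the record is
 * already assembled, a single call replaces the reserve/commit pair
 * ("struct my_entry" is again a made-up type):
 *
 *	struct my_entry entry = { .value = 42 };
 *
 *	if (ring_buffer_write(buffer, sizeof(entry), &entry))
 *		return;		(recording disabled or the event did not fit)
 */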
1336
1337 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
1338 {
1339         struct buffer_page *reader = cpu_buffer->reader_page;
1340         struct buffer_page *head = cpu_buffer->head_page;
1341         struct buffer_page *commit = cpu_buffer->commit_page;
1342
1343         return reader->read == rb_page_commit(reader) &&
1344                 (commit == reader ||
1345                  (commit == head &&
1346                   head->read == rb_page_commit(commit)));
1347 }
1348
1349 /**
1350  * ring_buffer_record_disable - stop all writes into the buffer
1351  * @buffer: The ring buffer to stop writes to.
1352  *
1353  * This prevents all writes to the buffer. Any attempt to write
1354  * to the buffer after this will fail and return NULL.
1355  *
1356  * The caller should call synchronize_sched() after this.
1357  */
1358 void ring_buffer_record_disable(struct ring_buffer *buffer)
1359 {
1360         atomic_inc(&buffer->record_disabled);
1361 }
1362
1363 /**
1364  * ring_buffer_record_enable - enable writes to the buffer
1365  * @buffer: The ring buffer to enable writes
1366  *
1367  * Note, multiple disables will need the same number of enables
1368  * to truly enable the writing (much like preempt_disable).
1369  */
1370 void ring_buffer_record_enable(struct ring_buffer *buffer)
1371 {
1372         atomic_dec(&buffer->record_disabled);
1373 }
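/*
 * Illustrative usage, not part of the original file: the disable count
 * nests, so a reader wanting a stable view can do
 *
 *	ring_buffer_record_disable(buffer);
 *	synchronize_sched();		wait for writers already in flight
 *	... read the buffer ...
 *	ring_buffer_record_enable(buffer);
 *
 * with a matching enable call for every disable.
 */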
1374
1375 /**
1376  * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer
1377  * @buffer: The ring buffer to stop writes to.
1378  * @cpu: The CPU buffer to stop
1379  *
1380  * This prevents all writes to the buffer. Any attempt to write
1381  * to the buffer after this will fail and return NULL.
1382  *
1383  * The caller should call synchronize_sched() after this.
1384  */
1385 void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
1386 {
1387         struct ring_buffer_per_cpu *cpu_buffer;
1388
1389         if (!cpu_isset(cpu, buffer->cpumask))
1390                 return;
1391
1392         cpu_buffer = buffer->buffers[cpu];
1393         atomic_inc(&cpu_buffer->record_disabled);
1394 }
1395
1396 /**
1397  * ring_buffer_record_enable_cpu - enable writes to the buffer
1398  * @buffer: The ring buffer to enable writes
1399  * @cpu: The CPU to enable.
1400  *
1401  * Note, multiple disables will need the same number of enables
1402  * to truly enable the writing (much like preempt_disable).
1403  */
1404 void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
1405 {
1406         struct ring_buffer_per_cpu *cpu_buffer;
1407
1408         if (!cpu_isset(cpu, buffer->cpumask))
1409                 return;
1410
1411         cpu_buffer = buffer->buffers[cpu];
1412         atomic_dec(&cpu_buffer->record_disabled);
1413 }
1414
1415 /**
1416  * ring_buffer_entries_cpu - get the number of entries in a cpu buffer
1417  * @buffer: The ring buffer
1418  * @cpu: The per CPU buffer to get the entries from.
1419  */
1420 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
1421 {
1422         struct ring_buffer_per_cpu *cpu_buffer;
1423
1424         if (!cpu_isset(cpu, buffer->cpumask))
1425                 return 0;
1426
1427         cpu_buffer = buffer->buffers[cpu];
1428         return cpu_buffer->entries;
1429 }
1430
1431 /**
1432  * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer
1433  * @buffer: The ring buffer
1434  * @cpu: The per CPU buffer to get the number of overruns from
1435  */
1436 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
1437 {
1438         struct ring_buffer_per_cpu *cpu_buffer;
1439
1440         if (!cpu_isset(cpu, buffer->cpumask))
1441                 return 0;
1442
1443         cpu_buffer = buffer->buffers[cpu];
1444         return cpu_buffer->overrun;
1445 }
1446
1447 /**
1448  * ring_buffer_entries - get the number of entries in a buffer
1449  * @buffer: The ring buffer
1450  *
1451  * Returns the total number of entries in the ring buffer
1452  * (all CPU entries)
1453  */
1454 unsigned long ring_buffer_entries(struct ring_buffer *buffer)
1455 {
1456         struct ring_buffer_per_cpu *cpu_buffer;
1457         unsigned long entries = 0;
1458         int cpu;
1459
1460         /* if you care about this being correct, lock the buffer */
1461         for_each_buffer_cpu(buffer, cpu) {
1462                 cpu_buffer = buffer->buffers[cpu];
1463                 entries += cpu_buffer->entries;
1464         }
1465
1466         return entries;
1467 }
1468
1469 /**
1470  * ring_buffer_overruns - get the number of overruns in the buffer
1471  * @buffer: The ring buffer
1472  *
1473  * Returns the total number of overruns in the ring buffer
1474  * (all CPU entries)
1475  */
1476 unsigned long ring_buffer_overruns(struct ring_buffer *buffer)
1477 {
1478         struct ring_buffer_per_cpu *cpu_buffer;
1479         unsigned long overruns = 0;
1480         int cpu;
1481
1482         /* if you care about this being correct, lock the buffer */
1483         for_each_buffer_cpu(buffer, cpu) {
1484                 cpu_buffer = buffer->buffers[cpu];
1485                 overruns += cpu_buffer->overrun;
1486         }
1487
1488         return overruns;
1489 }
1490
1491 static void rb_iter_reset(struct ring_buffer_iter *iter)
1492 {
1493         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1494
1495         /* Iterator usage is expected to have recording disabled */
1496         if (list_empty(&cpu_buffer->reader_page->list)) {
1497                 iter->head_page = cpu_buffer->head_page;
1498                 iter->head = cpu_buffer->head_page->read;
1499         } else {
1500                 iter->head_page = cpu_buffer->reader_page;
1501                 iter->head = cpu_buffer->reader_page->read;
1502         }
1503         if (iter->head)
1504                 iter->read_stamp = cpu_buffer->read_stamp;
1505         else
1506                 iter->read_stamp = iter->head_page->time_stamp;
1507 }
1508
1509 /**
1510  * ring_buffer_iter_reset - reset an iterator
1511  * @iter: The iterator to reset
1512  *
1513  * Resets the iterator, so that it will start from the beginning
1514  * again.
1515  */
1516 void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
1517 {
1518         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1519         unsigned long flags;
1520
1521         spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1522         rb_iter_reset(iter);
1523         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1524 }
1525
1526 /**
1527  * ring_buffer_iter_empty - check if an iterator has no more to read
1528  * @iter: The iterator to check
1529  */
1530 int ring_buffer_iter_empty(struct ring_buffer_iter *iter)
1531 {
1532         struct ring_buffer_per_cpu *cpu_buffer;
1533
1534         cpu_buffer = iter->cpu_buffer;
1535
1536         return iter->head_page == cpu_buffer->commit_page &&
1537                 iter->head == rb_commit_index(cpu_buffer);
1538 }
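/*
 * Illustrative iteration sketch, not part of the original file.  It assumes
 * the iterator was set up with ring_buffer_read_start() and is consumed with
 * ring_buffer_read(), both defined later in this file (an assumption about
 * code outside this excerpt):
 *
 *	u64 ts;
 *	struct ring_buffer_event *event;
 *
 *	while (!ring_buffer_iter_empty(iter)) {
 *		event = ring_buffer_read(iter, &ts);
 *		...
 *	}
 */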
1539
1540 static void
1541 rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
1542                      struct ring_buffer_event *event)
1543 {
1544         u64 delta;
1545
1546         switch (event->type) {
1547         case RINGBUF_TYPE_PADDING:
1548                 return;
1549
1550         case RINGBUF_TYPE_TIME_EXTEND:
1551                 delta = event->array[0];
1552                 delta <<= TS_SHIFT;
1553                 delta += event->time_delta;
1554                 cpu_buffer->read_stamp += delta;
1555                 return;
1556
1557         case RINGBUF_TYPE_TIME_STAMP:
1558                 /* FIXME: not implemented */
1559                 return;
1560
1561         case RINGBUF_TYPE_DATA:
1562                 cpu_buffer->read_stamp += event->time_delta;
1563                 return;
1564
1565         default:
1566                 BUG();
1567         }
1568         return;
1569 }
1570
1571 static void
1572 rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
1573                           struct ring_buffer_event *event)
1574 {
1575         u64 delta;
1576
1577         switch (event->type) {
1578         case RINGBUF_TYPE_PADDING:
1579                 return;
1580
1581         case RINGBUF_TYPE_TIME_EXTEND:
1582                 delta = event->array[0];
1583                 delta <<= TS_SHIFT;
1584                 delta += event->time_delta;
1585                 iter->read_stamp += delta;
1586                 return;
1587
1588         case RINGBUF_TYPE_TIME_STAMP:
1589                 /* FIXME: not implemented */
1590                 return;
1591
1592         case RINGBUF_TYPE_DATA:
1593                 iter->read_stamp += event->time_delta;
1594                 return;
1595
1596         default:
1597                 BUG();
1598         }
1599         return;
1600 }
1601
1602 static struct buffer_page *
1603 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
1604 {
1605         struct buffer_page *reader = NULL;
1606         unsigned long flags;
1607         int nr_loops = 0;
1608
1609         local_irq_save(flags);
1610         __raw_spin_lock(&cpu_buffer->lock);
1611
1612  again:
1613         /*
1614          * This should normally only loop twice. But because the
1615          * start of the reader inserts an empty page, it causes
1616          * a case where we will loop three times. There should be no
1617          * reason to loop four times (that I know of).
1618          */
1619         if (RB_WARN_ON(cpu_buffer, ++nr_loops > 3)) {
1620                 reader = NULL;
1621                 goto out;
1622         }
1623
1624         reader = cpu_buffer->reader_page;
1625
1626         /* If there's more to read, return this page */
1627         if (cpu_buffer->reader_page->read < rb_page_size(reader))
1628                 goto out;
1629
1630         /* Never should we have an index greater than the size */
1631         if (RB_WARN_ON(cpu_buffer,
1632                        cpu_buffer->reader_page->read > rb_page_size(reader)))
1633                 goto out;
1634
1635         /* check if we caught up to the tail */
1636         reader = NULL;
1637         if (cpu_buffer->commit_page == cpu_buffer->reader_page)
1638                 goto out;
1639
1640         /*
1641          * Splice the empty reader page into the list around the head.
1642          * Reset the reader page to size zero.
1643          */
1644
1645         reader = cpu_buffer->head_page;
1646         cpu_buffer->reader_page->list.next = reader->list.next;
1647         cpu_buffer->reader_page->list.prev = reader->list.prev;
1648
1649         local_set(&cpu_buffer->reader_page->write, 0);
1650         local_set(&cpu_buffer->reader_page->commit, 0);
1651
1652         /* Make the reader page now replace the head */
1653         reader->list.prev->next = &cpu_buffer->reader_page->list;
1654         reader->list.next->prev = &cpu_buffer->reader_page->list;
1655
1656         /*
1657          * If the tail is on the reader, then we must set the head
1658          * to the inserted page, otherwise we set it one before.
1659          */
1660         cpu_buffer->head_page = cpu_buffer->reader_page;
1661
1662         if (cpu_buffer->commit_page != reader)
1663                 rb_inc_page(cpu_buffer, &cpu_buffer->head_page);
1664
1665         /* Finally update the reader page to the new head */
1666         cpu_buffer->reader_page = reader;
1667         rb_reset_reader_page(cpu_buffer);
1668
1669         goto again;
1670
1671  out:
1672         __raw_spin_unlock(&cpu_buffer->lock);
1673         local_irq_restore(flags);
1674
1675         return reader;
1676 }
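
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the splice above swaps the empty reader page into the ring in place
 * of the old head page and hands the old head page to the reader.
 * Stripped of the ring-buffer details, the pointer surgery is the
 * classic "replace a node in a circular doubly-linked list" operation
 * sketched below (hypothetical types and names).
 */
struct example_node {
        struct example_node *next, *prev;
};

static void example_replace_head(struct example_node *head,
                                 struct example_node *spare)
{
        /* the spare inherits the old head's neighbours ... */
        spare->next = head->next;
        spare->prev = head->prev;

        /* ... and the neighbours point back at the spare, leaving the */
        /* old head detached for the reader's exclusive use            */
        head->prev->next = spare;
        head->next->prev = spare;
}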
1677
1678 static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
1679 {
1680         struct ring_buffer_event *event;
1681         struct buffer_page *reader;
1682         unsigned length;
1683
1684         reader = rb_get_reader_page(cpu_buffer);
1685
1686         /* This function should not be called when buffer is empty */
1687         if (RB_WARN_ON(cpu_buffer, !reader))
1688                 return;
1689
1690         event = rb_reader_event(cpu_buffer);
1691
1692         if (event->type == RINGBUF_TYPE_DATA)
1693                 cpu_buffer->entries--;
1694
1695         rb_update_read_stamp(cpu_buffer, event);
1696
1697         length = rb_event_length(event);
1698         cpu_buffer->reader_page->read += length;
1699 }
1700
1701 static void rb_advance_iter(struct ring_buffer_iter *iter)
1702 {
1703         struct ring_buffer *buffer;
1704         struct ring_buffer_per_cpu *cpu_buffer;
1705         struct ring_buffer_event *event;
1706         unsigned length;
1707
1708         cpu_buffer = iter->cpu_buffer;
1709         buffer = cpu_buffer->buffer;
1710
1711         /*
1712          * Check if we are at the end of the buffer.
1713          */
1714         if (iter->head >= rb_page_size(iter->head_page)) {
1715                 if (RB_WARN_ON(buffer,
1716                                iter->head_page == cpu_buffer->commit_page))
1717                         return;
1718                 rb_inc_iter(iter);
1719                 return;
1720         }
1721
1722         event = rb_iter_head_event(iter);
1723
1724         length = rb_event_length(event);
1725
1726         /*
1727          * This should not be called to advance the iterator if we are
1728          * at the tail of the buffer.
1729          */
1730         if (RB_WARN_ON(cpu_buffer,
1731                        (iter->head_page == cpu_buffer->commit_page) &&
1732                        (iter->head + length > rb_commit_index(cpu_buffer))))
1733                 return;
1734
1735         rb_update_iter_read_stamp(iter, event);
1736
1737         iter->head += length;
1738
1739         /* check for end of page padding */
1740         if ((iter->head >= rb_page_size(iter->head_page)) &&
1741             (iter->head_page != cpu_buffer->commit_page))
1742                 rb_advance_iter(iter);
1743 }
1744
1745 static struct ring_buffer_event *
1746 rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1747 {
1748         struct ring_buffer_per_cpu *cpu_buffer;
1749         struct ring_buffer_event *event;
1750         struct buffer_page *reader;
1751         int nr_loops = 0;
1752
1753         if (!cpu_isset(cpu, buffer->cpumask))
1754                 return NULL;
1755
1756         cpu_buffer = buffer->buffers[cpu];
1757
1758  again:
1759         /*
1760          * We repeat when a timestamp is encountered. It is possible
1761          * to get multiple timestamps from an interrupt entering just
1762          * as one timestamp is about to be written. The max times
1763          * that this can happen is the number of nested interrupts we
1764          * can have.  Nesting 10 deep of interrupts is clearly
1765          * an anomaly.
1766          */
1767         if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1768                 return NULL;
1769
1770         reader = rb_get_reader_page(cpu_buffer);
1771         if (!reader)
1772                 return NULL;
1773
1774         event = rb_reader_event(cpu_buffer);
1775
1776         switch (event->type) {
1777         case RINGBUF_TYPE_PADDING:
1778                 RB_WARN_ON(cpu_buffer, 1);
1779                 rb_advance_reader(cpu_buffer);
1780                 return NULL;
1781
1782         case RINGBUF_TYPE_TIME_EXTEND:
1783                 /* Internal data, OK to advance */
1784                 rb_advance_reader(cpu_buffer);
1785                 goto again;
1786
1787         case RINGBUF_TYPE_TIME_STAMP:
1788                 /* FIXME: not implemented */
1789                 rb_advance_reader(cpu_buffer);
1790                 goto again;
1791
1792         case RINGBUF_TYPE_DATA:
1793                 if (ts) {
1794                         *ts = cpu_buffer->read_stamp + event->time_delta;
1795                         ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1796                 }
1797                 return event;
1798
1799         default:
1800                 BUG();
1801         }
1802
1803         return NULL;
1804 }
1805
1806 static struct ring_buffer_event *
1807 rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1808 {
1809         struct ring_buffer *buffer;
1810         struct ring_buffer_per_cpu *cpu_buffer;
1811         struct ring_buffer_event *event;
1812         int nr_loops = 0;
1813
1814         if (ring_buffer_iter_empty(iter))
1815                 return NULL;
1816
1817         cpu_buffer = iter->cpu_buffer;
1818         buffer = cpu_buffer->buffer;
1819
1820  again:
1821         /*
1822          * We repeat when a timestamp is encountered. It is possible
1823          * to get multiple timestamps from an interrupt entering just
1824          * as one timestamp is about to be written. The max times
1825          * that this can happen is the number of nested interrupts we
1826          * can have. Nesting 10 deep of interrupts is clearly
1827          * an anomaly.
1828          */
1829         if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
1830                 return NULL;
1831
1832         if (rb_per_cpu_empty(cpu_buffer))
1833                 return NULL;
1834
1835         event = rb_iter_head_event(iter);
1836
1837         switch (event->type) {
1838         case RINGBUF_TYPE_PADDING:
1839                 rb_inc_iter(iter);
1840                 goto again;
1841
1842         case RINGBUF_TYPE_TIME_EXTEND:
1843                 /* Internal data, OK to advance */
1844                 rb_advance_iter(iter);
1845                 goto again;
1846
1847         case RINGBUF_TYPE_TIME_STAMP:
1848                 /* FIXME: not implemented */
1849                 rb_advance_iter(iter);
1850                 goto again;
1851
1852         case RINGBUF_TYPE_DATA:
1853                 if (ts) {
1854                         *ts = iter->read_stamp + event->time_delta;
1855                         ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts);
1856                 }
1857                 return event;
1858
1859         default:
1860                 BUG();
1861         }
1862
1863         return NULL;
1864 }
1865
1866 /**
1867  * ring_buffer_peek - peek at the next event to be read
1868  * @buffer: The ring buffer to read
1869  * @cpu: The cpu to peek at
1870  * @ts: The timestamp counter of this event.
1871  *
1872  * This will return the event that will be read next, but does
1873  * not consume the data.
1874  */
1875 struct ring_buffer_event *
1876 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
1877 {
1878         struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1879         struct ring_buffer_event *event;
1880         unsigned long flags;
1881
1882         spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1883         event = rb_buffer_peek(buffer, cpu, ts);
1884         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1885
1886         return event;
1887 }
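
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * ring_buffer_peek() lets a reader look at the next event and its
 * timestamp without consuming it, for instance to merge-sort events
 * from several cpu buffers before actually reading them.  The function
 * name below is hypothetical.
 */
static u64 example_next_timestamp(struct ring_buffer *buffer, int cpu)
{
        struct ring_buffer_event *event;
        u64 ts = 0;

        /* the event stays in the buffer; the next peek returns it again */
        event = ring_buffer_peek(buffer, cpu, &ts);
        if (!event)
                return 0;

        return ts;
}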
1888
1889 /**
1890  * ring_buffer_iter_peek - peek at the next event to be read
1891  * @iter: The ring buffer iterator
1892  * @ts: The timestamp counter of this event.
1893  *
1894  * This will return the event that will be read next, but does
1895  * not increment the iterator.
1896  */
1897 struct ring_buffer_event *
1898 ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
1899 {
1900         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1901         struct ring_buffer_event *event;
1902         unsigned long flags;
1903
1904         spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1905         event = rb_iter_peek(iter, ts);
1906         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1907
1908         return event;
1909 }
1910
1911 /**
1912  * ring_buffer_consume - return an event and consume it
1913  * @buffer: The ring buffer to get the next event from
1914  *
1915  * Returns the next event in the ring buffer, and that event is consumed.
1916  * Sequential reads will therefore keep returning different events, and
1917  * will eventually empty the ring buffer if the producer is slower.
1918  */
1919 struct ring_buffer_event *
1920 ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
1921 {
1922         struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
1923         struct ring_buffer_event *event;
1924         unsigned long flags;
1925
1926         if (!cpu_isset(cpu, buffer->cpumask))
1927                 return NULL;
1928
1929         spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1930
1931         event = rb_buffer_peek(buffer, cpu, ts);
1932         if (!event)
1933                 goto out;
1934
1935         rb_advance_reader(cpu_buffer);
1936
1937  out:
1938         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1939
1940         return event;
1941 }
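
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * a consuming reader simply calls ring_buffer_consume() in a loop until
 * it returns NULL; every call removes the returned event from the cpu
 * buffer.  ring_buffer_event_data() and ring_buffer_event_length() give
 * access to the payload.  The function name below is hypothetical.
 */
static void example_drain_cpu(struct ring_buffer *buffer, int cpu)
{
        struct ring_buffer_event *event;
        u64 ts;

        while ((event = ring_buffer_consume(buffer, cpu, &ts))) {
                void *data = ring_buffer_event_data(event);
                unsigned len = ring_buffer_event_length(event);

                /* one consumed event: 'len' bytes at 'data', stamped 'ts' */
                pr_debug("consumed %u bytes at %p, ts=%llu\n",
                         len, data, (unsigned long long)ts);
        }
}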
1942
1943 /**
1944  * ring_buffer_read_start - start a non consuming read of the buffer
1945  * @buffer: The ring buffer to read from
1946  * @cpu: The cpu buffer to iterate over
1947  *
1948  * This starts up an iteration through the buffer. It also disables
1949  * the recording to the buffer until the reading is finished.
1950  * This prevents the reading from being corrupted. This is not
1951  * a consuming read, so a producer is not expected.
1952  *
1953  * Must be paired with ring_buffer_read_finish.
1954  */
1955 struct ring_buffer_iter *
1956 ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
1957 {
1958         struct ring_buffer_per_cpu *cpu_buffer;
1959         struct ring_buffer_iter *iter;
1960         unsigned long flags;
1961
1962         if (!cpu_isset(cpu, buffer->cpumask))
1963                 return NULL;
1964
1965         iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1966         if (!iter)
1967                 return NULL;
1968
1969         cpu_buffer = buffer->buffers[cpu];
1970
1971         iter->cpu_buffer = cpu_buffer;
1972
1973         atomic_inc(&cpu_buffer->record_disabled);
1974         synchronize_sched();
1975
1976         spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
1977         __raw_spin_lock(&cpu_buffer->lock);
1978         rb_iter_reset(iter);
1979         __raw_spin_unlock(&cpu_buffer->lock);
1980         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
1981
1982         return iter;
1983 }
1984
1985 /**
1986  * ring_buffer_read_finish - finish reading the iterator of the buffer
1987  * @iter: The iterator retrieved by ring_buffer_read_start
1988  *
1989  * This re-enables the recording to the buffer, and frees the
1990  * iterator.
1991  */
1992 void
1993 ring_buffer_read_finish(struct ring_buffer_iter *iter)
1994 {
1995         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
1996
1997         atomic_dec(&cpu_buffer->record_disabled);
1998         kfree(iter);
1999 }
2000
2001 /**
2002  * ring_buffer_read - read the next item in the ring buffer by the iterator
2003  * @iter: The ring buffer iterator
2004  * @ts: The time stamp of the event read.
2005  *
2006  * This reads the next event in the ring buffer and increments the iterator.
2007  */
2008 struct ring_buffer_event *
2009 ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
2010 {
2011         struct ring_buffer_event *event;
2012         struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
2013         unsigned long flags;
2014
2015         spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2016         event = rb_iter_peek(iter, ts);
2017         if (!event)
2018                 goto out;
2019
2020         rb_advance_iter(iter);
2021  out:
2022         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2023
2024         return event;
2025 }
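
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * the non-consuming protocol pairs ring_buffer_read_start() with
 * ring_buffer_read_finish(), walking the events in between with
 * ring_buffer_read() (or ring_buffer_iter_peek()).  Writers to the cpu
 * buffer stay disabled for the whole iteration.  The function name
 * below is hypothetical.
 */
static int example_count_events(struct ring_buffer *buffer, int cpu)
{
        struct ring_buffer_iter *iter;
        struct ring_buffer_event *event;
        u64 ts;
        int count = 0;

        iter = ring_buffer_read_start(buffer, cpu);
        if (!iter)
                return -ENOMEM;

        /* the events remain in the buffer; only the iterator advances */
        while ((event = ring_buffer_read(iter, &ts)))
                count++;

        /* re-enables recording on this cpu buffer and frees the iterator */
        ring_buffer_read_finish(iter);

        return count;
}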
2026
2027 /**
2028  * ring_buffer_size - return the size of the ring buffer (in bytes)
2029  * @buffer: The ring buffer.
2030  */
2031 unsigned long ring_buffer_size(struct ring_buffer *buffer)
2032 {
2033         return BUF_PAGE_SIZE * buffer->pages;
2034 }
2035
2036 static void
2037 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
2038 {
2039         cpu_buffer->head_page
2040                 = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
2041         local_set(&cpu_buffer->head_page->write, 0);
2042         local_set(&cpu_buffer->head_page->commit, 0);
2043
2044         cpu_buffer->head_page->read = 0;
2045
2046         cpu_buffer->tail_page = cpu_buffer->head_page;
2047         cpu_buffer->commit_page = cpu_buffer->head_page;
2048
2049         INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
2050         local_set(&cpu_buffer->reader_page->write, 0);
2051         local_set(&cpu_buffer->reader_page->commit, 0);
2052         cpu_buffer->reader_page->read = 0;
2053
2054         cpu_buffer->overrun = 0;
2055         cpu_buffer->entries = 0;
2056 }
2057
2058 /**
2059  * ring_buffer_reset_cpu - reset a ring buffer per CPU buffer
2060  * @buffer: The ring buffer to reset a per cpu buffer of
2061  * @cpu: The CPU buffer to be reset
2062  */
2063 void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
2064 {
2065         struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
2066         unsigned long flags;
2067
2068         if (!cpu_isset(cpu, buffer->cpumask))
2069                 return;
2070
2071         spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
2072
2073         __raw_spin_lock(&cpu_buffer->lock);
2074
2075         rb_reset_cpu(cpu_buffer);
2076
2077         __raw_spin_unlock(&cpu_buffer->lock);
2078
2079         spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
2080 }
2081
2082 /**
2083  * ring_buffer_reset - reset a ring buffer
2084  * @buffer: The ring buffer to reset all cpu buffers
2085  */
2086 void ring_buffer_reset(struct ring_buffer *buffer)
2087 {
2088         int cpu;
2089
2090         for_each_buffer_cpu(buffer, cpu)
2091                 ring_buffer_reset_cpu(buffer, cpu);
2092 }
2093
2094 /**
2095  * ring_buffer_empty - is the ring buffer empty?
2096  * @buffer: The ring buffer to test
2097  */
2098 int ring_buffer_empty(struct ring_buffer *buffer)
2099 {
2100         struct ring_buffer_per_cpu *cpu_buffer;
2101         int cpu;
2102
2103         /* yes this is racy, but if you don't like the race, lock the buffer */
2104         for_each_buffer_cpu(buffer, cpu) {
2105                 cpu_buffer = buffer->buffers[cpu];
2106                 if (!rb_per_cpu_empty(cpu_buffer))
2107                         return 0;
2108         }
2109         return 1;
2110 }
2111
2112 /**
2113  * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty?
2114  * @buffer: The ring buffer
2115  * @cpu: The CPU buffer to test
2116  */
2117 int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
2118 {
2119         struct ring_buffer_per_cpu *cpu_buffer;
2120
2121         if (!cpu_isset(cpu, buffer->cpumask))
2122                 return 1;
2123
2124         cpu_buffer = buffer->buffers[cpu];
2125         return rb_per_cpu_empty(cpu_buffer);
2126 }
2127
2128 /**
2129  * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers
2130  * @buffer_a: One buffer to swap with
2131  * @buffer_b: The other buffer to swap with
2132  *
2133  * This function is useful for tracers that want to take a "snapshot"
2134  * of a CPU buffer and have another backup buffer lying around.
2135  * It is expected that the tracer handles the cpu buffer not being
2136  * used at the moment.
2137  */
2138 int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
2139                          struct ring_buffer *buffer_b, int cpu)
2140 {
2141         struct ring_buffer_per_cpu *cpu_buffer_a;
2142         struct ring_buffer_per_cpu *cpu_buffer_b;
2143
2144         if (!cpu_isset(cpu, buffer_a->cpumask) ||
2145             !cpu_isset(cpu, buffer_b->cpumask))
2146                 return -EINVAL;
2147
2148         /* At least make sure the two buffers are somewhat the same */
2149         if (buffer_a->size != buffer_b->size ||
2150             buffer_a->pages != buffer_b->pages)
2151                 return -EINVAL;
2152
2153         cpu_buffer_a = buffer_a->buffers[cpu];
2154         cpu_buffer_b = buffer_b->buffers[cpu];
2155
2156         /*
2157          * We can't do a synchronize_sched here because this
2158          * function can be called in atomic context.
2159          * Normally this will be called from the same CPU as cpu.
2160          * If not it's up to the caller to protect this.
2161          */
2162         atomic_inc(&cpu_buffer_a->record_disabled);
2163         atomic_inc(&cpu_buffer_b->record_disabled);
2164
2165         buffer_a->buffers[cpu] = cpu_buffer_b;
2166         buffer_b->buffers[cpu] = cpu_buffer_a;
2167
2168         cpu_buffer_b->buffer = buffer_a;
2169         cpu_buffer_a->buffer = buffer_b;
2170
2171         atomic_dec(&cpu_buffer_a->record_disabled);
2172         atomic_dec(&cpu_buffer_b->record_disabled);
2173
2174         return 0;
2175 }
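
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * a tracer holding a spare ring buffer of the same size can snapshot a
 * cpu by swapping the live cpu buffer with the spare one and reading
 * the now-quiescent snapshot at leisure (example_drain_cpu() is the
 * hypothetical consuming reader sketched after ring_buffer_consume()
 * above).  The function name below is hypothetical as well.
 */
static int example_snapshot_cpu(struct ring_buffer *live,
                                struct ring_buffer *spare, int cpu)
{
        int ret;

        /* after the swap, 'spare' holds what was recorded so far on 'cpu' */
        ret = ring_buffer_swap_cpu(live, spare, cpu);
        if (ret)
                return ret;

        /* drain the snapshot without disturbing the live buffer */
        example_drain_cpu(spare, cpu);

        return 0;
}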
2176
2177 static ssize_t
2178 rb_simple_read(struct file *filp, char __user *ubuf,
2179                size_t cnt, loff_t *ppos)
2180 {
2181         int *p = filp->private_data;
2182         char buf[64];
2183         int r;
2184
2185         /* !ring_buffers_off == tracing_on */
2186         r = sprintf(buf, "%d\n", !*p);
2187
2188         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2189 }
2190
2191 static ssize_t
2192 rb_simple_write(struct file *filp, const char __user *ubuf,
2193                 size_t cnt, loff_t *ppos)
2194 {
2195         int *p = filp->private_data;
2196         char buf[64];
2197         long val;
2198         int ret;
2199
2200         if (cnt >= sizeof(buf))
2201                 return -EINVAL;
2202
2203         if (copy_from_user(&buf, ubuf, cnt))
2204                 return -EFAULT;
2205
2206         buf[cnt] = 0;
2207
2208         ret = strict_strtoul(buf, 10, &val);
2209         if (ret < 0)
2210                 return ret;
2211
2212         /* !ring_buffers_off == tracing_on */
2213         *p = !val;
2214
2215         (*ppos)++;
2216
2217         return cnt;
2218 }
2219
2220 static struct file_operations rb_simple_fops = {
2221         .open           = tracing_open_generic,
2222         .read           = rb_simple_read,
2223         .write          = rb_simple_write,
2224 };
2225
2226
2227 static __init int rb_init_debugfs(void)
2228 {
2229         struct dentry *d_tracer;
2230         struct dentry *entry;
2231
2232         d_tracer = tracing_init_dentry();
2233
2234         entry = debugfs_create_file("tracing_on", 0644, d_tracer,
2235                                     &ring_buffers_off, &rb_simple_fops);
2236         if (!entry)
2237                 pr_warning("Could not create debugfs 'tracing_on' entry\n");
2238
2239         return 0;
2240 }
2241
2242 fs_initcall(rb_init_debugfs);