Merge branch 'linus' into cpus4096
[linux-2.6] / block / blktrace.c
1 /*
2  * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
16  *
17  */
18 #include <linux/kernel.h>
19 #include <linux/blkdev.h>
20 #include <linux/blktrace_api.h>
21 #include <linux/percpu.h>
22 #include <linux/init.h>
23 #include <linux/mutex.h>
24 #include <linux/debugfs.h>
25 #include <linux/time.h>
26 #include <trace/block.h>
27 #include <asm/uaccess.h>
28
/*
 * Trace generation number. It is bumped every time a trace is started
 * (blk_trace_startstop()) and compared against tsk->btrace_seq so that a
 * process note is emitted again for each task after a new start.
 */
static unsigned int blktrace_seq __read_mostly = 1;

/*
 * Global reference count of probes: one reference per blk_trace that has
 * been set up. blk_probe_mutex serialises the register/unregister
 * transitions of the block tracepoints.
 */
static DEFINE_MUTEX(blk_probe_mutex);
static atomic_t blk_probes_ref = ATOMIC_INIT(0);

/* Defined further down, after the probe functions they hook up. */
static int blk_register_tracepoints(void);
static void blk_unregister_tracepoints(void);
37
38 /*
39  * Send out a notify message.
40  */
41 static void trace_note(struct blk_trace *bt, pid_t pid, int action,
42                        const void *data, size_t len)
43 {
44         struct blk_io_trace *t;
45
46         t = relay_reserve(bt->rchan, sizeof(*t) + len);
47         if (t) {
48                 const int cpu = smp_processor_id();
49
50                 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
51                 t->time = ktime_to_ns(ktime_get());
52                 t->device = bt->dev;
53                 t->action = action;
54                 t->pid = pid;
55                 t->cpu = cpu;
56                 t->pdu_len = len;
57                 memcpy((void *) t + sizeof(*t), data, len);
58         }
59 }
60
61 /*
62  * Send out a notify for this process, if we haven't done so since a trace
63  * started
64  */
65 static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk)
66 {
67         tsk->btrace_seq = blktrace_seq;
68         trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm));
69 }
70
71 static void trace_note_time(struct blk_trace *bt)
72 {
73         struct timespec now;
74         unsigned long flags;
75         u32 words[2];
76
77         getnstimeofday(&now);
78         words[0] = now.tv_sec;
79         words[1] = now.tv_nsec;
80
81         local_irq_save(flags);
82         trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words));
83         local_irq_restore(flags);
84 }
85
86 void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
87 {
88         int n;
89         va_list args;
90         unsigned long flags;
91         char *buf;
92
93         local_irq_save(flags);
94         buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
95         va_start(args, fmt);
96         n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
97         va_end(args);
98
99         trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
100         local_irq_restore(flags);
101 }
102 EXPORT_SYMBOL_GPL(__trace_note_message);
103
104 static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
105                          pid_t pid)
106 {
107         if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
108                 return 1;
109         if (sector < bt->start_lba || sector > bt->end_lba)
110                 return 1;
111         if (bt->pid && pid != bt->pid)
112                 return 1;
113
114         return 0;
115 }
116
117 /*
118  * Data direction bit lookup
119  */
120 static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) };
121
122 /* The ilog2() calls fall out because they're constant */
123 #define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \
124           (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) )
125
126 /*
127  * The worker for the various blk_add_trace*() types. Fills out a
128  * blk_io_trace structure and places it in a per-cpu subbuffer.
129  */
130 static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
131                      int rw, u32 what, int error, int pdu_len, void *pdu_data)
132 {
133         struct task_struct *tsk = current;
134         struct blk_io_trace *t;
135         unsigned long flags;
136         unsigned long *sequence;
137         pid_t pid;
138         int cpu;
139
140         if (unlikely(bt->trace_state != Blktrace_running))
141                 return;
142
143         what |= ddir_act[rw & WRITE];
144         what |= MASK_TC_BIT(rw, BARRIER);
145         what |= MASK_TC_BIT(rw, SYNC);
146         what |= MASK_TC_BIT(rw, AHEAD);
147         what |= MASK_TC_BIT(rw, META);
148         what |= MASK_TC_BIT(rw, DISCARD);
149
150         pid = tsk->pid;
151         if (unlikely(act_log_check(bt, what, sector, pid)))
152                 return;
153
154         /*
155          * A word about the locking here - we disable interrupts to reserve
156          * some space in the relay per-cpu buffer, to prevent an irq
157          * from coming in and stepping on our toes.
158          */
159         local_irq_save(flags);
160
161         if (unlikely(tsk->btrace_seq != blktrace_seq))
162                 trace_note_tsk(bt, tsk);
163
164         t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
165         if (t) {
166                 cpu = smp_processor_id();
167                 sequence = per_cpu_ptr(bt->sequence, cpu);
168
169                 t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
170                 t->sequence = ++(*sequence);
171                 t->time = ktime_to_ns(ktime_get());
172                 t->sector = sector;
173                 t->bytes = bytes;
174                 t->action = what;
175                 t->pid = pid;
176                 t->device = bt->dev;
177                 t->cpu = cpu;
178                 t->error = error;
179                 t->pdu_len = pdu_len;
180
181                 if (pdu_len)
182                         memcpy((void *) t + sizeof(*t), pdu_data, pdu_len);
183         }
184
185         local_irq_restore(flags);
186 }
187
/* Top-level "block" debugfs directory; NULL while it does not exist. */
static struct dentry *blk_tree_root;
/* Taken by blk_create_tree()/blk_remove_tree() around the two variables here. */
static DEFINE_MUTEX(blk_tree_mutex);
/* Number of per-device directories created under blk_tree_root. */
static unsigned int root_users;
191
192 static inline void blk_remove_root(void)
193 {
194         if (blk_tree_root) {
195                 debugfs_remove(blk_tree_root);
196                 blk_tree_root = NULL;
197         }
198 }
199
200 static void blk_remove_tree(struct dentry *dir)
201 {
202         mutex_lock(&blk_tree_mutex);
203         debugfs_remove(dir);
204         if (--root_users == 0)
205                 blk_remove_root();
206         mutex_unlock(&blk_tree_mutex);
207 }
208
209 static struct dentry *blk_create_tree(const char *blk_name)
210 {
211         struct dentry *dir = NULL;
212         int created = 0;
213
214         mutex_lock(&blk_tree_mutex);
215
216         if (!blk_tree_root) {
217                 blk_tree_root = debugfs_create_dir("block", NULL);
218                 if (!blk_tree_root)
219                         goto err;
220                 created = 1;
221         }
222
223         dir = debugfs_create_dir(blk_name, blk_tree_root);
224         if (dir)
225                 root_users++;
226         else {
227                 /* Delete root only if we created it */
228                 if (created)
229                         blk_remove_root();
230         }
231
232 err:
233         mutex_unlock(&blk_tree_mutex);
234         return dir;
235 }
236
237 static void blk_trace_cleanup(struct blk_trace *bt)
238 {
239         relay_close(bt->rchan);
240         debugfs_remove(bt->msg_file);
241         debugfs_remove(bt->dropped_file);
242         blk_remove_tree(bt->dir);
243         free_percpu(bt->sequence);
244         free_percpu(bt->msg_data);
245         kfree(bt);
246         mutex_lock(&blk_probe_mutex);
247         if (atomic_dec_and_test(&blk_probes_ref))
248                 blk_unregister_tracepoints();
249         mutex_unlock(&blk_probe_mutex);
250 }
251
252 int blk_trace_remove(struct request_queue *q)
253 {
254         struct blk_trace *bt;
255
256         bt = xchg(&q->blk_trace, NULL);
257         if (!bt)
258                 return -EINVAL;
259
260         if (bt->trace_state == Blktrace_setup ||
261             bt->trace_state == Blktrace_stopped)
262                 blk_trace_cleanup(bt);
263
264         return 0;
265 }
266 EXPORT_SYMBOL_GPL(blk_trace_remove);
267
268 static int blk_dropped_open(struct inode *inode, struct file *filp)
269 {
270         filp->private_data = inode->i_private;
271
272         return 0;
273 }
274
275 static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
276                                 size_t count, loff_t *ppos)
277 {
278         struct blk_trace *bt = filp->private_data;
279         char buf[16];
280
281         snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
282
283         return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
284 }
285
/* File operations of the per-trace "dropped" debugfs file (open and read only). */
static const struct file_operations blk_dropped_fops = {
        .owner =        THIS_MODULE,
        .open =         blk_dropped_open,
        .read =         blk_dropped_read,
};
291
292 static int blk_msg_open(struct inode *inode, struct file *filp)
293 {
294         filp->private_data = inode->i_private;
295
296         return 0;
297 }
298
299 static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
300                                 size_t count, loff_t *ppos)
301 {
302         char *msg;
303         struct blk_trace *bt;
304
305         if (count > BLK_TN_MAX_MSG)
306                 return -EINVAL;
307
308         msg = kmalloc(count, GFP_KERNEL);
309         if (msg == NULL)
310                 return -ENOMEM;
311
312         if (copy_from_user(msg, buffer, count)) {
313                 kfree(msg);
314                 return -EFAULT;
315         }
316
317         bt = filp->private_data;
318         __trace_note_message(bt, "%s", msg);
319         kfree(msg);
320
321         return count;
322 }
323
/* File operations of the per-trace "msg" debugfs file (open and write only). */
static const struct file_operations blk_msg_fops = {
        .owner =        THIS_MODULE,
        .open =         blk_msg_open,
        .write =        blk_msg_write,
};
329
330 /*
331  * Keep track of how many times we encountered a full subbuffer, to aid
332  * the user space app in telling how many lost events there were.
333  */
334 static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
335                                      void *prev_subbuf, size_t prev_padding)
336 {
337         struct blk_trace *bt;
338
339         if (!relay_buf_full(buf))
340                 return 1;
341
342         bt = buf->chan->private_data;
343         atomic_inc(&bt->dropped);
344         return 0;
345 }
346
/* relay remove_buf_file callback: remove the debugfs entry; always returns 0. */
static int blk_remove_buf_file_callback(struct dentry *dentry)
{
        debugfs_remove(dentry);
        return 0;
}
352
353 static struct dentry *blk_create_buf_file_callback(const char *filename,
354                                                    struct dentry *parent,
355                                                    int mode,
356                                                    struct rchan_buf *buf,
357                                                    int *is_global)
358 {
359         return debugfs_create_file(filename, mode, parent, buf,
360                                         &relay_file_operations);
361 }
362
/* Callbacks handed to relay_open() when a trace is set up. */
static struct rchan_callbacks blk_relay_callbacks = {
        .subbuf_start           = blk_subbuf_start_callback,
        .create_buf_file        = blk_create_buf_file_callback,
        .remove_buf_file        = blk_remove_buf_file_callback,
};
368
369 /*
370  * Setup everything required to start tracing
371  */
372 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
373                         struct blk_user_trace_setup *buts)
374 {
375         struct blk_trace *old_bt, *bt = NULL;
376         struct dentry *dir = NULL;
377         int ret, i;
378
379         if (!buts->buf_size || !buts->buf_nr)
380                 return -EINVAL;
381
382         strncpy(buts->name, name, BLKTRACE_BDEV_SIZE);
383         buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
384
385         /*
386          * some device names have larger paths - convert the slashes
387          * to underscores for this to work as expected
388          */
389         for (i = 0; i < strlen(buts->name); i++)
390                 if (buts->name[i] == '/')
391                         buts->name[i] = '_';
392
393         ret = -ENOMEM;
394         bt = kzalloc(sizeof(*bt), GFP_KERNEL);
395         if (!bt)
396                 goto err;
397
398         bt->sequence = alloc_percpu(unsigned long);
399         if (!bt->sequence)
400                 goto err;
401
402         bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
403         if (!bt->msg_data)
404                 goto err;
405
406         ret = -ENOENT;
407         dir = blk_create_tree(buts->name);
408         if (!dir)
409                 goto err;
410
411         bt->dir = dir;
412         bt->dev = dev;
413         atomic_set(&bt->dropped, 0);
414
415         ret = -EIO;
416         bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
417         if (!bt->dropped_file)
418                 goto err;
419
420         bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
421         if (!bt->msg_file)
422                 goto err;
423
424         bt->rchan = relay_open("trace", dir, buts->buf_size,
425                                 buts->buf_nr, &blk_relay_callbacks, bt);
426         if (!bt->rchan)
427                 goto err;
428
429         bt->act_mask = buts->act_mask;
430         if (!bt->act_mask)
431                 bt->act_mask = (u16) -1;
432
433         bt->start_lba = buts->start_lba;
434         bt->end_lba = buts->end_lba;
435         if (!bt->end_lba)
436                 bt->end_lba = -1ULL;
437
438         bt->pid = buts->pid;
439         bt->trace_state = Blktrace_setup;
440
441         mutex_lock(&blk_probe_mutex);
442         if (atomic_add_return(1, &blk_probes_ref) == 1) {
443                 ret = blk_register_tracepoints();
444                 if (ret)
445                         goto probe_err;
446         }
447         mutex_unlock(&blk_probe_mutex);
448
449         ret = -EBUSY;
450         old_bt = xchg(&q->blk_trace, bt);
451         if (old_bt) {
452                 (void) xchg(&q->blk_trace, old_bt);
453                 goto err;
454         }
455
456         return 0;
457 probe_err:
458         atomic_dec(&blk_probes_ref);
459         mutex_unlock(&blk_probe_mutex);
460 err:
461         if (dir)
462                 blk_remove_tree(dir);
463         if (bt) {
464                 if (bt->msg_file)
465                         debugfs_remove(bt->msg_file);
466                 if (bt->dropped_file)
467                         debugfs_remove(bt->dropped_file);
468                 free_percpu(bt->sequence);
469                 free_percpu(bt->msg_data);
470                 if (bt->rchan)
471                         relay_close(bt->rchan);
472                 kfree(bt);
473         }
474         return ret;
475 }
476
477 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
478                     char __user *arg)
479 {
480         struct blk_user_trace_setup buts;
481         int ret;
482
483         ret = copy_from_user(&buts, arg, sizeof(buts));
484         if (ret)
485                 return -EFAULT;
486
487         ret = do_blk_trace_setup(q, name, dev, &buts);
488         if (ret)
489                 return ret;
490
491         if (copy_to_user(arg, &buts, sizeof(buts)))
492                 return -EFAULT;
493
494         return 0;
495 }
496 EXPORT_SYMBOL_GPL(blk_trace_setup);
497
498 int blk_trace_startstop(struct request_queue *q, int start)
499 {
500         struct blk_trace *bt;
501         int ret;
502
503         if ((bt = q->blk_trace) == NULL)
504                 return -EINVAL;
505
506         /*
507          * For starting a trace, we can transition from a setup or stopped
508          * trace. For stopping a trace, the state must be running
509          */
510         ret = -EINVAL;
511         if (start) {
512                 if (bt->trace_state == Blktrace_setup ||
513                     bt->trace_state == Blktrace_stopped) {
514                         blktrace_seq++;
515                         smp_mb();
516                         bt->trace_state = Blktrace_running;
517
518                         trace_note_time(bt);
519                         ret = 0;
520                 }
521         } else {
522                 if (bt->trace_state == Blktrace_running) {
523                         bt->trace_state = Blktrace_stopped;
524                         relay_flush(bt->rchan);
525                         ret = 0;
526                 }
527         }
528
529         return ret;
530 }
531 EXPORT_SYMBOL_GPL(blk_trace_startstop);
532
533 /**
534  * blk_trace_ioctl: - handle the ioctls associated with tracing
535  * @bdev:       the block device
536  * @cmd:        the ioctl cmd
537  * @arg:        the argument data, if any
538  *
539  **/
540 int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
541 {
542         struct request_queue *q;
543         int ret, start = 0;
544         char b[BDEVNAME_SIZE];
545
546         q = bdev_get_queue(bdev);
547         if (!q)
548                 return -ENXIO;
549
550         mutex_lock(&bdev->bd_mutex);
551
552         switch (cmd) {
553         case BLKTRACESETUP:
554                 bdevname(bdev, b);
555                 ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
556                 break;
557         case BLKTRACESTART:
558                 start = 1;
559         case BLKTRACESTOP:
560                 ret = blk_trace_startstop(q, start);
561                 break;
562         case BLKTRACETEARDOWN:
563                 ret = blk_trace_remove(q);
564                 break;
565         default:
566                 ret = -ENOTTY;
567                 break;
568         }
569
570         mutex_unlock(&bdev->bd_mutex);
571         return ret;
572 }
573
574 /**
575  * blk_trace_shutdown: - stop and cleanup trace structures
576  * @q:    the request queue associated with the device
577  *
578  **/
579 void blk_trace_shutdown(struct request_queue *q)
580 {
581         if (q->blk_trace) {
582                 blk_trace_startstop(q, 0);
583                 blk_trace_remove(q);
584         }
585 }
586
587 /*
588  * blktrace probes
589  */
590
591 /**
592  * blk_add_trace_rq - Add a trace for a request oriented action
593  * @q:          queue the io is for
594  * @rq:         the source request
595  * @what:       the action
596  *
597  * Description:
598  *     Records an action against a request. Will log the bio offset + size.
599  *
600  **/
601 static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
602                                     u32 what)
603 {
604         struct blk_trace *bt = q->blk_trace;
605         int rw = rq->cmd_flags & 0x03;
606
607         if (likely(!bt))
608                 return;
609
610         if (blk_discard_rq(rq))
611                 rw |= (1 << BIO_RW_DISCARD);
612
613         if (blk_pc_request(rq)) {
614                 what |= BLK_TC_ACT(BLK_TC_PC);
615                 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
616                                 sizeof(rq->cmd), rq->cmd);
617         } else  {
618                 what |= BLK_TC_ACT(BLK_TC_FS);
619                 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
620                                 rw, what, rq->errors, 0, NULL);
621         }
622 }
623
/* Tracepoint probe: log a BLK_TA_ABORT event for @rq. */
static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_ABORT);
}
628
/* Tracepoint probe: log a BLK_TA_INSERT event for @rq. */
static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_INSERT);
}
633
/* Tracepoint probe: log a BLK_TA_ISSUE event for @rq. */
static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_ISSUE);
}
638
/* Tracepoint probe: log a BLK_TA_REQUEUE event for @rq. */
static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
}
643
/* Tracepoint probe: log a BLK_TA_COMPLETE event for @rq. */
static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq)
{
        blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
}
648
649 /**
650  * blk_add_trace_bio - Add a trace for a bio oriented action
651  * @q:          queue the io is for
652  * @bio:        the source bio
653  * @what:       the action
654  *
655  * Description:
656  *     Records an action against a bio. Will log the bio offset + size.
657  *
658  **/
659 static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
660                                      u32 what)
661 {
662         struct blk_trace *bt = q->blk_trace;
663
664         if (likely(!bt))
665                 return;
666
667         __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
668                         !bio_flagged(bio, BIO_UPTODATE), 0, NULL);
669 }
670
/* Tracepoint probe: log a BLK_TA_BOUNCE event for @bio. */
static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_BOUNCE);
}
675
/* Tracepoint probe: log a BLK_TA_COMPLETE event for @bio. */
static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_COMPLETE);
}
680
/* Tracepoint probe: log a BLK_TA_BACKMERGE event for @bio. */
static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE);
}
685
/* Tracepoint probe: log a BLK_TA_FRONTMERGE event for @bio. */
static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE);
}
690
/* Tracepoint probe: log a BLK_TA_QUEUE event for @bio. */
static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio)
{
        blk_add_trace_bio(q, bio, BLK_TA_QUEUE);
}
695
696 static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw)
697 {
698         if (bio)
699                 blk_add_trace_bio(q, bio, BLK_TA_GETRQ);
700         else {
701                 struct blk_trace *bt = q->blk_trace;
702
703                 if (bt)
704                         __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL);
705         }
706 }
707
708
709 static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw)
710 {
711         if (bio)
712                 blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ);
713         else {
714                 struct blk_trace *bt = q->blk_trace;
715
716                 if (bt)
717                         __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL);
718         }
719 }
720
721 static void blk_add_trace_plug(struct request_queue *q)
722 {
723         struct blk_trace *bt = q->blk_trace;
724
725         if (bt)
726                 __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
727 }
728
729 static void blk_add_trace_unplug_io(struct request_queue *q)
730 {
731         struct blk_trace *bt = q->blk_trace;
732
733         if (bt) {
734                 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
735                 __be64 rpdu = cpu_to_be64(pdu);
736
737                 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
738                                 sizeof(rpdu), &rpdu);
739         }
740 }
741
742 static void blk_add_trace_unplug_timer(struct request_queue *q)
743 {
744         struct blk_trace *bt = q->blk_trace;
745
746         if (bt) {
747                 unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
748                 __be64 rpdu = cpu_to_be64(pdu);
749
750                 __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
751                                 sizeof(rpdu), &rpdu);
752         }
753 }
754
755 static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
756                                 unsigned int pdu)
757 {
758         struct blk_trace *bt = q->blk_trace;
759
760         if (bt) {
761                 __be64 rpdu = cpu_to_be64(pdu);
762
763                 __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
764                                 BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
765                                 sizeof(rpdu), &rpdu);
766         }
767 }
768
769 /**
770  * blk_add_trace_remap - Add a trace for a remap operation
771  * @q:          queue the io is for
772  * @bio:        the source bio
773  * @dev:        target device
774  * @from:       source sector
775  * @to:         target sector
776  *
777  * Description:
778  *     Device mapper or raid target sometimes need to split a bio because
779  *     it spans a stripe (or similar). Add a trace for that action.
780  *
781  **/
782 static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
783                                        dev_t dev, sector_t from, sector_t to)
784 {
785         struct blk_trace *bt = q->blk_trace;
786         struct blk_io_trace_remap r;
787
788         if (likely(!bt))
789                 return;
790
791         r.device = cpu_to_be32(dev);
792         r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
793         r.sector = cpu_to_be64(to);
794
795         __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
796                         !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
797 }
798
799 /**
800  * blk_add_driver_data - Add binary message with driver-specific data
801  * @q:          queue the io is for
802  * @rq:         io request
803  * @data:       driver-specific data
804  * @len:        length of driver-specific data
805  *
806  * Description:
807  *     Some drivers might want to write driver-specific data per request.
808  *
809  **/
810 void blk_add_driver_data(struct request_queue *q,
811                          struct request *rq,
812                          void *data, size_t len)
813 {
814         struct blk_trace *bt = q->blk_trace;
815
816         if (likely(!bt))
817                 return;
818
819         if (blk_pc_request(rq))
820                 __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA,
821                                 rq->errors, len, data);
822         else
823                 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
824                                 0, BLK_TA_DRV_DATA, rq->errors, len, data);
825 }
826 EXPORT_SYMBOL_GPL(blk_add_driver_data);
827
/*
 * Attach every blktrace probe to its block tracepoint.
 *
 * A failing registration only triggers a WARN_ON(); registration carries
 * on with the remaining probes and the function always returns 0.
 * NOTE(review): callers check the return value, but as written it can
 * never be non-zero - confirm whether failures should be propagated.
 */
static int blk_register_tracepoints(void)
{
        int ret;

        ret = register_trace_block_rq_abort(blk_add_trace_rq_abort);
        WARN_ON(ret);
        ret = register_trace_block_rq_insert(blk_add_trace_rq_insert);
        WARN_ON(ret);
        ret = register_trace_block_rq_issue(blk_add_trace_rq_issue);
        WARN_ON(ret);
        ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue);
        WARN_ON(ret);
        ret = register_trace_block_rq_complete(blk_add_trace_rq_complete);
        WARN_ON(ret);
        ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce);
        WARN_ON(ret);
        ret = register_trace_block_bio_complete(blk_add_trace_bio_complete);
        WARN_ON(ret);
        ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
        WARN_ON(ret);
        ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
        WARN_ON(ret);
        ret = register_trace_block_bio_queue(blk_add_trace_bio_queue);
        WARN_ON(ret);
        ret = register_trace_block_getrq(blk_add_trace_getrq);
        WARN_ON(ret);
        ret = register_trace_block_sleeprq(blk_add_trace_sleeprq);
        WARN_ON(ret);
        ret = register_trace_block_plug(blk_add_trace_plug);
        WARN_ON(ret);
        ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer);
        WARN_ON(ret);
        ret = register_trace_block_unplug_io(blk_add_trace_unplug_io);
        WARN_ON(ret);
        ret = register_trace_block_split(blk_add_trace_split);
        WARN_ON(ret);
        ret = register_trace_block_remap(blk_add_trace_remap);
        WARN_ON(ret);
        return 0;
}
868
/*
 * Detach every blktrace probe, in the reverse of the order used by
 * blk_register_tracepoints(), then call
 * tracepoint_synchronize_unregister() before returning.
 */
static void blk_unregister_tracepoints(void)
{
        unregister_trace_block_remap(blk_add_trace_remap);
        unregister_trace_block_split(blk_add_trace_split);
        unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
        unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer);
        unregister_trace_block_plug(blk_add_trace_plug);
        unregister_trace_block_sleeprq(blk_add_trace_sleeprq);
        unregister_trace_block_getrq(blk_add_trace_getrq);
        unregister_trace_block_bio_queue(blk_add_trace_bio_queue);
        unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge);
        unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge);
        unregister_trace_block_bio_complete(blk_add_trace_bio_complete);
        unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce);
        unregister_trace_block_rq_complete(blk_add_trace_rq_complete);
        unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue);
        unregister_trace_block_rq_issue(blk_add_trace_rq_issue);
        unregister_trace_block_rq_insert(blk_add_trace_rq_insert);
        unregister_trace_block_rq_abort(blk_add_trace_rq_abort);

        tracepoint_synchronize_unregister();
}