Merge branch 'for-2.6.28' of git://linux-nfs.org/~bfields/linux
[linux-2.6] / virt / kvm / kvm_trace.c
1 /*
2  * kvm trace
3  *
4  * It is designed to allow debugging traces of kvm to be generated
5  * on UP / SMP machines.  Each trace entry can be timestamped so that
6  * it's possible to reconstruct a chronological record of trace events.
7  * The implementation refers to blktrace kernel support.
8  *
9  * Copyright (c) 2008 Intel Corporation
10  * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
11  *
12  * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
13  *
14  * Date:    Feb 2008
15  */
16
17 #include <linux/module.h>
18 #include <linux/relay.h>
19 #include <linux/debugfs.h>
20 #include <linux/ktime.h>
21
22 #include <linux/kvm_host.h>
23
/* Life-cycle states kept in kvm_trace.trace_state; each value is a distinct bit. */
#define KVM_TRACE_STATE_RUNNING         0x1
#define KVM_TRACE_STATE_PAUSE           0x2
#define KVM_TRACE_STATE_CLEARUP         0x4
27
/*
 * Book-keeping for one trace session.
 */
struct kvm_trace {
        /* one of the KVM_TRACE_STATE_* values */
        int trace_state;
        /* relay channel obtained from relay_open(); trace records go here */
        struct rchan *rchan;
        /* debugfs "lost_records" file exposing lost_records below */
        struct dentry *lost_file;
        /* incremented by the sub-buffer callback whenever the buffer is full */
        atomic_t lost_records;
};
/* Session published by do_kvm_trace_enable(); NULL until tracing is first enabled. */
static struct kvm_trace *kvm_trace;
35
/*
 * Description of one marker probe registered by this file.
 */
struct kvm_trace_probe {
        /* marker name handed to marker_probe_register()/unregister() */
        const char *name;
        /* marker format string handed to marker_probe_register() */
        const char *format;
        /* non-zero: records produced through this probe carry a timestamp */
        u32 timestamp_in;
        /* callback invoked when the marker fires */
        marker_probe_func *probe_func;
};
42
43 static inline int calc_rec_size(int timestamp, int extra)
44 {
45         int rec_size = KVM_TRC_HEAD_SIZE;
46
47         rec_size += extra;
48         return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
49 }
50
51 static void kvm_add_trace(void *probe_private, void *call_data,
52                           const char *format, va_list *args)
53 {
54         struct kvm_trace_probe *p = probe_private;
55         struct kvm_trace *kt = kvm_trace;
56         struct kvm_trace_rec rec;
57         struct kvm_vcpu *vcpu;
58         int    i, size;
59         u32    extra;
60
61         if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
62                 return;
63
64         rec.rec_val     = TRACE_REC_EVENT_ID(va_arg(*args, u32));
65         vcpu            = va_arg(*args, struct kvm_vcpu *);
66         rec.pid         = current->tgid;
67         rec.vcpu_id     = vcpu->vcpu_id;
68
69         extra           = va_arg(*args, u32);
70         WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
71         extra           = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
72
73         rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
74                         | TRACE_REC_NUM_DATA_ARGS(extra);
75
76         if (p->timestamp_in) {
77                 rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
78
79                 for (i = 0; i < extra; i++)
80                         rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
81         } else {
82                 for (i = 0; i < extra; i++)
83                         rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
84         }
85
86         size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
87         relay_write(kt->rchan, &rec, size);
88 }
89
90 static struct kvm_trace_probe kvm_trace_probes[] = {
91         { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
92         { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
93 };
94
95 static int lost_records_get(void *data, u64 *val)
96 {
97         struct kvm_trace *kt = data;
98
99         *val = atomic_read(&kt->lost_records);
100         return 0;
101 }
102
103 DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
104
105 /*
106  *  The relay channel is used in "no-overwrite" mode, it keeps trace of how
107  *  many times we encountered a full subbuffer, to tell user space app the
108  *  lost records there were.
109  */
110 static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
111                                      void *prev_subbuf, size_t prev_padding)
112 {
113         struct kvm_trace *kt;
114
115         if (!relay_buf_full(buf)) {
116                 if (!prev_subbuf) {
117                         /*
118                          * executed only once when the channel is opened
119                          * save metadata as first record
120                          */
121                         subbuf_start_reserve(buf, sizeof(u32));
122                         *(u32 *)subbuf = 0x12345678;
123                 }
124
125                 return 1;
126         }
127
128         kt = buf->chan->private_data;
129         atomic_inc(&kt->lost_records);
130
131         return 0;
132 }
133
134 static struct dentry *kvm_create_buf_file_callack(const char *filename,
135                                                  struct dentry *parent,
136                                                  int mode,
137                                                  struct rchan_buf *buf,
138                                                  int *is_global)
139 {
140         return debugfs_create_file(filename, mode, parent, buf,
141                                    &relay_file_operations);
142 }
143
/*
 * relay "remove_buf_file" callback: remove the debugfs entry that
 * kvm_create_buf_file_callack() created.  Always returns 0.
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
        debugfs_remove(dentry);
        return 0;
}
149
/* Callback table passed to relay_open() in do_kvm_trace_enable(). */
static struct rchan_callbacks kvm_relay_callbacks = {
        .subbuf_start           = kvm_subbuf_start_callback,
        .create_buf_file        = kvm_create_buf_file_callack,
        .remove_buf_file        = kvm_remove_buf_file_callback,
};
155
156 static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
157 {
158         struct kvm_trace *kt;
159         int i, r = -ENOMEM;
160
161         if (!kuts->buf_size || !kuts->buf_nr)
162                 return -EINVAL;
163
164         kt = kzalloc(sizeof(*kt), GFP_KERNEL);
165         if (!kt)
166                 goto err;
167
168         r = -EIO;
169         atomic_set(&kt->lost_records, 0);
170         kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
171                                             kt, &kvm_trace_lost_ops);
172         if (!kt->lost_file)
173                 goto err;
174
175         kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
176                                 kuts->buf_nr, &kvm_relay_callbacks, kt);
177         if (!kt->rchan)
178                 goto err;
179
180         kvm_trace = kt;
181
182         for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
183                 struct kvm_trace_probe *p = &kvm_trace_probes[i];
184
185                 r = marker_probe_register(p->name, p->format, p->probe_func, p);
186                 if (r)
187                         printk(KERN_INFO "Unable to register probe %s\n",
188                                p->name);
189         }
190
191         kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
192
193         return 0;
194 err:
195         if (kt) {
196                 if (kt->lost_file)
197                         debugfs_remove(kt->lost_file);
198                 if (kt->rchan)
199                         relay_close(kt->rchan);
200                 kfree(kt);
201         }
202         return r;
203 }
204
205 static int kvm_trace_enable(char __user *arg)
206 {
207         struct kvm_user_trace_setup kuts;
208         int ret;
209
210         ret = copy_from_user(&kuts, arg, sizeof(kuts));
211         if (ret)
212                 return -EFAULT;
213
214         ret = do_kvm_trace_enable(&kuts);
215         if (ret)
216                 return ret;
217
218         return 0;
219 }
220
221 static int kvm_trace_pause(void)
222 {
223         struct kvm_trace *kt = kvm_trace;
224         int r = -EINVAL;
225
226         if (kt == NULL)
227                 return r;
228
229         if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
230                 kt->trace_state = KVM_TRACE_STATE_PAUSE;
231                 relay_flush(kt->rchan);
232                 r = 0;
233         }
234
235         return r;
236 }
237
238 void kvm_trace_cleanup(void)
239 {
240         struct kvm_trace *kt = kvm_trace;
241         int i;
242
243         if (kt == NULL)
244                 return;
245
246         if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
247             kt->trace_state == KVM_TRACE_STATE_PAUSE) {
248
249                 kt->trace_state = KVM_TRACE_STATE_CLEARUP;
250
251                 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
252                         struct kvm_trace_probe *p = &kvm_trace_probes[i];
253                         marker_probe_unregister(p->name, p->probe_func, p);
254                 }
255
256                 relay_close(kt->rchan);
257                 debugfs_remove(kt->lost_file);
258                 kfree(kt);
259         }
260 }
261
262 int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
263 {
264         void __user *argp = (void __user *)arg;
265         long r = -EINVAL;
266
267         if (!capable(CAP_SYS_ADMIN))
268                 return -EPERM;
269
270         switch (ioctl) {
271         case KVM_TRACE_ENABLE:
272                 r = kvm_trace_enable(argp);
273                 break;
274         case KVM_TRACE_PAUSE:
275                 r = kvm_trace_pause();
276                 break;
277         case KVM_TRACE_DISABLE:
278                 r = 0;
279                 kvm_trace_cleanup();
280                 break;
281         }
282
283         return r;
284 }