/*
 * virt/kvm/kvm_trace.c (linux-2.6, merge of 'for-2.6.27' from
 * git://linux-nfs.org/~bfields/linux)
 */
/*
 * kvm trace
 *
 * Designed to allow debugging traces of kvm to be generated on UP/SMP
 * machines.  Each trace entry can be timestamped so that it's possible
 * to reconstruct a chronological record of trace events.
 * The implementation refers to blktrace kernel support.
 *
 * Copyright (c) 2008 Intel Corporation
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
 *
 * Date:    Feb 2008
 */
16
17 #include <linux/module.h>
18 #include <linux/relay.h>
19 #include <linux/debugfs.h>
20
21 #include <linux/kvm_host.h>
22
/* Session lifecycle states (mutually exclusive bits). */
#define KVM_TRACE_STATE_RUNNING         (1 << 0)	/* probes record events */
#define KVM_TRACE_STATE_PAUSE           (1 << 1)	/* channel open, events dropped */
#define KVM_TRACE_STATE_CLEARUP         (1 << 2)	/* teardown in progress */

/*
 * One trace session: the relay channel carrying records to user space
 * plus a debugfs counter of records dropped on full subbuffers.
 */
struct kvm_trace {
	int trace_state;		/* KVM_TRACE_STATE_* */
	struct rchan *rchan;		/* relay channel ("trace" in debugfs) */
	struct dentry *lost_file;	/* "lost_records" debugfs file */
	atomic_t lost_records;		/* records dropped because a subbuf was full */
};
/* Singleton session; non-NULL only between enable and cleanup. */
static struct kvm_trace *kvm_trace;
34
/*
 * Describes one kernel marker we attach to: the marker's name and
 * format string, and whether its records carry a cycle-counter
 * timestamp.
 */
struct kvm_trace_probe {
	const char *name;		/* marker name */
	const char *format;		/* marker vararg format string */
	u32 cycle_in;			/* non-zero: record includes get_cycles() */
	marker_probe_func *probe_func;	/* handler (kvm_add_trace for all entries) */
};
41
42 static inline int calc_rec_size(int cycle, int extra)
43 {
44         int rec_size = KVM_TRC_HEAD_SIZE;
45
46         rec_size += extra;
47         return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
48 }
49
/*
 * Marker probe handler: decode the marker's varargs into a fixed-layout
 * struct kvm_trace_rec and append it to the relay channel.
 *
 * Expected vararg order (matches the formats in kvm_trace_probes[]):
 * u32 event id, struct kvm_vcpu *, u32 payload-word count, then up to
 * KVM_TRC_EXTRA_MAX u32 payload words.  Do not reorder the va_arg
 * calls -- they must consume arguments in exactly this order.
 */
static void kvm_add_trace(void *probe_private, void *call_data,
                          const char *format, va_list *args)
{
        struct kvm_trace_probe *p = probe_private;
        struct kvm_trace *kt = kvm_trace;
        struct kvm_trace_rec rec;
        struct kvm_vcpu *vcpu;
        int    i, extra, size;

        /* Drop events unless tracing is actively running. */
        if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
                return;

        rec.event       = va_arg(*args, u32);
        vcpu            = va_arg(*args, struct kvm_vcpu *);
        rec.pid         = current->tgid;
        rec.vcpu_id     = vcpu->vcpu_id;

        /* Clamp the caller-supplied payload count; warn on violations. */
        extra           = va_arg(*args, u32);
        WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
        extra           = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
        rec.extra_u32   = extra;

        rec.cycle_in    = p->cycle_in;

        if (rec.cycle_in) {
                /* Timestamped variant: cycle counter precedes payload. */
                rec.u.cycle.cycle_u64 = get_cycles();

                for (i = 0; i < rec.extra_u32; i++)
                        rec.u.cycle.extra_u32[i] = va_arg(*args, u32);
        } else {
                for (i = 0; i < rec.extra_u32; i++)
                        rec.u.nocycle.extra_u32[i] = va_arg(*args, u32);
        }

        /* Write only the bytes this record actually uses. */
        size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32));
        relay_write(kt->rchan, &rec, size);
}
87
/* Markers we hook; only entry/exit records carry a cycle timestamp. */
static struct kvm_trace_probe kvm_trace_probes[] = {
	{ "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
	{ "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
};
92
93 static int lost_records_get(void *data, u64 *val)
94 {
95         struct kvm_trace *kt = data;
96
97         *val = atomic_read(&kt->lost_records);
98         return 0;
99 }
100
101 DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
102
/*
 * relay subbuffer-start callback.  The channel runs in no-overwrite
 * mode: when the buffer is full we refuse to switch subbuffers
 * (return 0) and count the dropped record, so user space can learn how
 * many records were lost.
 */
static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
                                     void *prev_subbuf, size_t prev_padding)
{
        struct kvm_trace *kt;

        if (!relay_buf_full(buf)) {
                if (!prev_subbuf) {
                        /*
                         * Executed only once when the channel is opened:
                         * save a magic constant as the first record
                         * (metadata -- presumably lets the reader detect
                         * byte order / stream format; confirm against the
                         * user-space consumer).
                         */
                        subbuf_start_reserve(buf, sizeof(u32));
                        *(u32 *)subbuf = 0x12345678;
                }

                return 1;
        }

        kt = buf->chan->private_data;
        atomic_inc(&kt->lost_records);

        return 0;
}
131
/*
 * relay callback: create a per-cpu trace buffer file in debugfs,
 * served by relayfs's file operations.
 * (NB: "callack" typo is kept -- the identifier is referenced from
 * kvm_relay_callbacks.)
 */
static struct dentry *kvm_create_buf_file_callack(const char *filename,
                                                 struct dentry *parent,
                                                 int mode,
                                                 struct rchan_buf *buf,
                                                 int *is_global)
{
        return debugfs_create_file(filename, mode, parent, buf,
                                   &relay_file_operations);
}
141
/* relay callback: remove a per-cpu trace buffer file from debugfs. */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
        debugfs_remove(dentry);

        return 0;
}
147
/* Hooks wiring the relay channel into debugfs and drop accounting. */
static struct rchan_callbacks kvm_relay_callbacks = {
	.subbuf_start		= kvm_subbuf_start_callback,
	.create_buf_file	= kvm_create_buf_file_callack,
	.remove_buf_file	= kvm_remove_buf_file_callback,
};
153
154 static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
155 {
156         struct kvm_trace *kt;
157         int i, r = -ENOMEM;
158
159         if (!kuts->buf_size || !kuts->buf_nr)
160                 return -EINVAL;
161
162         kt = kzalloc(sizeof(*kt), GFP_KERNEL);
163         if (!kt)
164                 goto err;
165
166         r = -EIO;
167         atomic_set(&kt->lost_records, 0);
168         kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
169                                             kt, &kvm_trace_lost_ops);
170         if (!kt->lost_file)
171                 goto err;
172
173         kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
174                                 kuts->buf_nr, &kvm_relay_callbacks, kt);
175         if (!kt->rchan)
176                 goto err;
177
178         kvm_trace = kt;
179
180         for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
181                 struct kvm_trace_probe *p = &kvm_trace_probes[i];
182
183                 r = marker_probe_register(p->name, p->format, p->probe_func, p);
184                 if (r)
185                         printk(KERN_INFO "Unable to register probe %s\n",
186                                p->name);
187         }
188
189         kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
190
191         return 0;
192 err:
193         if (kt) {
194                 if (kt->lost_file)
195                         debugfs_remove(kt->lost_file);
196                 if (kt->rchan)
197                         relay_close(kt->rchan);
198                 kfree(kt);
199         }
200         return r;
201 }
202
203 static int kvm_trace_enable(char __user *arg)
204 {
205         struct kvm_user_trace_setup kuts;
206         int ret;
207
208         ret = copy_from_user(&kuts, arg, sizeof(kuts));
209         if (ret)
210                 return -EFAULT;
211
212         ret = do_kvm_trace_enable(&kuts);
213         if (ret)
214                 return ret;
215
216         return 0;
217 }
218
219 static int kvm_trace_pause(void)
220 {
221         struct kvm_trace *kt = kvm_trace;
222         int r = -EINVAL;
223
224         if (kt == NULL)
225                 return r;
226
227         if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
228                 kt->trace_state = KVM_TRACE_STATE_PAUSE;
229                 relay_flush(kt->rchan);
230                 r = 0;
231         }
232
233         return r;
234 }
235
236 void kvm_trace_cleanup(void)
237 {
238         struct kvm_trace *kt = kvm_trace;
239         int i;
240
241         if (kt == NULL)
242                 return;
243
244         if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
245             kt->trace_state == KVM_TRACE_STATE_PAUSE) {
246
247                 kt->trace_state = KVM_TRACE_STATE_CLEARUP;
248
249                 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
250                         struct kvm_trace_probe *p = &kvm_trace_probes[i];
251                         marker_probe_unregister(p->name, p->probe_func, p);
252                 }
253
254                 relay_close(kt->rchan);
255                 debugfs_remove(kt->lost_file);
256                 kfree(kt);
257         }
258 }
259
260 int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
261 {
262         void __user *argp = (void __user *)arg;
263         long r = -EINVAL;
264
265         if (!capable(CAP_SYS_ADMIN))
266                 return -EPERM;
267
268         switch (ioctl) {
269         case KVM_TRACE_ENABLE:
270                 r = kvm_trace_enable(argp);
271                 break;
272         case KVM_TRACE_PAUSE:
273                 r = kvm_trace_pause();
274                 break;
275         case KVM_TRACE_DISABLE:
276                 r = 0;
277                 kvm_trace_cleanup();
278                 break;
279         }
280
281         return r;
282 }