x86, bts: provide in-kernel branch-trace interface
[linux-2.6] / arch / x86 / kernel / ds.c
1 /*
2  * Debug Store support
3  *
4  * This provides a low-level interface to the hardware's Debug Store
5  * feature that is used for branch trace store (BTS) and
6  * precise-event based sampling (PEBS).
7  *
8  * It manages:
9  * - DS and BTS hardware configuration
10  * - buffer overflow handling (to be done)
11  * - buffer access
12  *
13  * It does not do:
14  * - security checking (is the caller allowed to trace the task)
15  * - buffer allocation (memory accounting)
16  *
17  *
18  * Copyright (C) 2007-2008 Intel Corporation.
19  * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
20  */
21
22
23 #include <asm/ds.h>
24
25 #include <linux/errno.h>
26 #include <linux/string.h>
27 #include <linux/slab.h>
28 #include <linux/sched.h>
29 #include <linux/mm.h>
30 #include <linux/kernel.h>
31
32
33 /*
34  * The configuration for a particular DS hardware implementation.
35  */
36 struct ds_configuration {
37         /* the name of the configuration */
38         const char *name;
39         /* the size of one pointer-typed field in the DS structure and
40            in the BTS and PEBS buffers in bytes;
41            this covers the first 8 DS fields related to buffer management. */
42         unsigned char  sizeof_field;
43         /* the size of a BTS/PEBS record in bytes */
44         unsigned char  sizeof_rec[2];
45         /* a series of bit-masks to control various features indexed
46          * by enum ds_feature */
47         unsigned long ctl[dsf_ctl_max];
48 };
49 static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
50
51 #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
52
53 #define MAX_SIZEOF_DS (12 * 8)  /* maximal size of a DS configuration */
54 #define MAX_SIZEOF_BTS (3 * 8)  /* maximal size of a BTS record */
55 #define DS_ALIGNMENT (1 << 3)   /* BTS and PEBS buffer alignment */
56
57 #define BTS_CONTROL \
58  (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
59   ds_cfg.ctl[dsf_bts_overflow])
60
61
62 /*
63  * A BTS or PEBS tracer.
64  *
65  * This holds the configuration of the tracer and serves as a handle
66  * to identify tracers.
67  */
68 struct ds_tracer {
69         /* the DS context (partially) owned by this tracer */
70         struct ds_context *context;
71         /* the buffer provided on ds_request() and its size in bytes */
72         void *buffer;
73         size_t size;
74 };
75
76 struct bts_tracer {
77         /* the common DS part */
78         struct ds_tracer ds;
79         /* the trace including the DS configuration */
80         struct bts_trace trace;
81         /* buffer overflow notification function */
82         bts_ovfl_callback_t ovfl;
83 };
84
85 struct pebs_tracer {
86         /* the common DS part */
87         struct ds_tracer ds;
88         /* the trace including the DS configuration */
89         struct pebs_trace trace;
90         /* buffer overflow notification function */
91         pebs_ovfl_callback_t ovfl;
92 };
93
94 /*
95  * Debug Store (DS) save area configuration (see Intel64 and IA32
96  * Architectures Software Developer's Manual, section 18.5)
97  *
98  * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
100  * - double-word aligned base linear address of the BTS buffer
101  * - write pointer into the BTS buffer
102  * - end linear address of the BTS buffer (one byte beyond the end of
103  *   the buffer)
104  * - interrupt pointer into BTS buffer
105  *   (interrupt occurs when write pointer passes interrupt pointer)
106  * - double-word aligned base linear address of the PEBS buffer
107  * - write pointer into the PEBS buffer
108  * - end linear address of the PEBS buffer (one byte beyond the end of
109  *   the buffer)
110  * - interrupt pointer into PEBS buffer
111  *   (interrupt occurs when write pointer passes interrupt pointer)
112  * - value to which counter is reset following counter overflow
113  *
114  * Later architectures use 64bit pointers throughout, whereas earlier
115  * architectures use 32bit pointers in 32bit mode.
116  *
117  *
118  * We compute the base address for the first 8 fields based on:
119  * - the field size stored in the DS configuration
120  * - the relative field position
121  * - an offset giving the start of the respective region
122  *
123  * This offset is further used to index various arrays holding
124  * information for BTS and PEBS at the respective index.
125  *
126  * On later 32bit processors, we only access the lower 32bit of the
127  * 64bit pointer fields. The upper halves will be zeroed out.
128  */
129
/* position of a buffer-management field within its BTS or PEBS group */
enum ds_field {
	ds_buffer_base = 0,		/* start of the buffer */
	ds_index,			/* current write pointer */
	ds_absolute_maximum,		/* one byte beyond the end of the buffer */
	ds_interrupt_threshold,		/* interrupt pointer */
};
136
/* selects the BTS or the PEBS group of fields; also indexes sizeof_rec[] */
enum ds_qualifier {
	ds_bts  = 0,
	ds_pebs
};
141
142 static inline unsigned long ds_get(const unsigned char *base,
143                                    enum ds_qualifier qual, enum ds_field field)
144 {
145         base += (ds_cfg.sizeof_field * (field + (4 * qual)));
146         return *(unsigned long *)base;
147 }
148
149 static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
150                           enum ds_field field, unsigned long value)
151 {
152         base += (ds_cfg.sizeof_field * (field + (4 * qual)));
153         (*(unsigned long *)base) = value;
154 }
155
156
/*
 * Protects the allocation and release of BTS and PEBS resources;
 * nothing else is locked.
 */
static DEFINE_SPINLOCK(ds_lock);
161
162
163 /*
164  * We either support (system-wide) per-cpu or per-thread allocation.
165  * We distinguish the two based on the task_struct pointer, where a
166  * NULL pointer indicates per-cpu allocation for the current cpu.
167  *
168  * Allocations are use-counted. As soon as resources are allocated,
169  * further allocations must be of the same type (per-cpu or
170  * per-thread). We model this by counting allocations (i.e. the number
171  * of tracers of a certain type) for one type negatively:
172  *   =0  no tracers
173  *   >0  number of per-thread tracers
174  *   <0  number of per-cpu tracers
175  *
176  * Tracers essentially gives the number of ds contexts for a certain
177  * type of allocation.
178  */
179 static atomic_t tracers = ATOMIC_INIT(0);
180
181 static inline void get_tracer(struct task_struct *task)
182 {
183         if (task)
184                 atomic_inc(&tracers);
185         else
186                 atomic_dec(&tracers);
187 }
188
189 static inline void put_tracer(struct task_struct *task)
190 {
191         if (task)
192                 atomic_dec(&tracers);
193         else
194                 atomic_inc(&tracers);
195 }
196
197 static inline int check_tracer(struct task_struct *task)
198 {
199         return task ?
200                 (atomic_read(&tracers) >= 0) :
201                 (atomic_read(&tracers) <= 0);
202 }
203
204
205 /*
206  * The DS context is either attached to a thread or to a cpu:
207  * - in the former case, the thread_struct contains a pointer to the
208  *   attached context.
209  * - in the latter case, we use a static array of per-cpu context
210  *   pointers.
211  *
212  * Contexts are use-counted. They are allocated on first access and
213  * deallocated when the last user puts the context.
214  */
215 struct ds_context {
216         /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
217         unsigned char ds[MAX_SIZEOF_DS];
218         /* the owner of the BTS and PEBS configuration, respectively */
219         struct bts_tracer *bts_master;
220         struct pebs_tracer *pebs_master;
221         /* use count */
222         unsigned long count;
223         /* a pointer to the context location inside the thread_struct
224          * or the per_cpu context array */
225         struct ds_context **this;
226         /* a pointer to the task owning this context, or NULL, if the
227          * context is owned by a cpu */
228         struct task_struct *task;
229 };
230
231 static DEFINE_PER_CPU(struct ds_context *, system_context_array);
232
233 #define system_context per_cpu(system_context_array, smp_processor_id())
234
235 static inline struct ds_context *ds_get_context(struct task_struct *task)
236 {
237         struct ds_context **p_context =
238                 (task ? &task->thread.ds_ctx : &system_context);
239         struct ds_context *context = *p_context;
240         unsigned long irq;
241
242         if (!context) {
243                 context = kzalloc(sizeof(*context), GFP_KERNEL);
244                 if (!context)
245                         return NULL;
246
247                 spin_lock_irqsave(&ds_lock, irq);
248
249                 if (*p_context) {
250                         kfree(context);
251
252                         context = *p_context;
253                 } else {
254                         *p_context = context;
255
256                         context->this = p_context;
257                         context->task = task;
258
259                         if (task)
260                                 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
261
262                         if (!task || (task == current))
263                                 wrmsrl(MSR_IA32_DS_AREA,
264                                        (unsigned long)context->ds);
265                 }
266
267                 context->count++;
268
269                 spin_unlock_irqrestore(&ds_lock, irq);
270         } else {
271                 spin_lock_irqsave(&ds_lock, irq);
272
273                 context = *p_context;
274                 if (context)
275                         context->count++;
276
277                 spin_unlock_irqrestore(&ds_lock, irq);
278
279                 if (!context)
280                         context = ds_get_context(task);
281         }
282
283         return context;
284 }
285
286 static inline void ds_put_context(struct ds_context *context)
287 {
288         unsigned long irq;
289
290         if (!context)
291                 return;
292
293         spin_lock_irqsave(&ds_lock, irq);
294
295         if (--context->count) {
296                 spin_unlock_irqrestore(&ds_lock, irq);
297                 return;
298         }
299
300         *(context->this) = NULL;
301
302         if (context->task)
303                 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
304
305         if (!context->task || (context->task == current))
306                 wrmsrl(MSR_IA32_DS_AREA, 0);
307
308         spin_unlock_irqrestore(&ds_lock, irq);
309
310         kfree(context);
311 }
312
313
314 /*
315  * Call the tracer's callback on a buffer overflow.
316  *
317  * context: the ds context
318  * qual: the buffer type
319  */
320 static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
321 {
322         switch (qual) {
323         case ds_bts:
324                 if (context->bts_master &&
325                     context->bts_master->ovfl)
326                         context->bts_master->ovfl(context->bts_master);
327                 break;
328         case ds_pebs:
329                 if (context->pebs_master &&
330                     context->pebs_master->ovfl)
331                         context->pebs_master->ovfl(context->pebs_master);
332                 break;
333         }
334 }
335
336
337 /*
338  * Write raw data into the BTS or PEBS buffer.
339  *
340  * The remainder of any partially written record is zeroed out.
341  *
342  * context: the DS context
343  * qual: the buffer type
344  * record: the data to write
345  * size: the size of the data
346  */
347 static int ds_write(struct ds_context *context, enum ds_qualifier qual,
348                     const void *record, size_t size)
349 {
350         int bytes_written = 0;
351
352         if (!record)
353                 return -EINVAL;
354
355         while (size) {
356                 unsigned long base, index, end, write_end, int_th;
357                 unsigned long write_size, adj_write_size;
358
359                 /*
360                  * write as much as possible without producing an
361                  * overflow interrupt.
362                  *
363                  * interrupt_threshold must either be
364                  * - bigger than absolute_maximum or
365                  * - point to a record between buffer_base and absolute_maximum
366                  *
367                  * index points to a valid record.
368                  */
369                 base   = ds_get(context->ds, qual, ds_buffer_base);
370                 index  = ds_get(context->ds, qual, ds_index);
371                 end    = ds_get(context->ds, qual, ds_absolute_maximum);
372                 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
373
374                 write_end = min(end, int_th);
375
376                 /* if we are already beyond the interrupt threshold,
377                  * we fill the entire buffer */
378                 if (write_end <= index)
379                         write_end = end;
380
381                 if (write_end <= index)
382                         break;
383
384                 write_size = min((unsigned long) size, write_end - index);
385                 memcpy((void *)index, record, write_size);
386
387                 record = (const char *)record + write_size;
388                 size -= write_size;
389                 bytes_written += write_size;
390
391                 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
392                 adj_write_size *= ds_cfg.sizeof_rec[qual];
393
394                 /* zero out trailing bytes */
395                 memset((char *)index + write_size, 0,
396                        adj_write_size - write_size);
397                 index += adj_write_size;
398
399                 if (index >= end)
400                         index = base;
401                 ds_set(context->ds, qual, ds_index, index);
402
403                 if (index >= int_th)
404                         ds_overflow(context, qual);
405         }
406
407         return bytes_written;
408 }
409
410
411 /*
412  * Branch Trace Store (BTS) uses the following format. Different
413  * architectures vary in the size of those fields.
414  * - source linear address
415  * - destination linear address
416  * - flags
417  *
418  * Later architectures use 64bit pointers throughout, whereas earlier
419  * architectures use 32bit pointers in 32bit mode.
420  *
421  * We compute the base address for the first 8 fields based on:
422  * - the field size stored in the DS configuration
423  * - the relative field position
424  *
425  * In order to store additional information in the BTS buffer, we use
426  * a special source address to indicate that the record requires
427  * special interpretation.
428  *
429  * Netburst indicated via a bit in the flags field whether the branch
430  * was predicted; this is ignored.
431  *
432  * We use two levels of abstraction:
433  * - the raw data level defined here
434  * - an arch-independent level defined in ds.h
435  */
436
437 enum bts_field {
438         bts_from,
439         bts_to,
440         bts_flags,
441
442         bts_qual = bts_from,
443         bts_jiffies = bts_to,
444         bts_pid = bts_flags,
445
446         bts_qual_mask = (bts_qual_max - 1),
447         bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
448 };
449
450 static inline unsigned long bts_get(const char *base, enum bts_field field)
451 {
452         base += (ds_cfg.sizeof_field * field);
453         return *(unsigned long *)base;
454 }
455
456 static inline void bts_set(char *base, enum bts_field field, unsigned long val)
457 {
458         base += (ds_cfg.sizeof_field * field);;
459         (*(unsigned long *)base) = val;
460 }
461
462
463 /*
464  * The raw BTS data is architecture dependent.
465  *
466  * For higher-level users, we give an arch-independent view.
467  * - ds.h defines struct bts_struct
468  * - bts_read translates one raw bts record into a bts_struct
469  * - bts_write translates one bts_struct into the raw format and
470  *   writes it into the top of the parameter tracer's buffer.
471  *
472  * return: bytes read/written on success; -Eerrno, otherwise
473  */
474 static int bts_read(struct bts_tracer *tracer, const void *at,
475                     struct bts_struct *out)
476 {
477         if (!tracer)
478                 return -EINVAL;
479
480         if (at < tracer->trace.ds.begin)
481                 return -EINVAL;
482
483         if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
484                 return -EINVAL;
485
486         memset(out, 0, sizeof(*out));
487         if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
488                 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
489                 out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
490                 out->variant.timestamp.pid = bts_get(at, bts_pid);
491         } else {
492                 out->qualifier = bts_branch;
493                 out->variant.lbr.from = bts_get(at, bts_from);
494                 out->variant.lbr.to   = bts_get(at, bts_to);
495         }
496
497         return ds_cfg.sizeof_rec[ds_bts];
498 }
499
500 static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
501 {
502         unsigned char raw[MAX_SIZEOF_BTS];
503
504         if (!tracer)
505                 return -EINVAL;
506
507         if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
508                 return -EOVERFLOW;
509
510         switch (in->qualifier) {
511         case bts_invalid:
512                 bts_set(raw, bts_from, 0);
513                 bts_set(raw, bts_to, 0);
514                 bts_set(raw, bts_flags, 0);
515                 break;
516         case bts_branch:
517                 bts_set(raw, bts_from, in->variant.lbr.from);
518                 bts_set(raw, bts_to,   in->variant.lbr.to);
519                 bts_set(raw, bts_flags, 0);
520                 break;
521         case bts_task_arrives:
522         case bts_task_departs:
523                 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
524                 bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
525                 bts_set(raw, bts_pid, in->variant.timestamp.pid);
526                 break;
527         default:
528                 return -EINVAL;
529         }
530
531         return ds_write(tracer->ds.context, ds_bts, raw,
532                         ds_cfg.sizeof_rec[ds_bts]);
533 }
534
535
536 static void ds_write_config(struct ds_context *context,
537                             struct ds_trace *cfg, enum ds_qualifier qual)
538 {
539         unsigned char *ds = context->ds;
540
541         ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
542         ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
543         ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
544         ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
545 }
546
547 static void ds_read_config(struct ds_context *context,
548                            struct ds_trace *cfg, enum ds_qualifier qual)
549 {
550         unsigned char *ds = context->ds;
551
552         cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
553         cfg->top = (void *)ds_get(ds, qual, ds_index);
554         cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
555         cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
556 }
557
558 static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
559                              void *base, size_t size, size_t ith,
560                              unsigned int flags) {
561         unsigned long buffer, adj;
562
563         /* adjust the buffer address and size to meet alignment
564          * constraints:
565          * - buffer is double-word aligned
566          * - size is multiple of record size
567          *
568          * We checked the size at the very beginning; we have enough
569          * space to do the adjustment.
570          */
571         buffer = (unsigned long)base;
572
573         adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
574         buffer += adj;
575         size   -= adj;
576
577         trace->n = size / ds_cfg.sizeof_rec[qual];
578         trace->size = ds_cfg.sizeof_rec[qual];
579
580         size = (trace->n * trace->size);
581
582         trace->begin = (void *)buffer;
583         trace->top = trace->begin;
584         trace->end = (void *)(buffer + size);
585         /* The value for 'no threshold' is -1, which will set the
586          * threshold outside of the buffer, just like we want it.
587          */
588         trace->ith = (void *)(buffer + size - ith);
589
590         trace->flags = flags;
591 }
592
593
594 static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
595                       enum ds_qualifier qual, struct task_struct *task,
596                       void *base, size_t size, size_t th, unsigned int flags)
597 {
598         struct ds_context *context;
599         int error;
600
601         error = -EINVAL;
602         if (!base)
603                 goto out;
604
605         /* we require some space to do alignment adjustments below */
606         error = -EINVAL;
607         if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
608                 goto out;
609
610         if (th != (size_t)-1) {
611                 th *= ds_cfg.sizeof_rec[qual];
612
613                 error = -EINVAL;
614                 if (size <= th)
615                         goto out;
616         }
617
618         tracer->buffer = base;
619         tracer->size = size;
620
621         error = -ENOMEM;
622         context = ds_get_context(task);
623         if (!context)
624                 goto out;
625         tracer->context = context;
626
627         ds_init_ds_trace(trace, qual, base, size, th, flags);
628
629         error = 0;
630  out:
631         return error;
632 }
633
634 struct bts_tracer *ds_request_bts(struct task_struct *task,
635                                   void *base, size_t size,
636                                   bts_ovfl_callback_t ovfl, size_t th,
637                                   unsigned int flags)
638 {
639         struct bts_tracer *tracer;
640         unsigned long irq;
641         int error;
642
643         error = -EOPNOTSUPP;
644         if (!ds_cfg.ctl[dsf_bts])
645                 goto out;
646
647         /* buffer overflow notification is not yet implemented */
648         error = -EOPNOTSUPP;
649         if (ovfl)
650                 goto out;
651
652         error = -ENOMEM;
653         tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
654         if (!tracer)
655                 goto out;
656         tracer->ovfl = ovfl;
657
658         error = ds_request(&tracer->ds, &tracer->trace.ds,
659                            ds_bts, task, base, size, th, flags);
660         if (error < 0)
661                 goto out_tracer;
662
663
664         spin_lock_irqsave(&ds_lock, irq);
665
666         error = -EPERM;
667         if (!check_tracer(task))
668                 goto out_unlock;
669         get_tracer(task);
670
671         error = -EPERM;
672         if (tracer->ds.context->bts_master)
673                 goto out_put_tracer;
674         tracer->ds.context->bts_master = tracer;
675
676         spin_unlock_irqrestore(&ds_lock, irq);
677
678
679         tracer->trace.read  = bts_read;
680         tracer->trace.write = bts_write;
681
682         ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
683         ds_resume_bts(tracer);
684
685         return tracer;
686
687  out_put_tracer:
688         put_tracer(task);
689  out_unlock:
690         spin_unlock_irqrestore(&ds_lock, irq);
691         ds_put_context(tracer->ds.context);
692  out_tracer:
693         kfree(tracer);
694  out:
695         return ERR_PTR(error);
696 }
697
698 struct pebs_tracer *ds_request_pebs(struct task_struct *task,
699                                     void *base, size_t size,
700                                     pebs_ovfl_callback_t ovfl, size_t th,
701                                     unsigned int flags)
702 {
703         struct pebs_tracer *tracer;
704         unsigned long irq;
705         int error;
706
707         /* buffer overflow notification is not yet implemented */
708         error = -EOPNOTSUPP;
709         if (ovfl)
710                 goto out;
711
712         error = -ENOMEM;
713         tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
714         if (!tracer)
715                 goto out;
716         tracer->ovfl = ovfl;
717
718         error = ds_request(&tracer->ds, &tracer->trace.ds,
719                            ds_pebs, task, base, size, th, flags);
720         if (error < 0)
721                 goto out_tracer;
722
723         spin_lock_irqsave(&ds_lock, irq);
724
725         error = -EPERM;
726         if (!check_tracer(task))
727                 goto out_unlock;
728         get_tracer(task);
729
730         error = -EPERM;
731         if (tracer->ds.context->pebs_master)
732                 goto out_put_tracer;
733         tracer->ds.context->pebs_master = tracer;
734
735         spin_unlock_irqrestore(&ds_lock, irq);
736
737         ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
738         ds_resume_pebs(tracer);
739
740         return tracer;
741
742  out_put_tracer:
743         put_tracer(task);
744  out_unlock:
745         spin_unlock_irqrestore(&ds_lock, irq);
746         ds_put_context(tracer->ds.context);
747  out_tracer:
748         kfree(tracer);
749  out:
750         return ERR_PTR(error);
751 }
752
753 void ds_release_bts(struct bts_tracer *tracer)
754 {
755         if (!tracer)
756                 return;
757
758         ds_suspend_bts(tracer);
759
760         WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
761         tracer->ds.context->bts_master = NULL;
762
763         put_tracer(tracer->ds.context->task);
764         ds_put_context(tracer->ds.context);
765
766         kfree(tracer);
767 }
768
769 void ds_suspend_bts(struct bts_tracer *tracer)
770 {
771         struct task_struct *task;
772
773         if (!tracer)
774                 return;
775
776         task = tracer->ds.context->task;
777
778         if (!task || (task == current))
779                 update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
780
781         if (task) {
782                 task->thread.debugctlmsr &= ~BTS_CONTROL;
783
784                 if (!task->thread.debugctlmsr)
785                         clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
786         }
787 }
788
789 void ds_resume_bts(struct bts_tracer *tracer)
790 {
791         struct task_struct *task;
792         unsigned long control;
793
794         if (!tracer)
795                 return;
796
797         task = tracer->ds.context->task;
798
799         control = ds_cfg.ctl[dsf_bts];
800         if (!(tracer->trace.ds.flags & BTS_KERNEL))
801                 control |= ds_cfg.ctl[dsf_bts_kernel];
802         if (!(tracer->trace.ds.flags & BTS_USER))
803                 control |= ds_cfg.ctl[dsf_bts_user];
804
805         if (task) {
806                 task->thread.debugctlmsr |= control;
807                 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
808         }
809
810         if (!task || (task == current))
811                 update_debugctlmsr(get_debugctlmsr() | control);
812 }
813
814 void ds_release_pebs(struct pebs_tracer *tracer)
815 {
816         if (!tracer)
817                 return;
818
819         ds_suspend_pebs(tracer);
820
821         WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
822         tracer->ds.context->pebs_master = NULL;
823
824         put_tracer(tracer->ds.context->task);
825         ds_put_context(tracer->ds.context);
826
827         kfree(tracer);
828 }
829
/* Counterpart of ds_suspend_bts() for PEBS; currently does nothing. */
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
}
834
/* Counterpart of ds_resume_bts() for PEBS; currently does nothing. */
void ds_resume_pebs(struct pebs_tracer *tracer)
{
}
839
840 const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
841 {
842         if (!tracer)
843                 return NULL;
844
845         ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
846         return &tracer->trace;
847 }
848
849 const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
850 {
851         if (!tracer)
852                 return NULL;
853
854         ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
855         tracer->trace.reset_value =
856                 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
857
858         return &tracer->trace;
859 }
860
861 int ds_reset_bts(struct bts_tracer *tracer)
862 {
863         if (!tracer)
864                 return -EINVAL;
865
866         tracer->trace.ds.top = tracer->trace.ds.begin;
867
868         ds_set(tracer->ds.context->ds, ds_bts, ds_index,
869                (unsigned long)tracer->trace.ds.top);
870
871         return 0;
872 }
873
874 int ds_reset_pebs(struct pebs_tracer *tracer)
875 {
876         if (!tracer)
877                 return -EINVAL;
878
879         tracer->trace.ds.top = tracer->trace.ds.begin;
880
881         ds_set(tracer->ds.context->ds, ds_bts, ds_index,
882                (unsigned long)tracer->trace.ds.top);
883
884         return 0;
885 }
886
887 int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
888 {
889         if (!tracer)
890                 return -EINVAL;
891
892         *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
893
894         return 0;
895 }
896
897 static const struct ds_configuration ds_cfg_netburst = {
898         .name = "netburst",
899         .ctl[dsf_bts]           = (1 << 2) | (1 << 3),
900         .ctl[dsf_bts_kernel]    = (1 << 5),
901         .ctl[dsf_bts_user]      = (1 << 6),
902
903         .sizeof_field           = sizeof(long),
904         .sizeof_rec[ds_bts]     = sizeof(long) * 3,
905 #ifdef __i386__
906         .sizeof_rec[ds_pebs]    = sizeof(long) * 10,
907 #else
908         .sizeof_rec[ds_pebs]    = sizeof(long) * 18,
909 #endif
910 };
911 static const struct ds_configuration ds_cfg_pentium_m = {
912         .name = "pentium m",
913         .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
914
915         .sizeof_field           = sizeof(long),
916         .sizeof_rec[ds_bts]     = sizeof(long) * 3,
917 #ifdef __i386__
918         .sizeof_rec[ds_pebs]    = sizeof(long) * 10,
919 #else
920         .sizeof_rec[ds_pebs]    = sizeof(long) * 18,
921 #endif
922 };
923 static const struct ds_configuration ds_cfg_core2 = {
924         .name = "core 2",
925         .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
926         .ctl[dsf_bts_kernel]    = (1 << 9),
927         .ctl[dsf_bts_user]      = (1 << 10),
928
929         .sizeof_field           = 8,
930         .sizeof_rec[ds_bts]     = 8 * 3,
931         .sizeof_rec[ds_pebs]    = 8 * 18,
932 };
933
934 static void
935 ds_configure(const struct ds_configuration *cfg)
936 {
937         memset(&ds_cfg, 0, sizeof(ds_cfg));
938         ds_cfg = *cfg;
939
940         printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
941
942         if (!cpu_has_bts) {
943                 ds_cfg.ctl[dsf_bts] = 0;
944                 printk(KERN_INFO "[ds] bts not available\n");
945         }
946         if (!cpu_has_pebs)
947                 printk(KERN_INFO "[ds] pebs not available\n");
948
949         WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
950 }
951
952 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
953 {
954         switch (c->x86) {
955         case 0x6:
956                 switch (c->x86_model) {
957                 case 0 ... 0xC:
958                         /* sorry, don't know about them */
959                         break;
960                 case 0xD:
961                 case 0xE: /* Pentium M */
962                         ds_configure(&ds_cfg_pentium_m);
963                         break;
964                 default: /* Core2, Atom, ... */
965                         ds_configure(&ds_cfg_core2);
966                         break;
967                 }
968                 break;
969         case 0xF:
970                 switch (c->x86_model) {
971                 case 0x0:
972                 case 0x1:
973                 case 0x2: /* Netburst */
974                         ds_configure(&ds_cfg_netburst);
975                         break;
976                 default:
977                         /* sorry, don't know about them */
978                         break;
979                 }
980                 break;
981         default:
982                 /* sorry, don't know about them */
983                 break;
984         }
985 }
986
987 /*
988  * Change the DS configuration from tracing prev to tracing next.
989  */
990 void ds_switch_to(struct task_struct *prev, struct task_struct *next)
991 {
992         struct ds_context *prev_ctx = prev->thread.ds_ctx;
993         struct ds_context *next_ctx = next->thread.ds_ctx;
994
995         if (prev_ctx) {
996                 update_debugctlmsr(0);
997
998                 if (prev_ctx->bts_master &&
999                     (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1000                         struct bts_struct ts = {
1001                                 .qualifier = bts_task_departs,
1002                                 .variant.timestamp.jiffies = jiffies_64,
1003                                 .variant.timestamp.pid = prev->pid
1004                         };
1005                         bts_write(prev_ctx->bts_master, &ts);
1006                 }
1007         }
1008
1009         if (next_ctx) {
1010                 if (next_ctx->bts_master &&
1011                     (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1012                         struct bts_struct ts = {
1013                                 .qualifier = bts_task_arrives,
1014                                 .variant.timestamp.jiffies = jiffies_64,
1015                                 .variant.timestamp.pid = next->pid
1016                         };
1017                         bts_write(next_ctx->bts_master, &ts);
1018                 }
1019
1020                 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1021         }
1022
1023         update_debugctlmsr(next->thread.debugctlmsr);
1024 }