4 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
8 * The floating proportion is a time derivative with an exponentially decaying
11 * p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
13 * Where j is an element from {prop_local}, x_{j} is j's number of events,
14 * and i the time period over which the differential is taken. So d/dt_{-i} is
15 * the differential over the i-th last period.
17 * The decaying history gives smooth transitions. The time differential carries
18 * the notion of speed.
20 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
22 * \Sum_{i=0} 1/2^(1+i) = 1
24 * Furthermore, if we measure time (t) in the same events as x; so that:
32 * Writing this in an iterative fashion we get (dropping the 'd's):
34 * if (++x_{j}, ++t > period)
43 * We optimize away the '/= 2' for the global time delta by noting that:
45 * if (++t > period) t /= 2:
47 * Can be approximated by:
49 * period/2 + (++t % (period/2))
51 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
52 * binary operations and wraparound artefacts disappear. ]
54 * Also note that this yields a natural counter of the elapsed periods:
58 * [ Its monotonically increasing property can be applied to mitigate the wrap-
61 * This allows us to do away with the loop over all prop_locals on each period
62 * expiration. By remembering the period count under which it was last accessed
63 * as c_{j}, we can obtain the number of 'missed' cycles from:
67 * We can then lazily catch up to the global period count every time we are
68 * going to use x_{j}, by doing:
70 * x_{j} /= 2^(c - c_{j}), c_{j} = c
73 #include <linux/proportions.h>
74 #include <linux/rcupdate.h>
77 * Limit the time part in order to ensure there are some bits left for the
80 #define PROP_MAX_SHIFT (3*BITS_PER_LONG/4)
82 int prop_descriptor_init(struct prop_descriptor *pd, int shift)
86 if (shift > PROP_MAX_SHIFT)
87 shift = PROP_MAX_SHIFT;
90 pd->pg[0].shift = shift;
91 mutex_init(&pd->mutex);
92 err = percpu_counter_init_irq(&pd->pg[0].events, 0);
96 err = percpu_counter_init_irq(&pd->pg[1].events, 0);
98 percpu_counter_destroy(&pd->pg[0].events);
105 * We have two copies, and flip between them to make it seem like an atomic
106 * update. The update is not really atomic wrt the events counter, but
107 * it is internally consistent with the bit layout depending on shift.
109 * We copy the events count, move the bits around and flip the index.
111 void prop_change_shift(struct prop_descriptor *pd, int shift)
118 if (shift > PROP_MAX_SHIFT)
119 shift = PROP_MAX_SHIFT;
121 mutex_lock(&pd->mutex);
123 index = pd->index ^ 1;
124 offset = pd->pg[pd->index].shift - shift;
128 pd->pg[index].shift = shift;
130 local_irq_save(flags);
131 events = percpu_counter_sum(&pd->pg[pd->index].events);
136 percpu_counter_set(&pd->pg[index].events, events);
139 * ensure the new pg is fully written before the switch
143 local_irq_restore(flags);
148 mutex_unlock(&pd->mutex);
152 * wrap the access to the data in an rcu_read_lock() section;
153 * this is used to track the active references.
155 static struct prop_global *prop_get_global(struct prop_descriptor *pd)
162 * match the wmb from prop_change_shift()
165 return &pd->pg[index];
168 static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
174 prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
176 int offset = *pl_shift - new_shift;
182 *pl_period <<= -offset;
184 *pl_period >>= offset;
186 *pl_shift = new_shift;
193 #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
195 int prop_local_init_percpu(struct prop_local_percpu *pl)
197 spin_lock_init(&pl->lock);
200 return percpu_counter_init_irq(&pl->events, 0);
203 void prop_local_destroy_percpu(struct prop_local_percpu *pl)
205 percpu_counter_destroy(&pl->events);
209 * Catch up with missed period expirations.
216 void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
218 unsigned long period = 1UL << (pg->shift - 1);
219 unsigned long period_mask = ~(period - 1);
220 unsigned long global_period;
223 global_period = percpu_counter_read(&pg->events);
224 global_period &= period_mask;
227 * Fast path - check if the local and global period count still match
228 * outside of the lock.
230 if (pl->period == global_period)
233 spin_lock_irqsave(&pl->lock, flags);
234 prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
237 * For each missed period, we halve the local counter.
239 * pl->events >> (global_period - pl->period);
241 period = (global_period - pl->period) >> (pg->shift - 1);
242 if (period < BITS_PER_LONG) {
243 s64 val = percpu_counter_read(&pl->events);
245 if (val < (nr_cpu_ids * PROP_BATCH))
246 val = percpu_counter_sum(&pl->events);
248 __percpu_counter_add(&pl->events, -val + (val >> period),
251 percpu_counter_set(&pl->events, 0);
253 pl->period = global_period;
254 spin_unlock_irqrestore(&pl->lock, flags);
260 void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
262 struct prop_global *pg = prop_get_global(pd);
264 prop_norm_percpu(pg, pl);
265 __percpu_counter_add(&pl->events, 1, PROP_BATCH);
266 percpu_counter_add(&pg->events, 1);
267 prop_put_global(pd, pg);
271 * Obtain a fraction of this proportion
273 * p_{j} = x_{j} / (period/2 + t % (period/2))
275 void prop_fraction_percpu(struct prop_descriptor *pd,
276 struct prop_local_percpu *pl,
277 long *numerator, long *denominator)
279 struct prop_global *pg = prop_get_global(pd);
280 unsigned long period_2 = 1UL << (pg->shift - 1);
281 unsigned long counter_mask = period_2 - 1;
282 unsigned long global_count;
284 prop_norm_percpu(pg, pl);
285 *numerator = percpu_counter_read_positive(&pl->events);
287 global_count = percpu_counter_read(&pg->events);
288 *denominator = period_2 + (global_count & counter_mask);
290 prop_put_global(pd, pg);
297 int prop_local_init_single(struct prop_local_single *pl)
299 spin_lock_init(&pl->lock);
306 void prop_local_destroy_single(struct prop_local_single *pl)
311 * Catch up with missed period expirations.
314 void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
316 unsigned long period = 1UL << (pg->shift - 1);
317 unsigned long period_mask = ~(period - 1);
318 unsigned long global_period;
321 global_period = percpu_counter_read(&pg->events);
322 global_period &= period_mask;
325 * Fast path - check if the local and global period count still match
326 * outside of the lock.
328 if (pl->period == global_period)
331 spin_lock_irqsave(&pl->lock, flags);
332 prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
334 * For each missed period, we halve the local counter.
336 period = (global_period - pl->period) >> (pg->shift - 1);
337 if (likely(period < BITS_PER_LONG))
338 pl->events >>= period;
341 pl->period = global_period;
342 spin_unlock_irqrestore(&pl->lock, flags);
348 void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
350 struct prop_global *pg = prop_get_global(pd);
352 prop_norm_single(pg, pl);
354 percpu_counter_add(&pg->events, 1);
355 prop_put_global(pd, pg);
359 * Obtain a fraction of this proportion
361 * p_{j} = x_{j} / (period/2 + t % (period/2))
363 void prop_fraction_single(struct prop_descriptor *pd,
364 struct prop_local_single *pl,
365 long *numerator, long *denominator)
367 struct prop_global *pg = prop_get_global(pd);
368 unsigned long period_2 = 1UL << (pg->shift - 1);
369 unsigned long counter_mask = period_2 - 1;
370 unsigned long global_count;
372 prop_norm_single(pg, pl);
373 *numerator = pl->events;
375 global_count = percpu_counter_read(&pg->events);
376 *denominator = period_2 + (global_count & counter_mask);
378 prop_put_global(pd, pg);