Merge phase #4 (X2APIC, APIC unification, CPU identification unification) of git...
[linux-2.6] / drivers / cpufreq / cpufreq.c
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *      Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *      Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31
32 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
33                                                 "cpufreq-core", msg)
34
35 /**
36  * The "cpufreq driver" - the arch- or hardware-dependent low
37  * level driver of CPUFreq support, and its spinlock. This lock
38  * also protects the cpufreq_cpu_data array.
39  */
40 static struct cpufreq_driver *cpufreq_driver;
41 static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data);
42 #ifdef CONFIG_HOTPLUG_CPU
43 /* This one keeps track of the previously set governor of a removed CPU */
44 static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor);
45 #endif
46 static DEFINE_SPINLOCK(cpufreq_driver_lock);
47
48 /*
49  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
50  * all cpufreq/hotplug/workqueue/etc related lock issues.
51  *
52  * The rules for this semaphore:
53  * - Any routine that wants to read from the policy structure will
54  *   do a down_read on this semaphore.
55  * - Any routine that will write to the policy structure and/or may take away
56  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
57  *   mode before doing so.
58  *
59  * Additional rules:
60  * - All holders of the lock should check to make sure that the CPU they
61  *   are concerned with are online after they get the lock.
62  * - Governor routines that can be called in cpufreq hotplug path should not
63  *   take this sem as top level hotplug notifier handler takes this.
64  */
65 static DEFINE_PER_CPU(int, policy_cpu);
66 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
67
68 #define lock_policy_rwsem(mode, cpu)                                    \
69 int lock_policy_rwsem_##mode                                            \
70 (int cpu)                                                               \
71 {                                                                       \
72         int policy_cpu = per_cpu(policy_cpu, cpu);                      \
73         BUG_ON(policy_cpu == -1);                                       \
74         down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
75         if (unlikely(!cpu_online(cpu))) {                               \
76                 up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
77                 return -1;                                              \
78         }                                                               \
79                                                                         \
80         return 0;                                                       \
81 }
82
83 lock_policy_rwsem(read, cpu);
84 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
85
86 lock_policy_rwsem(write, cpu);
87 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
88
89 void unlock_policy_rwsem_read(int cpu)
90 {
91         int policy_cpu = per_cpu(policy_cpu, cpu);
92         BUG_ON(policy_cpu == -1);
93         up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
94 }
95 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
96
97 void unlock_policy_rwsem_write(int cpu)
98 {
99         int policy_cpu = per_cpu(policy_cpu, cpu);
100         BUG_ON(policy_cpu == -1);
101         up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
102 }
103 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
104
105
106 /* internal prototypes */
107 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
108 static unsigned int __cpufreq_get(unsigned int cpu);
109 static void handle_update(struct work_struct *work);
110
/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list is for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 * The policy list is a blocking notifier chain; the transition
 * list is an SRCU notifier chain whose head is initialised at
 * run time by init_cpufreq_transition_notifier_list().
 */
118 static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
119 static struct srcu_notifier_head cpufreq_transition_notifier_list;
120
121 static bool init_cpufreq_transition_notifier_list_called;
122 static int __init init_cpufreq_transition_notifier_list(void)
123 {
124         srcu_init_notifier_head(&cpufreq_transition_notifier_list);
125         init_cpufreq_transition_notifier_list_called = true;
126         return 0;
127 }
128 pure_initcall(init_cpufreq_transition_notifier_list);
129
130 static LIST_HEAD(cpufreq_governor_list);
131 static DEFINE_MUTEX (cpufreq_governor_mutex);
132
133 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
134 {
135         struct cpufreq_policy *data;
136         unsigned long flags;
137
138         if (cpu >= nr_cpu_ids)
139                 goto err_out;
140
141         /* get the cpufreq driver */
142         spin_lock_irqsave(&cpufreq_driver_lock, flags);
143
144         if (!cpufreq_driver)
145                 goto err_out_unlock;
146
147         if (!try_module_get(cpufreq_driver->owner))
148                 goto err_out_unlock;
149
150
151         /* get the CPU */
152         data = per_cpu(cpufreq_cpu_data, cpu);
153
154         if (!data)
155                 goto err_out_put_module;
156
157         if (!kobject_get(&data->kobj))
158                 goto err_out_put_module;
159
160         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
161         return data;
162
163 err_out_put_module:
164         module_put(cpufreq_driver->owner);
165 err_out_unlock:
166         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
167 err_out:
168         return NULL;
169 }
170 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
171
172
173 void cpufreq_cpu_put(struct cpufreq_policy *data)
174 {
175         kobject_put(&data->kobj);
176         module_put(cpufreq_driver->owner);
177 }
178 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
179
180
181 /*********************************************************************
182  *                     UNIFIED DEBUG HELPERS                         *
183  *********************************************************************/
184 #ifdef CONFIG_CPU_FREQ_DEBUG
185
186 /* what part(s) of the CPUfreq subsystem are debugged? */
187 static unsigned int debug;
188
189 /* is the debug output ratelimit'ed using printk_ratelimit? User can
190  * set or modify this value.
191  */
192 static unsigned int debug_ratelimit = 1;
193
194 /* is the printk_ratelimit'ing enabled? It's enabled after a successful
195  * loading of a cpufreq driver, temporarily disabled when a new policy
196  * is set, and disabled upon cpufreq driver removal
197  */
198 static unsigned int disable_ratelimit = 1;
199 static DEFINE_SPINLOCK(disable_ratelimit_lock);
200
201 static void cpufreq_debug_enable_ratelimit(void)
202 {
203         unsigned long flags;
204
205         spin_lock_irqsave(&disable_ratelimit_lock, flags);
206         if (disable_ratelimit)
207                 disable_ratelimit--;
208         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
209 }
210
211 static void cpufreq_debug_disable_ratelimit(void)
212 {
213         unsigned long flags;
214
215         spin_lock_irqsave(&disable_ratelimit_lock, flags);
216         disable_ratelimit++;
217         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
218 }
219
220 void cpufreq_debug_printk(unsigned int type, const char *prefix,
221                         const char *fmt, ...)
222 {
223         char s[256];
224         va_list args;
225         unsigned int len;
226         unsigned long flags;
227
228         WARN_ON(!prefix);
229         if (type & debug) {
230                 spin_lock_irqsave(&disable_ratelimit_lock, flags);
231                 if (!disable_ratelimit && debug_ratelimit
232                                         && !printk_ratelimit()) {
233                         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
234                         return;
235                 }
236                 spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
237
238                 len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
239
240                 va_start(args, fmt);
241                 len += vsnprintf(&s[len], (256 - len), fmt, args);
242                 va_end(args);
243
244                 printk(s);
245
246                 WARN_ON(len < 5);
247         }
248 }
249 EXPORT_SYMBOL(cpufreq_debug_printk);
250
251
252 module_param(debug, uint, 0644);
253 MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
254                         " 2 to debug drivers, and 4 to debug governors.");
255
256 module_param(debug_ratelimit, uint, 0644);
257 MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
258                                         " set to 0 to disable ratelimiting.");
259
260 #else /* !CONFIG_CPU_FREQ_DEBUG */
261
/* No-op stub used when CONFIG_CPU_FREQ_DEBUG is not set. */
static inline void cpufreq_debug_enable_ratelimit(void) { }
/* No-op stub used when CONFIG_CPU_FREQ_DEBUG is not set. */
static inline void cpufreq_debug_disable_ratelimit(void) { }
264
265 #endif /* CONFIG_CPU_FREQ_DEBUG */
266
267
268 /*********************************************************************
269  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
270  *********************************************************************/
271
272 /**
273  * adjust_jiffies - adjust the system "loops_per_jiffy"
274  *
275  * This function alters the system "loops_per_jiffy" for the clock
276  * speed change. Note that loops_per_jiffy cannot be updated on SMP
277  * systems as each CPU might be scaled differently. So, use the arch
278  * per-CPU loops_per_jiffy value wherever possible.
279  */
280 #ifndef CONFIG_SMP
281 static unsigned long l_p_j_ref;
282 static unsigned int  l_p_j_ref_freq;
283
284 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
285 {
286         if (ci->flags & CPUFREQ_CONST_LOOPS)
287                 return;
288
289         if (!l_p_j_ref_freq) {
290                 l_p_j_ref = loops_per_jiffy;
291                 l_p_j_ref_freq = ci->old;
292                 dprintk("saving %lu as reference value for loops_per_jiffy; "
293                         "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
294         }
295         if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
296             (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
297             (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
298                 loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
299                                                                 ci->new);
300                 dprintk("scaling loops_per_jiffy to %lu "
301                         "for frequency %u kHz\n", loops_per_jiffy, ci->new);
302         }
303 }
304 #else
/* SMP build: the global loops_per_jiffy is left untouched. */
static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
{
}
309 #endif
310
311
312 /**
313  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
314  * on frequency transition.
315  *
316  * This function calls the transition notifiers and the "adjust_jiffies"
317  * function. It is called twice on all CPU frequency changes that have
318  * external effects.
319  */
320 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
321 {
322         struct cpufreq_policy *policy;
323
324         BUG_ON(irqs_disabled());
325
326         freqs->flags = cpufreq_driver->flags;
327         dprintk("notification %u of frequency transition to %u kHz\n",
328                 state, freqs->new);
329
330         policy = per_cpu(cpufreq_cpu_data, freqs->cpu);
331         switch (state) {
332
333         case CPUFREQ_PRECHANGE:
334                 /* detect if the driver reported a value as "old frequency"
335                  * which is not equal to what the cpufreq core thinks is
336                  * "old frequency".
337                  */
338                 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
339                         if ((policy) && (policy->cpu == freqs->cpu) &&
340                             (policy->cur) && (policy->cur != freqs->old)) {
341                                 dprintk("Warning: CPU frequency is"
342                                         " %u, cpufreq assumed %u kHz.\n",
343                                         freqs->old, policy->cur);
344                                 freqs->old = policy->cur;
345                         }
346                 }
347                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
348                                 CPUFREQ_PRECHANGE, freqs);
349                 adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
350                 break;
351
352         case CPUFREQ_POSTCHANGE:
353                 adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
354                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
355                                 CPUFREQ_POSTCHANGE, freqs);
356                 if (likely(policy) && likely(policy->cpu == freqs->cpu))
357                         policy->cur = freqs->new;
358                 break;
359         }
360 }
361 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
362
363
364
365 /*********************************************************************
366  *                          SYSFS INTERFACE                          *
367  *********************************************************************/
368
369 static struct cpufreq_governor *__find_governor(const char *str_governor)
370 {
371         struct cpufreq_governor *t;
372
373         list_for_each_entry(t, &cpufreq_governor_list, governor_list)
374                 if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN))
375                         return t;
376
377         return NULL;
378 }
379
380 /**
381  * cpufreq_parse_governor - parse a governor string
382  */
383 static int cpufreq_parse_governor(char *str_governor, unsigned int *policy,
384                                 struct cpufreq_governor **governor)
385 {
386         int err = -EINVAL;
387
388         if (!cpufreq_driver)
389                 goto out;
390
391         if (cpufreq_driver->setpolicy) {
392                 if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
393                         *policy = CPUFREQ_POLICY_PERFORMANCE;
394                         err = 0;
395                 } else if (!strnicmp(str_governor, "powersave",
396                                                 CPUFREQ_NAME_LEN)) {
397                         *policy = CPUFREQ_POLICY_POWERSAVE;
398                         err = 0;
399                 }
400         } else if (cpufreq_driver->target) {
401                 struct cpufreq_governor *t;
402
403                 mutex_lock(&cpufreq_governor_mutex);
404
405                 t = __find_governor(str_governor);
406
407                 if (t == NULL) {
408                         char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
409                                                                 str_governor);
410
411                         if (name) {
412                                 int ret;
413
414                                 mutex_unlock(&cpufreq_governor_mutex);
415                                 ret = request_module("%s", name);
416                                 mutex_lock(&cpufreq_governor_mutex);
417
418                                 if (ret == 0)
419                                         t = __find_governor(str_governor);
420                         }
421
422                         kfree(name);
423                 }
424
425                 if (t != NULL) {
426                         *governor = t;
427                         err = 0;
428                 }
429
430                 mutex_unlock(&cpufreq_governor_mutex);
431         }
432   out:
433         return err;
434 }
435
436
437 /* drivers/base/cpu.c */
438 extern struct sysdev_class cpu_sysdev_class;
439
440
441 /**
442  * cpufreq_per_cpu_attr_read() / show_##file_name() -
443  * print out cpufreq information
444  *
445  * Write out information from cpufreq_driver->policy[cpu]; object must be
446  * "unsigned int".
447  */
448
449 #define show_one(file_name, object)                     \
450 static ssize_t show_##file_name                         \
451 (struct cpufreq_policy *policy, char *buf)              \
452 {                                                       \
453         return sprintf (buf, "%u\n", policy->object);   \
454 }
455
456 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
457 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
458 show_one(scaling_min_freq, min);
459 show_one(scaling_max_freq, max);
460 show_one(scaling_cur_freq, cur);
461
462 static int __cpufreq_set_policy(struct cpufreq_policy *data,
463                                 struct cpufreq_policy *policy);
464
465 /**
466  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
467  */
468 #define store_one(file_name, object)                    \
469 static ssize_t store_##file_name                                        \
470 (struct cpufreq_policy *policy, const char *buf, size_t count)          \
471 {                                                                       \
472         unsigned int ret = -EINVAL;                                     \
473         struct cpufreq_policy new_policy;                               \
474                                                                         \
475         ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
476         if (ret)                                                        \
477                 return -EINVAL;                                         \
478                                                                         \
479         ret = sscanf (buf, "%u", &new_policy.object);                   \
480         if (ret != 1)                                                   \
481                 return -EINVAL;                                         \
482                                                                         \
483         ret = __cpufreq_set_policy(policy, &new_policy);                \
484         policy->user_policy.object = policy->object;                    \
485                                                                         \
486         return ret ? ret : count;                                       \
487 }
488
489 store_one(scaling_min_freq,min);
490 store_one(scaling_max_freq,max);
491
492 /**
493  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
494  */
495 static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy,
496                                         char *buf)
497 {
498         unsigned int cur_freq = __cpufreq_get(policy->cpu);
499         if (!cur_freq)
500                 return sprintf(buf, "<unknown>");
501         return sprintf(buf, "%u\n", cur_freq);
502 }
503
504
505 /**
506  * show_scaling_governor - show the current policy for the specified CPU
507  */
508 static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
509 {
510         if(policy->policy == CPUFREQ_POLICY_POWERSAVE)
511                 return sprintf(buf, "powersave\n");
512         else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
513                 return sprintf(buf, "performance\n");
514         else if (policy->governor)
515                 return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name);
516         return -EINVAL;
517 }
518
519
520 /**
521  * store_scaling_governor - store policy for the specified CPU
522  */
523 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
524                                         const char *buf, size_t count)
525 {
526         unsigned int ret = -EINVAL;
527         char    str_governor[16];
528         struct cpufreq_policy new_policy;
529
530         ret = cpufreq_get_policy(&new_policy, policy->cpu);
531         if (ret)
532                 return ret;
533
534         ret = sscanf (buf, "%15s", str_governor);
535         if (ret != 1)
536                 return -EINVAL;
537
538         if (cpufreq_parse_governor(str_governor, &new_policy.policy,
539                                                 &new_policy.governor))
540                 return -EINVAL;
541
542         /* Do not use cpufreq_set_policy here or the user_policy.max
543            will be wrongly overridden */
544         ret = __cpufreq_set_policy(policy, &new_policy);
545
546         policy->user_policy.policy = policy->policy;
547         policy->user_policy.governor = policy->governor;
548
549         if (ret)
550                 return ret;
551         else
552                 return count;
553 }
554
555 /**
556  * show_scaling_driver - show the cpufreq driver currently loaded
557  */
558 static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf)
559 {
560         return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
561 }
562
563 /**
564  * show_scaling_available_governors - show the available CPUfreq governors
565  */
566 static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy,
567                                                 char *buf)
568 {
569         ssize_t i = 0;
570         struct cpufreq_governor *t;
571
572         if (!cpufreq_driver->target) {
573                 i += sprintf(buf, "performance powersave");
574                 goto out;
575         }
576
577         list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
578                 if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2)))
579                         goto out;
580                 i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
581         }
582 out:
583         i += sprintf(&buf[i], "\n");
584         return i;
585 }
586
587 static ssize_t show_cpus(cpumask_t mask, char *buf)
588 {
589         ssize_t i = 0;
590         unsigned int cpu;
591
592         for_each_cpu_mask_nr(cpu, mask) {
593                 if (i)
594                         i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
595                 i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
596                 if (i >= (PAGE_SIZE - 5))
597                     break;
598         }
599         i += sprintf(&buf[i], "\n");
600         return i;
601 }
602
603 /**
604  * show_related_cpus - show the CPUs affected by each transition even if
605  * hw coordination is in use
606  */
607 static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf)
608 {
609         if (cpus_empty(policy->related_cpus))
610                 return show_cpus(policy->cpus, buf);
611         return show_cpus(policy->related_cpus, buf);
612 }
613
614 /**
615  * show_affected_cpus - show the CPUs affected by each transition
616  */
617 static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf)
618 {
619         return show_cpus(policy->cpus, buf);
620 }
621
622 static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy,
623                                         const char *buf, size_t count)
624 {
625         unsigned int freq = 0;
626         unsigned int ret;
627
628         if (!policy->governor || !policy->governor->store_setspeed)
629                 return -EINVAL;
630
631         ret = sscanf(buf, "%u", &freq);
632         if (ret != 1)
633                 return -EINVAL;
634
635         policy->governor->store_setspeed(policy, freq);
636
637         return count;
638 }
639
640 static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf)
641 {
642         if (!policy->governor || !policy->governor->show_setspeed)
643                 return sprintf(buf, "<unsupported>\n");
644
645         return policy->governor->show_setspeed(policy, buf);
646 }
647
/* Declare a world-readable (0444) attribute backed by show_<name>(). */
#define define_one_ro(_name)						\
static struct freq_attr _name =						\
	__ATTR(_name, 0444, show_##_name, NULL)

/* Declare an owner-only readable (0400) attribute backed by show_<name>(). */
#define define_one_ro0400(_name)					\
static struct freq_attr _name =						\
	__ATTR(_name, 0400, show_##_name, NULL)

/* Declare a 0644 attribute backed by show_<name>() and store_<name>(). */
#define define_one_rw(_name)						\
static struct freq_attr _name =						\
	__ATTR(_name, 0644, show_##_name, store_##_name)
659
660 define_one_ro0400(cpuinfo_cur_freq);
661 define_one_ro(cpuinfo_min_freq);
662 define_one_ro(cpuinfo_max_freq);
663 define_one_ro(scaling_available_governors);
664 define_one_ro(scaling_driver);
665 define_one_ro(scaling_cur_freq);
666 define_one_ro(related_cpus);
667 define_one_ro(affected_cpus);
668 define_one_rw(scaling_min_freq);
669 define_one_rw(scaling_max_freq);
670 define_one_rw(scaling_governor);
671 define_one_rw(scaling_setspeed);
672
673 static struct attribute *default_attrs[] = {
674         &cpuinfo_min_freq.attr,
675         &cpuinfo_max_freq.attr,
676         &scaling_min_freq.attr,
677         &scaling_max_freq.attr,
678         &affected_cpus.attr,
679         &related_cpus.attr,
680         &scaling_governor.attr,
681         &scaling_driver.attr,
682         &scaling_available_governors.attr,
683         &scaling_setspeed.attr,
684         NULL
685 };
686
/* Map an embedded kobject back to its enclosing cpufreq_policy. */
#define to_policy(k)	container_of(k, struct cpufreq_policy, kobj)
/* Map an embedded attribute back to its enclosing freq_attr. */
#define to_attr(a)	container_of(a, struct freq_attr, attr)
689
690 static ssize_t show(struct kobject *kobj, struct attribute *attr ,char *buf)
691 {
692         struct cpufreq_policy *policy = to_policy(kobj);
693         struct freq_attr *fattr = to_attr(attr);
694         ssize_t ret = -EINVAL;
695         policy = cpufreq_cpu_get(policy->cpu);
696         if (!policy)
697                 goto no_policy;
698
699         if (lock_policy_rwsem_read(policy->cpu) < 0)
700                 goto fail;
701
702         if (fattr->show)
703                 ret = fattr->show(policy, buf);
704         else
705                 ret = -EIO;
706
707         unlock_policy_rwsem_read(policy->cpu);
708 fail:
709         cpufreq_cpu_put(policy);
710 no_policy:
711         return ret;
712 }
713
714 static ssize_t store(struct kobject *kobj, struct attribute *attr,
715                      const char *buf, size_t count)
716 {
717         struct cpufreq_policy *policy = to_policy(kobj);
718         struct freq_attr *fattr = to_attr(attr);
719         ssize_t ret = -EINVAL;
720         policy = cpufreq_cpu_get(policy->cpu);
721         if (!policy)
722                 goto no_policy;
723
724         if (lock_policy_rwsem_write(policy->cpu) < 0)
725                 goto fail;
726
727         if (fattr->store)
728                 ret = fattr->store(policy, buf, count);
729         else
730                 ret = -EIO;
731
732         unlock_policy_rwsem_write(policy->cpu);
733 fail:
734         cpufreq_cpu_put(policy);
735 no_policy:
736         return ret;
737 }
738
739 static void cpufreq_sysfs_release(struct kobject *kobj)
740 {
741         struct cpufreq_policy *policy = to_policy(kobj);
742         dprintk("last reference is dropped\n");
743         complete(&policy->kobj_unregister);
744 }
745
746 static struct sysfs_ops sysfs_ops = {
747         .show   = show,
748         .store  = store,
749 };
750
751 static struct kobj_type ktype_cpufreq = {
752         .sysfs_ops      = &sysfs_ops,
753         .default_attrs  = default_attrs,
754         .release        = cpufreq_sysfs_release,
755 };
756
757
758 /**
759  * cpufreq_add_dev - add a CPU device
760  *
761  * Adds the cpufreq interface for a CPU device.
762  */
763 static int cpufreq_add_dev(struct sys_device *sys_dev)
764 {
765         unsigned int cpu = sys_dev->id;
766         int ret = 0;
767         struct cpufreq_policy new_policy;
768         struct cpufreq_policy *policy;
769         struct freq_attr **drv_attr;
770         struct sys_device *cpu_sys_dev;
771         unsigned long flags;
772         unsigned int j;
773 #ifdef CONFIG_SMP
774         struct cpufreq_policy *managed_policy;
775 #endif
776
777         if (cpu_is_offline(cpu))
778                 return 0;
779
780         cpufreq_debug_disable_ratelimit();
781         dprintk("adding CPU %u\n", cpu);
782
783 #ifdef CONFIG_SMP
784         /* check whether a different CPU already registered this
785          * CPU because it is in the same boat. */
786         policy = cpufreq_cpu_get(cpu);
787         if (unlikely(policy)) {
788                 cpufreq_cpu_put(policy);
789                 cpufreq_debug_enable_ratelimit();
790                 return 0;
791         }
792 #endif
793
794         if (!try_module_get(cpufreq_driver->owner)) {
795                 ret = -EINVAL;
796                 goto module_out;
797         }
798
799         policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
800         if (!policy) {
801                 ret = -ENOMEM;
802                 goto nomem_out;
803         }
804
805         policy->cpu = cpu;
806         policy->cpus = cpumask_of_cpu(cpu);
807
808         /* Initially set CPU itself as the policy_cpu */
809         per_cpu(policy_cpu, cpu) = cpu;
810         lock_policy_rwsem_write(cpu);
811
812         init_completion(&policy->kobj_unregister);
813         INIT_WORK(&policy->update, handle_update);
814
815         /* Set governor before ->init, so that driver could check it */
816         policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
817         /* call driver. From then on the cpufreq must be able
818          * to accept all calls to ->verify and ->setpolicy for this CPU
819          */
820         ret = cpufreq_driver->init(policy);
821         if (ret) {
822                 dprintk("initialization failed\n");
823                 goto err_out;
824         }
825         policy->user_policy.min = policy->cpuinfo.min_freq;
826         policy->user_policy.max = policy->cpuinfo.max_freq;
827
828         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
829                                      CPUFREQ_START, policy);
830
831 #ifdef CONFIG_SMP
832
833 #ifdef CONFIG_HOTPLUG_CPU
834         if (per_cpu(cpufreq_cpu_governor, cpu)) {
835                 policy->governor = per_cpu(cpufreq_cpu_governor, cpu);
836                 dprintk("Restoring governor %s for cpu %d\n",
837                        policy->governor->name, cpu);
838         }
839 #endif
840
841         for_each_cpu_mask_nr(j, policy->cpus) {
842                 if (cpu == j)
843                         continue;
844
845                 /* check for existing affected CPUs.  They may not be aware
846                  * of it due to CPU Hotplug.
847                  */
848                 managed_policy = cpufreq_cpu_get(j);            // FIXME: Where is this released?  What about error paths?
849                 if (unlikely(managed_policy)) {
850
851                         /* Set proper policy_cpu */
852                         unlock_policy_rwsem_write(cpu);
853                         per_cpu(policy_cpu, cpu) = managed_policy->cpu;
854
855                         if (lock_policy_rwsem_write(cpu) < 0)
856                                 goto err_out_driver_exit;
857
858                         spin_lock_irqsave(&cpufreq_driver_lock, flags);
859                         managed_policy->cpus = policy->cpus;
860                         per_cpu(cpufreq_cpu_data, cpu) = managed_policy;
861                         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
862
863                         dprintk("CPU already managed, adding link\n");
864                         ret = sysfs_create_link(&sys_dev->kobj,
865                                                 &managed_policy->kobj,
866                                                 "cpufreq");
867                         if (ret)
868                                 goto err_out_driver_exit;
869
870                         cpufreq_debug_enable_ratelimit();
871                         ret = 0;
872                         goto err_out_driver_exit; /* call driver->exit() */
873                 }
874         }
875 #endif
876         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
877
878         /* prepare interface data */
879         ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj,
880                                    "cpufreq");
881         if (ret)
882                 goto err_out_driver_exit;
883
884         /* set up files for this cpu device */
885         drv_attr = cpufreq_driver->attr;
886         while ((drv_attr) && (*drv_attr)) {
887                 ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
888                 if (ret)
889                         goto err_out_driver_exit;
890                 drv_attr++;
891         }
892         if (cpufreq_driver->get) {
893                 ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
894                 if (ret)
895                         goto err_out_driver_exit;
896         }
897         if (cpufreq_driver->target) {
898                 ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
899                 if (ret)
900                         goto err_out_driver_exit;
901         }
902
903         spin_lock_irqsave(&cpufreq_driver_lock, flags);
904         for_each_cpu_mask_nr(j, policy->cpus) {
905                 per_cpu(cpufreq_cpu_data, j) = policy;
906                 per_cpu(policy_cpu, j) = policy->cpu;
907         }
908         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
909
910         /* symlink affected CPUs */
911         for_each_cpu_mask_nr(j, policy->cpus) {
912                 if (j == cpu)
913                         continue;
914                 if (!cpu_online(j))
915                         continue;
916
917                 dprintk("CPU %u already managed, adding link\n", j);
918                 cpufreq_cpu_get(cpu);
919                 cpu_sys_dev = get_cpu_sysdev(j);
920                 ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
921                                         "cpufreq");
922                 if (ret)
923                         goto err_out_unregister;
924         }
925
926         policy->governor = NULL; /* to assure that the starting sequence is
927                                   * run in cpufreq_set_policy */
928
929         /* set default policy */
930         ret = __cpufreq_set_policy(policy, &new_policy);
931         policy->user_policy.policy = policy->policy;
932         policy->user_policy.governor = policy->governor;
933
934         if (ret) {
935                 dprintk("setting policy failed\n");
936                 goto err_out_unregister;
937         }
938
939         unlock_policy_rwsem_write(cpu);
940
941         kobject_uevent(&policy->kobj, KOBJ_ADD);
942         module_put(cpufreq_driver->owner);
943         dprintk("initialization complete\n");
944         cpufreq_debug_enable_ratelimit();
945
946         return 0;
947
948
949 err_out_unregister:
950         spin_lock_irqsave(&cpufreq_driver_lock, flags);
951         for_each_cpu_mask_nr(j, policy->cpus)
952                 per_cpu(cpufreq_cpu_data, j) = NULL;
953         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
954
955         kobject_put(&policy->kobj);
956         wait_for_completion(&policy->kobj_unregister);
957
958 err_out_driver_exit:
959         if (cpufreq_driver->exit)
960                 cpufreq_driver->exit(policy);
961
962 err_out:
963         unlock_policy_rwsem_write(cpu);
964         kfree(policy);
965
966 nomem_out:
967         module_put(cpufreq_driver->owner);
968 module_out:
969         cpufreq_debug_enable_ratelimit();
970         return ret;
971 }
972
973
974 /**
975  * __cpufreq_remove_dev - remove a CPU device
976  *
977  * Removes the cpufreq interface for a CPU device.
978  * Caller should already have policy_rwsem in write mode for this CPU.
979  * This routine frees the rwsem before returning.
980  */
981 static int __cpufreq_remove_dev(struct sys_device *sys_dev)
982 {
983         unsigned int cpu = sys_dev->id;
984         unsigned long flags;
985         struct cpufreq_policy *data;
986 #ifdef CONFIG_SMP
987         struct sys_device *cpu_sys_dev;
988         unsigned int j;
989 #endif
990
991         cpufreq_debug_disable_ratelimit();
992         dprintk("unregistering CPU %u\n", cpu);
993
994         spin_lock_irqsave(&cpufreq_driver_lock, flags);
995         data = per_cpu(cpufreq_cpu_data, cpu);
996
997         if (!data) {
998                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
999                 cpufreq_debug_enable_ratelimit();
1000                 unlock_policy_rwsem_write(cpu);
1001                 return -EINVAL;
1002         }
1003         per_cpu(cpufreq_cpu_data, cpu) = NULL;
1004
1005
1006 #ifdef CONFIG_SMP
1007         /* if this isn't the CPU which is the parent of the kobj, we
1008          * only need to unlink, put and exit
1009          */
1010         if (unlikely(cpu != data->cpu)) {
1011                 dprintk("removing link\n");
1012                 cpu_clear(cpu, data->cpus);
1013                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1014                 sysfs_remove_link(&sys_dev->kobj, "cpufreq");
1015                 cpufreq_cpu_put(data);
1016                 cpufreq_debug_enable_ratelimit();
1017                 unlock_policy_rwsem_write(cpu);
1018                 return 0;
1019         }
1020 #endif
1021
1022 #ifdef CONFIG_SMP
1023
1024 #ifdef CONFIG_HOTPLUG_CPU
1025         per_cpu(cpufreq_cpu_governor, cpu) = data->governor;
1026 #endif
1027
1028         /* if we have other CPUs still registered, we need to unlink them,
1029          * or else wait_for_completion below will lock up. Clean the
1030          * per_cpu(cpufreq_cpu_data) while holding the lock, and remove
1031          * the sysfs links afterwards.
1032          */
1033         if (unlikely(cpus_weight(data->cpus) > 1)) {
1034                 for_each_cpu_mask_nr(j, data->cpus) {
1035                         if (j == cpu)
1036                                 continue;
1037                         per_cpu(cpufreq_cpu_data, j) = NULL;
1038                 }
1039         }
1040
1041         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1042
1043         if (unlikely(cpus_weight(data->cpus) > 1)) {
1044                 for_each_cpu_mask_nr(j, data->cpus) {
1045                         if (j == cpu)
1046                                 continue;
1047                         dprintk("removing link for cpu %u\n", j);
1048 #ifdef CONFIG_HOTPLUG_CPU
1049                         per_cpu(cpufreq_cpu_governor, j) = data->governor;
1050 #endif
1051                         cpu_sys_dev = get_cpu_sysdev(j);
1052                         sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
1053                         cpufreq_cpu_put(data);
1054                 }
1055         }
1056 #else
1057         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1058 #endif
1059
1060         if (cpufreq_driver->target)
1061                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1062
1063         unlock_policy_rwsem_write(cpu);
1064
1065         kobject_put(&data->kobj);
1066
1067         /* we need to make sure that the underlying kobj is actually
1068          * not referenced anymore by anybody before we proceed with
1069          * unloading.
1070          */
1071         dprintk("waiting for dropping of refcount\n");
1072         wait_for_completion(&data->kobj_unregister);
1073         dprintk("wait complete\n");
1074
1075         if (cpufreq_driver->exit)
1076                 cpufreq_driver->exit(data);
1077
1078         kfree(data);
1079
1080         cpufreq_debug_enable_ratelimit();
1081         return 0;
1082 }
1083
1084
1085 static int cpufreq_remove_dev(struct sys_device *sys_dev)
1086 {
1087         unsigned int cpu = sys_dev->id;
1088         int retval;
1089
1090         if (cpu_is_offline(cpu))
1091                 return 0;
1092
1093         if (unlikely(lock_policy_rwsem_write(cpu)))
1094                 BUG();
1095
1096         retval = __cpufreq_remove_dev(sys_dev);
1097         return retval;
1098 }
1099
1100
1101 static void handle_update(struct work_struct *work)
1102 {
1103         struct cpufreq_policy *policy =
1104                 container_of(work, struct cpufreq_policy, update);
1105         unsigned int cpu = policy->cpu;
1106         dprintk("handle_update for cpu %u called\n", cpu);
1107         cpufreq_update_policy(cpu);
1108 }
1109
1110 /**
1111  *      cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1112  *      @cpu: cpu number
1113  *      @old_freq: CPU frequency the kernel thinks the CPU runs at
1114  *      @new_freq: CPU frequency the CPU actually runs at
1115  *
1116  *      We adjust to current frequency first, and need to clean up later. So either call
1117  *      to cpufreq_update_policy() or schedule handle_update()).
1118  */
1119 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1120                                 unsigned int new_freq)
1121 {
1122         struct cpufreq_freqs freqs;
1123
1124         dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
1125                "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1126
1127         freqs.cpu = cpu;
1128         freqs.old = old_freq;
1129         freqs.new = new_freq;
1130         cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1131         cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1132 }
1133
1134
1135 /**
1136  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1137  * @cpu: CPU number
1138  *
1139  * This is the last known freq, without actually getting it from the driver.
1140  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1141  */
1142 unsigned int cpufreq_quick_get(unsigned int cpu)
1143 {
1144         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1145         unsigned int ret_freq = 0;
1146
1147         if (policy) {
1148                 ret_freq = policy->cur;
1149                 cpufreq_cpu_put(policy);
1150         }
1151
1152         return ret_freq;
1153 }
1154 EXPORT_SYMBOL(cpufreq_quick_get);
1155
1156
1157 static unsigned int __cpufreq_get(unsigned int cpu)
1158 {
1159         struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu);
1160         unsigned int ret_freq = 0;
1161
1162         if (!cpufreq_driver->get)
1163                 return ret_freq;
1164
1165         ret_freq = cpufreq_driver->get(cpu);
1166
1167         if (ret_freq && policy->cur &&
1168                 !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1169                 /* verify no discrepancy between actual and
1170                                         saved value exists */
1171                 if (unlikely(ret_freq != policy->cur)) {
1172                         cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1173                         schedule_work(&policy->update);
1174                 }
1175         }
1176
1177         return ret_freq;
1178 }
1179
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency.  The lookup is done with a
 * policy reference held and the policy rwsem taken for reading.
 * Returns 0 if either of them cannot be obtained.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int freq = 0;

	if (!policy)
		return 0;

	if (likely(!lock_policy_rwsem_read(cpu))) {
		freq = __cpufreq_get(cpu);
		unlock_policy_rwsem_read(cpu);
	}

	cpufreq_cpu_put(policy);
	return freq;
}
EXPORT_SYMBOL(cpufreq_get);
1207
1208
1209 /**
1210  *      cpufreq_suspend - let the low level driver prepare for suspend
1211  */
1212
1213 static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg)
1214 {
1215         int cpu = sysdev->id;
1216         int ret = 0;
1217         unsigned int cur_freq = 0;
1218         struct cpufreq_policy *cpu_policy;
1219
1220         dprintk("suspending cpu %u\n", cpu);
1221
1222         if (!cpu_online(cpu))
1223                 return 0;
1224
1225         /* we may be lax here as interrupts are off. Nonetheless
1226          * we need to grab the correct cpu policy, as to check
1227          * whether we really run on this CPU.
1228          */
1229
1230         cpu_policy = cpufreq_cpu_get(cpu);
1231         if (!cpu_policy)
1232                 return -EINVAL;
1233
1234         /* only handle each CPU group once */
1235         if (unlikely(cpu_policy->cpu != cpu))
1236                 goto out;
1237
1238         if (cpufreq_driver->suspend) {
1239                 ret = cpufreq_driver->suspend(cpu_policy, pmsg);
1240                 if (ret) {
1241                         printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1242                                         "step on CPU %u\n", cpu_policy->cpu);
1243                         goto out;
1244                 }
1245         }
1246
1247         if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
1248                 goto out;
1249
1250         if (cpufreq_driver->get)
1251                 cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1252
1253         if (!cur_freq || !cpu_policy->cur) {
1254                 printk(KERN_ERR "cpufreq: suspend failed to assert current "
1255                        "frequency is what timing core thinks it is.\n");
1256                 goto out;
1257         }
1258
1259         if (unlikely(cur_freq != cpu_policy->cur)) {
1260                 struct cpufreq_freqs freqs;
1261
1262                 if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1263                         dprintk("Warning: CPU frequency is %u, "
1264                                "cpufreq assumed %u kHz.\n",
1265                                cur_freq, cpu_policy->cur);
1266
1267                 freqs.cpu = cpu;
1268                 freqs.old = cpu_policy->cur;
1269                 freqs.new = cur_freq;
1270
1271                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
1272                                     CPUFREQ_SUSPENDCHANGE, &freqs);
1273                 adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
1274
1275                 cpu_policy->cur = cur_freq;
1276         }
1277
1278 out:
1279         cpufreq_cpu_put(cpu_policy);
1280         return ret;
1281 }
1282
1283 /**
1284  *      cpufreq_resume -  restore proper CPU frequency handling after resume
1285  *
1286  *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1287  *      2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
1288  *      3.) schedule call cpufreq_update_policy() ASAP as interrupts are
1289  *          restored.
1290  */
1291 static int cpufreq_resume(struct sys_device *sysdev)
1292 {
1293         int cpu = sysdev->id;
1294         int ret = 0;
1295         struct cpufreq_policy *cpu_policy;
1296
1297         dprintk("resuming cpu %u\n", cpu);
1298
1299         if (!cpu_online(cpu))
1300                 return 0;
1301
1302         /* we may be lax here as interrupts are off. Nonetheless
1303          * we need to grab the correct cpu policy, as to check
1304          * whether we really run on this CPU.
1305          */
1306
1307         cpu_policy = cpufreq_cpu_get(cpu);
1308         if (!cpu_policy)
1309                 return -EINVAL;
1310
1311         /* only handle each CPU group once */
1312         if (unlikely(cpu_policy->cpu != cpu))
1313                 goto fail;
1314
1315         if (cpufreq_driver->resume) {
1316                 ret = cpufreq_driver->resume(cpu_policy);
1317                 if (ret) {
1318                         printk(KERN_ERR "cpufreq: resume failed in ->resume "
1319                                         "step on CPU %u\n", cpu_policy->cpu);
1320                         goto fail;
1321                 }
1322         }
1323
1324         if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1325                 unsigned int cur_freq = 0;
1326
1327                 if (cpufreq_driver->get)
1328                         cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1329
1330                 if (!cur_freq || !cpu_policy->cur) {
1331                         printk(KERN_ERR "cpufreq: resume failed to assert "
1332                                         "current frequency is what timing core "
1333                                         "thinks it is.\n");
1334                         goto out;
1335                 }
1336
1337                 if (unlikely(cur_freq != cpu_policy->cur)) {
1338                         struct cpufreq_freqs freqs;
1339
1340                         if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1341                                 dprintk("Warning: CPU frequency "
1342                                        "is %u, cpufreq assumed %u kHz.\n",
1343                                        cur_freq, cpu_policy->cur);
1344
1345                         freqs.cpu = cpu;
1346                         freqs.old = cpu_policy->cur;
1347                         freqs.new = cur_freq;
1348
1349                         srcu_notifier_call_chain(
1350                                         &cpufreq_transition_notifier_list,
1351                                         CPUFREQ_RESUMECHANGE, &freqs);
1352                         adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
1353
1354                         cpu_policy->cur = cur_freq;
1355                 }
1356         }
1357
1358 out:
1359         schedule_work(&cpu_policy->update);
1360 fail:
1361         cpufreq_cpu_put(cpu_policy);
1362         return ret;
1363 }
1364
1365 static struct sysdev_driver cpufreq_sysdev_driver = {
1366         .add            = cpufreq_add_dev,
1367         .remove         = cpufreq_remove_dev,
1368         .suspend        = cpufreq_suspend,
1369         .resume         = cpufreq_resume,
1370 };
1371
1372
1373 /*********************************************************************
1374  *                     NOTIFIER LISTS INTERFACE                      *
1375  *********************************************************************/
1376
1377 /**
1378  *      cpufreq_register_notifier - register a driver with cpufreq
1379  *      @nb: notifier function to register
1380  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1381  *
1382  *      Add a driver to one of two lists: either a list of drivers that
1383  *      are notified about clock rate changes (once before and once after
1384  *      the transition), or a list of drivers that are notified about
1385  *      changes in cpufreq policy.
1386  *
1387  *      This function may sleep, and has the same return conditions as
1388  *      blocking_notifier_chain_register.
1389  */
1390 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1391 {
1392         int ret;
1393
1394         WARN_ON(!init_cpufreq_transition_notifier_list_called);
1395
1396         switch (list) {
1397         case CPUFREQ_TRANSITION_NOTIFIER:
1398                 ret = srcu_notifier_chain_register(
1399                                 &cpufreq_transition_notifier_list, nb);
1400                 break;
1401         case CPUFREQ_POLICY_NOTIFIER:
1402                 ret = blocking_notifier_chain_register(
1403                                 &cpufreq_policy_notifier_list, nb);
1404                 break;
1405         default:
1406                 ret = -EINVAL;
1407         }
1408
1409         return ret;
1410 }
1411 EXPORT_SYMBOL(cpufreq_register_notifier);
1412
1413
1414 /**
1415  *      cpufreq_unregister_notifier - unregister a driver with cpufreq
1416  *      @nb: notifier block to be unregistered
1417  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1418  *
1419  *      Remove a driver from the CPU frequency notifier list.
1420  *
1421  *      This function may sleep, and has the same return conditions as
1422  *      blocking_notifier_chain_unregister.
1423  */
1424 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1425 {
1426         int ret;
1427
1428         switch (list) {
1429         case CPUFREQ_TRANSITION_NOTIFIER:
1430                 ret = srcu_notifier_chain_unregister(
1431                                 &cpufreq_transition_notifier_list, nb);
1432                 break;
1433         case CPUFREQ_POLICY_NOTIFIER:
1434                 ret = blocking_notifier_chain_unregister(
1435                                 &cpufreq_policy_notifier_list, nb);
1436                 break;
1437         default:
1438                 ret = -EINVAL;
1439         }
1440
1441         return ret;
1442 }
1443 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1444
1445
1446 /*********************************************************************
1447  *                              GOVERNORS                            *
1448  *********************************************************************/
1449
1450
1451 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1452                             unsigned int target_freq,
1453                             unsigned int relation)
1454 {
1455         int retval = -EINVAL;
1456
1457         dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1458                 target_freq, relation);
1459         if (cpu_online(policy->cpu) && cpufreq_driver->target)
1460                 retval = cpufreq_driver->target(policy, target_freq, relation);
1461
1462         return retval;
1463 }
1464 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1465
1466 int cpufreq_driver_target(struct cpufreq_policy *policy,
1467                           unsigned int target_freq,
1468                           unsigned int relation)
1469 {
1470         int ret = -EINVAL;
1471
1472         policy = cpufreq_cpu_get(policy->cpu);
1473         if (!policy)
1474                 goto no_policy;
1475
1476         if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1477                 goto fail;
1478
1479         ret = __cpufreq_driver_target(policy, target_freq, relation);
1480
1481         unlock_policy_rwsem_write(policy->cpu);
1482
1483 fail:
1484         cpufreq_cpu_put(policy);
1485 no_policy:
1486         return ret;
1487 }
1488 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1489
1490 int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu)
1491 {
1492         int ret = 0;
1493
1494         policy = cpufreq_cpu_get(policy->cpu);
1495         if (!policy)
1496                 return -EINVAL;
1497
1498         if (cpu_online(cpu) && cpufreq_driver->getavg)
1499                 ret = cpufreq_driver->getavg(policy, cpu);
1500
1501         cpufreq_cpu_put(policy);
1502         return ret;
1503 }
1504 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1505
1506 /*
1507  * when "event" is CPUFREQ_GOV_LIMITS
1508  */
1509
1510 static int __cpufreq_governor(struct cpufreq_policy *policy,
1511                                         unsigned int event)
1512 {
1513         int ret;
1514
1515         /* Only must be defined when default governor is known to have latency
1516            restrictions, like e.g. conservative or ondemand.
1517            That this is the case is already ensured in Kconfig
1518         */
1519 #ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE
1520         struct cpufreq_governor *gov = &cpufreq_gov_performance;
1521 #else
1522         struct cpufreq_governor *gov = NULL;
1523 #endif
1524
1525         if (policy->governor->max_transition_latency &&
1526             policy->cpuinfo.transition_latency >
1527             policy->governor->max_transition_latency) {
1528                 if (!gov)
1529                         return -EINVAL;
1530                 else {
1531                         printk(KERN_WARNING "%s governor failed, too long"
1532                                " transition latency of HW, fallback"
1533                                " to %s governor\n",
1534                                policy->governor->name,
1535                                gov->name);
1536                         policy->governor = gov;
1537                 }
1538         }
1539
1540         if (!try_module_get(policy->governor->owner))
1541                 return -EINVAL;
1542
1543         dprintk("__cpufreq_governor for CPU %u, event %u\n",
1544                                                 policy->cpu, event);
1545         ret = policy->governor->governor(policy, event);
1546
1547         /* we keep one module reference alive for
1548                         each CPU governed by this CPU */
1549         if ((event != CPUFREQ_GOV_START) || ret)
1550                 module_put(policy->governor->owner);
1551         if ((event == CPUFREQ_GOV_STOP) && !ret)
1552                 module_put(policy->governor->owner);
1553
1554         return ret;
1555 }
1556
1557
1558 int cpufreq_register_governor(struct cpufreq_governor *governor)
1559 {
1560         int err;
1561
1562         if (!governor)
1563                 return -EINVAL;
1564
1565         mutex_lock(&cpufreq_governor_mutex);
1566
1567         err = -EBUSY;
1568         if (__find_governor(governor->name) == NULL) {
1569                 err = 0;
1570                 list_add(&governor->governor_list, &cpufreq_governor_list);
1571         }
1572
1573         mutex_unlock(&cpufreq_governor_mutex);
1574         return err;
1575 }
1576 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1577
1578
1579 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1580 {
1581         if (!governor)
1582                 return;
1583
1584         mutex_lock(&cpufreq_governor_mutex);
1585         list_del(&governor->governor_list);
1586         mutex_unlock(&cpufreq_governor_mutex);
1587         return;
1588 }
1589 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1590
1591
1592
1593 /*********************************************************************
1594  *                          POLICY INTERFACE                         *
1595  *********************************************************************/
1596
1597 /**
1598  * cpufreq_get_policy - get the current cpufreq_policy
1599  * @policy: struct cpufreq_policy into which the current cpufreq_policy is written
1600  *
1601  * Reads the current cpufreq policy.
1602  */
1603 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1604 {
1605         struct cpufreq_policy *cpu_policy;
1606         if (!policy)
1607                 return -EINVAL;
1608
1609         cpu_policy = cpufreq_cpu_get(cpu);
1610         if (!cpu_policy)
1611                 return -EINVAL;
1612
1613         memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1614
1615         cpufreq_cpu_put(cpu_policy);
1616         return 0;
1617 }
1618 EXPORT_SYMBOL(cpufreq_get_policy);
1619
1620
1621 /*
1622  * data   : current policy.
1623  * policy : policy to be set.
1624  */
1625 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1626                                 struct cpufreq_policy *policy)
1627 {
1628         int ret = 0;
1629
1630         cpufreq_debug_disable_ratelimit();
1631         dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1632                 policy->min, policy->max);
1633
1634         memcpy(&policy->cpuinfo, &data->cpuinfo,
1635                                 sizeof(struct cpufreq_cpuinfo));
1636
1637         if (policy->min > data->max || policy->max < data->min) {
1638                 ret = -EINVAL;
1639                 goto error_out;
1640         }
1641
1642         /* verify the cpu speed can be set within this limit */
1643         ret = cpufreq_driver->verify(policy);
1644         if (ret)
1645                 goto error_out;
1646
1647         /* adjust if necessary - all reasons */
1648         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1649                         CPUFREQ_ADJUST, policy);
1650
1651         /* adjust if necessary - hardware incompatibility*/
1652         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1653                         CPUFREQ_INCOMPATIBLE, policy);
1654
1655         /* verify the cpu speed can be set within this limit,
1656            which might be different to the first one */
1657         ret = cpufreq_driver->verify(policy);
1658         if (ret)
1659                 goto error_out;
1660
1661         /* notification of the new policy */
1662         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1663                         CPUFREQ_NOTIFY, policy);
1664
1665         data->min = policy->min;
1666         data->max = policy->max;
1667
1668         dprintk("new min and max freqs are %u - %u kHz\n",
1669                                         data->min, data->max);
1670
1671         if (cpufreq_driver->setpolicy) {
1672                 data->policy = policy->policy;
1673                 dprintk("setting range\n");
1674                 ret = cpufreq_driver->setpolicy(policy);
1675         } else {
1676                 if (policy->governor != data->governor) {
1677                         /* save old, working values */
1678                         struct cpufreq_governor *old_gov = data->governor;
1679
1680                         dprintk("governor switch\n");
1681
1682                         /* end old governor */
1683                         if (data->governor)
1684                                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1685
1686                         /* start new governor */
1687                         data->governor = policy->governor;
1688                         if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1689                                 /* new governor failed, so re-start old one */
1690                                 dprintk("starting governor %s failed\n",
1691                                                         data->governor->name);
1692                                 if (old_gov) {
1693                                         data->governor = old_gov;
1694                                         __cpufreq_governor(data,
1695                                                            CPUFREQ_GOV_START);
1696                                 }
1697                                 ret = -EINVAL;
1698                                 goto error_out;
1699                         }
1700                         /* might be a policy change, too, so fall through */
1701                 }
1702                 dprintk("governor: change or update limits\n");
1703                 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1704         }
1705
1706 error_out:
1707         cpufreq_debug_enable_ratelimit();
1708         return ret;
1709 }
1710
1711 /**
1712  *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
1713  *      @cpu: CPU which shall be re-evaluated
1714  *
1715  *      Usefull for policy notifiers which have different necessities
1716  *      at different times.
1717  */
1718 int cpufreq_update_policy(unsigned int cpu)
1719 {
1720         struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1721         struct cpufreq_policy policy;
1722         int ret;
1723
1724         if (!data) {
1725                 ret = -ENODEV;
1726                 goto no_policy;
1727         }
1728
1729         if (unlikely(lock_policy_rwsem_write(cpu))) {
1730                 ret = -EINVAL;
1731                 goto fail;
1732         }
1733
1734         dprintk("updating policy for CPU %u\n", cpu);
1735         memcpy(&policy, data, sizeof(struct cpufreq_policy));
1736         policy.min = data->user_policy.min;
1737         policy.max = data->user_policy.max;
1738         policy.policy = data->user_policy.policy;
1739         policy.governor = data->user_policy.governor;
1740
1741         /* BIOS might change freq behind our back
1742           -> ask driver for current freq and notify governors about a change */
1743         if (cpufreq_driver->get) {
1744                 policy.cur = cpufreq_driver->get(cpu);
1745                 if (!data->cur) {
1746                         dprintk("Driver did not initialize current freq");
1747                         data->cur = policy.cur;
1748                 } else {
1749                         if (data->cur != policy.cur)
1750                                 cpufreq_out_of_sync(cpu, data->cur,
1751                                                                 policy.cur);
1752                 }
1753         }
1754
1755         ret = __cpufreq_set_policy(data, &policy);
1756
1757         unlock_policy_rwsem_write(cpu);
1758
1759 fail:
1760         cpufreq_cpu_put(data);
1761 no_policy:
1762         return ret;
1763 }
1764 EXPORT_SYMBOL(cpufreq_update_policy);
1765
1766 static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb,
1767                                         unsigned long action, void *hcpu)
1768 {
1769         unsigned int cpu = (unsigned long)hcpu;
1770         struct sys_device *sys_dev;
1771
1772         sys_dev = get_cpu_sysdev(cpu);
1773         if (sys_dev) {
1774                 switch (action) {
1775                 case CPU_ONLINE:
1776                 case CPU_ONLINE_FROZEN:
1777                         cpufreq_add_dev(sys_dev);
1778                         break;
1779                 case CPU_DOWN_PREPARE:
1780                 case CPU_DOWN_PREPARE_FROZEN:
1781                         if (unlikely(lock_policy_rwsem_write(cpu)))
1782                                 BUG();
1783
1784                         __cpufreq_remove_dev(sys_dev);
1785                         break;
1786                 case CPU_DOWN_FAILED:
1787                 case CPU_DOWN_FAILED_FROZEN:
1788                         cpufreq_add_dev(sys_dev);
1789                         break;
1790                 }
1791         }
1792         return NOTIFY_OK;
1793 }
1794
1795 static struct notifier_block __refdata cpufreq_cpu_notifier =
1796 {
1797     .notifier_call = cpufreq_cpu_callback,
1798 };
1799
1800 /*********************************************************************
1801  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1802  *********************************************************************/
1803
1804 /**
1805  * cpufreq_register_driver - register a CPU Frequency driver
1806  * @driver_data: A struct cpufreq_driver containing the values#
1807  * submitted by the CPU Frequency driver.
1808  *
1809  *   Registers a CPU Frequency driver to this core code. This code
1810  * returns zero on success, -EBUSY when another driver got here first
1811  * (and isn't unregistered in the meantime).
1812  *
1813  */
1814 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1815 {
1816         unsigned long flags;
1817         int ret;
1818
1819         if (!driver_data || !driver_data->verify || !driver_data->init ||
1820             ((!driver_data->setpolicy) && (!driver_data->target)))
1821                 return -EINVAL;
1822
1823         dprintk("trying to register driver %s\n", driver_data->name);
1824
1825         if (driver_data->setpolicy)
1826                 driver_data->flags |= CPUFREQ_CONST_LOOPS;
1827
1828         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1829         if (cpufreq_driver) {
1830                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1831                 return -EBUSY;
1832         }
1833         cpufreq_driver = driver_data;
1834         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1835
1836         ret = sysdev_driver_register(&cpu_sysdev_class,
1837                                         &cpufreq_sysdev_driver);
1838
1839         if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1840                 int i;
1841                 ret = -ENODEV;
1842
1843                 /* check for at least one working CPU */
1844                 for (i = 0; i < nr_cpu_ids; i++)
1845                         if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) {
1846                                 ret = 0;
1847                                 break;
1848                         }
1849
1850                 /* if all ->init() calls failed, unregister */
1851                 if (ret) {
1852                         dprintk("no CPU initialized for driver %s\n",
1853                                                         driver_data->name);
1854                         sysdev_driver_unregister(&cpu_sysdev_class,
1855                                                 &cpufreq_sysdev_driver);
1856
1857                         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1858                         cpufreq_driver = NULL;
1859                         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1860                 }
1861         }
1862
1863         if (!ret) {
1864                 register_hotcpu_notifier(&cpufreq_cpu_notifier);
1865                 dprintk("driver %s up and running\n", driver_data->name);
1866                 cpufreq_debug_enable_ratelimit();
1867         }
1868
1869         return ret;
1870 }
1871 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1872
1873
1874 /**
1875  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1876  *
1877  *    Unregister the current CPUFreq driver. Only call this if you have
1878  * the right to do so, i.e. if you have succeeded in initialising before!
1879  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1880  * currently not initialised.
1881  */
1882 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1883 {
1884         unsigned long flags;
1885
1886         cpufreq_debug_disable_ratelimit();
1887
1888         if (!cpufreq_driver || (driver != cpufreq_driver)) {
1889                 cpufreq_debug_enable_ratelimit();
1890                 return -EINVAL;
1891         }
1892
1893         dprintk("unregistering driver %s\n", driver->name);
1894
1895         sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1896         unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1897
1898         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1899         cpufreq_driver = NULL;
1900         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1901
1902         return 0;
1903 }
1904 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1905
1906 static int __init cpufreq_core_init(void)
1907 {
1908         int cpu;
1909
1910         for_each_possible_cpu(cpu) {
1911                 per_cpu(policy_cpu, cpu) = -1;
1912                 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1913         }
1914         return 0;
1915 }
1916
1917 core_initcall(cpufreq_core_init);