[PATCH] correct slow acpi_pm rating
[linux-2.6] / drivers / cpufreq / cpufreq.c
1 /*
2  *  linux/drivers/cpufreq/cpufreq.c
3  *
4  *  Copyright (C) 2001 Russell King
5  *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
6  *
7  *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
8  *      Added handling for CPU hotplug
9  *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
10  *      Fix handling for CPU hotplug -- affected CPUs
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License version 2 as
14  * published by the Free Software Foundation.
15  *
16  */
17
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/notifier.h>
22 #include <linux/cpufreq.h>
23 #include <linux/delay.h>
24 #include <linux/interrupt.h>
25 #include <linux/spinlock.h>
26 #include <linux/device.h>
27 #include <linux/slab.h>
28 #include <linux/cpu.h>
29 #include <linux/completion.h>
30 #include <linux/mutex.h>
31
/*
 * Debug print helper for this file: forwards its arguments to
 * cpufreq_debug_printk() with the CPUFREQ_DEBUG_CORE type and the
 * "cpufreq-core" prefix.
 */
#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
                                                "cpufreq-core", msg)
34
/**
 * The "cpufreq driver" - the arch- or hardware-dependent low
 * level driver of CPUFreq support, and its spinlock. This lock
 * also protects the cpufreq_cpu_data array.
 */
/* currently registered low-level driver; tested against NULL before use */
static struct cpufreq_driver *cpufreq_driver;
/* per-CPU policy pointer, indexed by CPU number; NULL when no policy is set */
static struct cpufreq_policy *cpufreq_cpu_data[NR_CPUS];
/* taken with spin_lock_irqsave() around accesses to the two objects above */
static DEFINE_SPINLOCK(cpufreq_driver_lock);
43
44 /*
45  * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure
46  * all cpufreq/hotplug/workqueue/etc related lock issues.
47  *
48  * The rules for this semaphore:
49  * - Any routine that wants to read from the policy structure will
50  *   do a down_read on this semaphore.
51  * - Any routine that will write to the policy structure and/or may take away
52  *   the policy altogether (eg. CPU hotplug), will hold this lock in write
53  *   mode before doing so.
54  *
55  * Additional rules:
 * - All holders of the lock should check to make sure that the CPU they
 *   are concerned with is online after they get the lock.
58  * - Governor routines that can be called in cpufreq hotplug path should not
59  *   take this sem as top level hotplug notifier handler takes this.
60  */
61 static DEFINE_PER_CPU(int, policy_cpu);
62 static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem);
63
64 #define lock_policy_rwsem(mode, cpu)                                    \
65 int lock_policy_rwsem_##mode                                            \
66 (int cpu)                                                               \
67 {                                                                       \
68         int policy_cpu = per_cpu(policy_cpu, cpu);                      \
69         BUG_ON(policy_cpu == -1);                                       \
70         down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));            \
71         if (unlikely(!cpu_online(cpu))) {                               \
72                 up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu));      \
73                 return -1;                                              \
74         }                                                               \
75                                                                         \
76         return 0;                                                       \
77 }
78
79 lock_policy_rwsem(read, cpu);
80 EXPORT_SYMBOL_GPL(lock_policy_rwsem_read);
81
82 lock_policy_rwsem(write, cpu);
83 EXPORT_SYMBOL_GPL(lock_policy_rwsem_write);
84
85 void unlock_policy_rwsem_read(int cpu)
86 {
87         int policy_cpu = per_cpu(policy_cpu, cpu);
88         BUG_ON(policy_cpu == -1);
89         up_read(&per_cpu(cpu_policy_rwsem, policy_cpu));
90 }
91 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read);
92
93 void unlock_policy_rwsem_write(int cpu)
94 {
95         int policy_cpu = per_cpu(policy_cpu, cpu);
96         BUG_ON(policy_cpu == -1);
97         up_write(&per_cpu(cpu_policy_rwsem, policy_cpu));
98 }
99 EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write);
100
101
102 /* internal prototypes */
103 static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event);
104 static unsigned int __cpufreq_get(unsigned int cpu);
105 static void handle_update(struct work_struct *work);
106
/**
 * Two notifier lists: the "policy" list is involved in the
 * validation process for a new CPU frequency policy; the
 * "transition" list for kernel code that needs to handle
 * changes to devices when the CPU clock speed changes.
 *
 * The policy list is a blocking notifier head; the transition list is
 * an SRCU notifier head, which is initialised at run time by
 * init_cpufreq_transition_notifier_list().
 * NOTE(review): this comment used to say "the mutex locks both lists",
 * but no such mutex is declared here - presumably each head relies on
 * the locking of its own notifier type; confirm.
 */
static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list);
static struct srcu_notifier_head cpufreq_transition_notifier_list;
116
/*
 * Run-time initialisation of the SRCU transition notifier head.
 * Registered as a pure_initcall; always returns 0.
 */
static int __init init_cpufreq_transition_notifier_list(void)
{
        srcu_init_notifier_head(&cpufreq_transition_notifier_list);
        return 0;
}
pure_initcall(init_cpufreq_transition_notifier_list);
123
/*
 * List of governors, searched by name in __find_governor().
 * cpufreq_parse_governor() holds cpufreq_governor_mutex around its lookups.
 */
static LIST_HEAD(cpufreq_governor_list);
static DEFINE_MUTEX (cpufreq_governor_mutex);
126
127 struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu)
128 {
129         struct cpufreq_policy *data;
130         unsigned long flags;
131
132         if (cpu >= NR_CPUS)
133                 goto err_out;
134
135         /* get the cpufreq driver */
136         spin_lock_irqsave(&cpufreq_driver_lock, flags);
137
138         if (!cpufreq_driver)
139                 goto err_out_unlock;
140
141         if (!try_module_get(cpufreq_driver->owner))
142                 goto err_out_unlock;
143
144
145         /* get the CPU */
146         data = cpufreq_cpu_data[cpu];
147
148         if (!data)
149                 goto err_out_put_module;
150
151         if (!kobject_get(&data->kobj))
152                 goto err_out_put_module;
153
154         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
155         return data;
156
157 err_out_put_module:
158         module_put(cpufreq_driver->owner);
159 err_out_unlock:
160         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
161 err_out:
162         return NULL;
163 }
164 EXPORT_SYMBOL_GPL(cpufreq_cpu_get);
165
166
167 void cpufreq_cpu_put(struct cpufreq_policy *data)
168 {
169         kobject_put(&data->kobj);
170         module_put(cpufreq_driver->owner);
171 }
172 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
173
174
175 /*********************************************************************
176  *                     UNIFIED DEBUG HELPERS                         *
177  *********************************************************************/
178 #ifdef CONFIG_CPU_FREQ_DEBUG
179
180 /* what part(s) of the CPUfreq subsystem are debugged? */
181 static unsigned int debug;
182
183 /* is the debug output ratelimit'ed using printk_ratelimit? User can
184  * set or modify this value.
185  */
186 static unsigned int debug_ratelimit = 1;
187
188 /* is the printk_ratelimit'ing enabled? It's enabled after a successful
189  * loading of a cpufreq driver, temporarily disabled when a new policy
190  * is set, and disabled upon cpufreq driver removal
191  */
192 static unsigned int disable_ratelimit = 1;
193 static DEFINE_SPINLOCK(disable_ratelimit_lock);
194
195 static void cpufreq_debug_enable_ratelimit(void)
196 {
197         unsigned long flags;
198
199         spin_lock_irqsave(&disable_ratelimit_lock, flags);
200         if (disable_ratelimit)
201                 disable_ratelimit--;
202         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
203 }
204
205 static void cpufreq_debug_disable_ratelimit(void)
206 {
207         unsigned long flags;
208
209         spin_lock_irqsave(&disable_ratelimit_lock, flags);
210         disable_ratelimit++;
211         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
212 }
213
214 void cpufreq_debug_printk(unsigned int type, const char *prefix,
215                                                         const char *fmt, ...)
216 {
217         char s[256];
218         va_list args;
219         unsigned int len;
220         unsigned long flags;
221
222         WARN_ON(!prefix);
223         if (type & debug) {
224                 spin_lock_irqsave(&disable_ratelimit_lock, flags);
225                 if (!disable_ratelimit && debug_ratelimit
226                                         && !printk_ratelimit()) {
227                         spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
228                         return;
229                 }
230                 spin_unlock_irqrestore(&disable_ratelimit_lock, flags);
231
232                 len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix);
233
234                 va_start(args, fmt);
235                 len += vsnprintf(&s[len], (256 - len), fmt, args);
236                 va_end(args);
237
238                 printk(s);
239
240                 WARN_ON(len < 5);
241         }
242 }
243 EXPORT_SYMBOL(cpufreq_debug_printk);
244
245
246 module_param(debug, uint, 0644);
247 MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core,"
248                         " 2 to debug drivers, and 4 to debug governors.");
249
250 module_param(debug_ratelimit, uint, 0644);
251 MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:"
252                                         " set to 0 to disable ratelimiting.");
253
254 #else /* !CONFIG_CPU_FREQ_DEBUG */
255
/* Debugging is compiled out: the ratelimit toggles do nothing. */
static inline void cpufreq_debug_enable_ratelimit(void) { }
static inline void cpufreq_debug_disable_ratelimit(void) { }
258
259 #endif /* CONFIG_CPU_FREQ_DEBUG */
260
261
262 /*********************************************************************
263  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
264  *********************************************************************/
265
266 /**
267  * adjust_jiffies - adjust the system "loops_per_jiffy"
268  *
269  * This function alters the system "loops_per_jiffy" for the clock
270  * speed change. Note that loops_per_jiffy cannot be updated on SMP
271  * systems as each CPU might be scaled differently. So, use the arch
272  * per-CPU loops_per_jiffy value wherever possible.
273  */
274 #ifndef CONFIG_SMP
275 static unsigned long l_p_j_ref;
276 static unsigned int  l_p_j_ref_freq;
277
278 static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
279 {
280         if (ci->flags & CPUFREQ_CONST_LOOPS)
281                 return;
282
283         if (!l_p_j_ref_freq) {
284                 l_p_j_ref = loops_per_jiffy;
285                 l_p_j_ref_freq = ci->old;
286                 dprintk("saving %lu as reference value for loops_per_jiffy;"
287                         "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq);
288         }
289         if ((val == CPUFREQ_PRECHANGE  && ci->old < ci->new) ||
290             (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) ||
291             (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) {
292                 loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq,
293                                                                 ci->new);
294                 dprintk("scaling loops_per_jiffy to %lu"
295                         "for frequency %u kHz\n", loops_per_jiffy, ci->new);
296         }
297 }
298 #else
299 static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci)
300 {
301         return;
302 }
303 #endif
304
305
306 /**
307  * cpufreq_notify_transition - call notifier chain and adjust_jiffies
308  * on frequency transition.
309  *
310  * This function calls the transition notifiers and the "adjust_jiffies"
311  * function. It is called twice on all CPU frequency changes that have
312  * external effects.
313  */
314 void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
315 {
316         struct cpufreq_policy *policy;
317
318         BUG_ON(irqs_disabled());
319
320         freqs->flags = cpufreq_driver->flags;
321         dprintk("notification %u of frequency transition to %u kHz\n",
322                 state, freqs->new);
323
324         policy = cpufreq_cpu_data[freqs->cpu];
325         switch (state) {
326
327         case CPUFREQ_PRECHANGE:
328                 /* detect if the driver reported a value as "old frequency"
329                  * which is not equal to what the cpufreq core thinks is
330                  * "old frequency".
331                  */
332                 if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
333                         if ((policy) && (policy->cpu == freqs->cpu) &&
334                             (policy->cur) && (policy->cur != freqs->old)) {
335                                 dprintk("Warning: CPU frequency is"
336                                         " %u, cpufreq assumed %u kHz.\n",
337                                         freqs->old, policy->cur);
338                                 freqs->old = policy->cur;
339                         }
340                 }
341                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
342                                 CPUFREQ_PRECHANGE, freqs);
343                 adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
344                 break;
345
346         case CPUFREQ_POSTCHANGE:
347                 adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
348                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
349                                 CPUFREQ_POSTCHANGE, freqs);
350                 if (likely(policy) && likely(policy->cpu == freqs->cpu))
351                         policy->cur = freqs->new;
352                 break;
353         }
354 }
355 EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
356
357
358
359 /*********************************************************************
360  *                          SYSFS INTERFACE                          *
361  *********************************************************************/
362
363 static struct cpufreq_governor *__find_governor(const char *str_governor)
364 {
365         struct cpufreq_governor *t;
366
367         list_for_each_entry(t, &cpufreq_governor_list, governor_list)
368                 if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN))
369                         return t;
370
371         return NULL;
372 }
373
374 /**
375  * cpufreq_parse_governor - parse a governor string
376  */
377 static int cpufreq_parse_governor (char *str_governor, unsigned int *policy,
378                                 struct cpufreq_governor **governor)
379 {
380         int err = -EINVAL;
381
382         if (!cpufreq_driver)
383                 goto out;
384
385         if (cpufreq_driver->setpolicy) {
386                 if (!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
387                         *policy = CPUFREQ_POLICY_PERFORMANCE;
388                         err = 0;
389                 } else if (!strnicmp(str_governor, "powersave",
390                                                 CPUFREQ_NAME_LEN)) {
391                         *policy = CPUFREQ_POLICY_POWERSAVE;
392                         err = 0;
393                 }
394         } else if (cpufreq_driver->target) {
395                 struct cpufreq_governor *t;
396
397                 mutex_lock(&cpufreq_governor_mutex);
398
399                 t = __find_governor(str_governor);
400
401                 if (t == NULL) {
402                         char *name = kasprintf(GFP_KERNEL, "cpufreq_%s",
403                                                                 str_governor);
404
405                         if (name) {
406                                 int ret;
407
408                                 mutex_unlock(&cpufreq_governor_mutex);
409                                 ret = request_module(name);
410                                 mutex_lock(&cpufreq_governor_mutex);
411
412                                 if (ret == 0)
413                                         t = __find_governor(str_governor);
414                         }
415
416                         kfree(name);
417                 }
418
419                 if (t != NULL) {
420                         *governor = t;
421                         err = 0;
422                 }
423
424                 mutex_unlock(&cpufreq_governor_mutex);
425         }
426   out:
427         return err;
428 }
429
430
431 /* drivers/base/cpu.c */
432 extern struct sysdev_class cpu_sysdev_class;
433
434
435 /**
436  * cpufreq_per_cpu_attr_read() / show_##file_name() -
437  * print out cpufreq information
438  *
439  * Write out information from cpufreq_driver->policy[cpu]; object must be
440  * "unsigned int".
441  */
442
443 #define show_one(file_name, object)                     \
444 static ssize_t show_##file_name                         \
445 (struct cpufreq_policy * policy, char *buf)             \
446 {                                                       \
447         return sprintf (buf, "%u\n", policy->object);   \
448 }
449
450 show_one(cpuinfo_min_freq, cpuinfo.min_freq);
451 show_one(cpuinfo_max_freq, cpuinfo.max_freq);
452 show_one(scaling_min_freq, min);
453 show_one(scaling_max_freq, max);
454 show_one(scaling_cur_freq, cur);
455
456 static int __cpufreq_set_policy(struct cpufreq_policy *data,
457                                 struct cpufreq_policy *policy);
458
459 /**
460  * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access
461  */
462 #define store_one(file_name, object)                    \
463 static ssize_t store_##file_name                                        \
464 (struct cpufreq_policy * policy, const char *buf, size_t count)         \
465 {                                                                       \
466         unsigned int ret = -EINVAL;                                     \
467         struct cpufreq_policy new_policy;                               \
468                                                                         \
469         ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
470         if (ret)                                                        \
471                 return -EINVAL;                                         \
472                                                                         \
473         ret = sscanf (buf, "%u", &new_policy.object);                   \
474         if (ret != 1)                                                   \
475                 return -EINVAL;                                         \
476                                                                         \
477         ret = __cpufreq_set_policy(policy, &new_policy);                \
478         policy->user_policy.object = policy->object;                    \
479                                                                         \
480         return ret ? ret : count;                                       \
481 }
482
483 store_one(scaling_min_freq,min);
484 store_one(scaling_max_freq,max);
485
486 /**
487  * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware
488  */
489 static ssize_t show_cpuinfo_cur_freq (struct cpufreq_policy * policy,
490                                                         char *buf)
491 {
492         unsigned int cur_freq = __cpufreq_get(policy->cpu);
493         if (!cur_freq)
494                 return sprintf(buf, "<unknown>");
495         return sprintf(buf, "%u\n", cur_freq);
496 }
497
498
499 /**
500  * show_scaling_governor - show the current policy for the specified CPU
501  */
502 static ssize_t show_scaling_governor (struct cpufreq_policy * policy,
503                                                         char *buf)
504 {
505         if(policy->policy == CPUFREQ_POLICY_POWERSAVE)
506                 return sprintf(buf, "powersave\n");
507         else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE)
508                 return sprintf(buf, "performance\n");
509         else if (policy->governor)
510                 return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name);
511         return -EINVAL;
512 }
513
514
515 /**
516  * store_scaling_governor - store policy for the specified CPU
517  */
518 static ssize_t store_scaling_governor (struct cpufreq_policy * policy,
519                                        const char *buf, size_t count)
520 {
521         unsigned int ret = -EINVAL;
522         char    str_governor[16];
523         struct cpufreq_policy new_policy;
524
525         ret = cpufreq_get_policy(&new_policy, policy->cpu);
526         if (ret)
527                 return ret;
528
529         ret = sscanf (buf, "%15s", str_governor);
530         if (ret != 1)
531                 return -EINVAL;
532
533         if (cpufreq_parse_governor(str_governor, &new_policy.policy,
534                                                 &new_policy.governor))
535                 return -EINVAL;
536
537         /* Do not use cpufreq_set_policy here or the user_policy.max
538            will be wrongly overridden */
539         ret = __cpufreq_set_policy(policy, &new_policy);
540
541         policy->user_policy.policy = policy->policy;
542         policy->user_policy.governor = policy->governor;
543
544         if (ret)
545                 return ret;
546         else
547                 return count;
548 }
549
550 /**
551  * show_scaling_driver - show the cpufreq driver currently loaded
552  */
553 static ssize_t show_scaling_driver (struct cpufreq_policy * policy, char *buf)
554 {
555         return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name);
556 }
557
558 /**
559  * show_scaling_available_governors - show the available CPUfreq governors
560  */
561 static ssize_t show_scaling_available_governors (struct cpufreq_policy *policy,
562                                 char *buf)
563 {
564         ssize_t i = 0;
565         struct cpufreq_governor *t;
566
567         if (!cpufreq_driver->target) {
568                 i += sprintf(buf, "performance powersave");
569                 goto out;
570         }
571
572         list_for_each_entry(t, &cpufreq_governor_list, governor_list) {
573                 if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2)))
574                         goto out;
575                 i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name);
576         }
577 out:
578         i += sprintf(&buf[i], "\n");
579         return i;
580 }
581 /**
582  * show_affected_cpus - show the CPUs affected by each transition
583  */
584 static ssize_t show_affected_cpus (struct cpufreq_policy * policy, char *buf)
585 {
586         ssize_t i = 0;
587         unsigned int cpu;
588
589         for_each_cpu_mask(cpu, policy->cpus) {
590                 if (i)
591                         i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " ");
592                 i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu);
593                 if (i >= (PAGE_SIZE - 5))
594                     break;
595         }
596         i += sprintf(&buf[i], "\n");
597         return i;
598 }
599
600
/* Declare a world-readable (0444) attribute backed by show_<_name>(). */
#define define_one_ro(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0444, show_##_name, NULL)

/* Declare an owner-read-only (0400) attribute backed by show_<_name>(). */
#define define_one_ro0400(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0400, show_##_name, NULL)

/* Declare a 0644 attribute backed by show_<_name>() and store_<_name>(). */
#define define_one_rw(_name) \
static struct freq_attr _name = \
__ATTR(_name, 0644, show_##_name, store_##_name)

define_one_ro0400(cpuinfo_cur_freq);
define_one_ro(cpuinfo_min_freq);
define_one_ro(cpuinfo_max_freq);
define_one_ro(scaling_available_governors);
define_one_ro(scaling_driver);
define_one_ro(scaling_cur_freq);
define_one_ro(affected_cpus);
define_one_rw(scaling_min_freq);
define_one_rw(scaling_max_freq);
define_one_rw(scaling_governor);

/*
 * Attributes attached to every policy kobject through ktype_cpufreq.
 * cpuinfo_cur_freq and scaling_cur_freq are not listed here; they are
 * created in cpufreq_add_dev() only when the driver provides ->get and
 * ->target respectively.
 */
static struct attribute * default_attrs[] = {
        &cpuinfo_min_freq.attr,
        &cpuinfo_max_freq.attr,
        &scaling_min_freq.attr,
        &scaling_max_freq.attr,
        &affected_cpus.attr,
        &scaling_governor.attr,
        &scaling_driver.attr,
        &scaling_available_governors.attr,
        NULL
};
635
/* Map an embedded kobject / attribute back to its containing structure. */
#define to_policy(k) container_of(k,struct cpufreq_policy,kobj)
#define to_attr(a) container_of(a,struct freq_attr,attr)
638
639 static ssize_t show(struct kobject * kobj, struct attribute * attr ,char * buf)
640 {
641         struct cpufreq_policy * policy = to_policy(kobj);
642         struct freq_attr * fattr = to_attr(attr);
643         ssize_t ret;
644         policy = cpufreq_cpu_get(policy->cpu);
645         if (!policy)
646                 return -EINVAL;
647
648         if (lock_policy_rwsem_read(policy->cpu) < 0)
649                 return -EINVAL;
650
651         if (fattr->show)
652                 ret = fattr->show(policy, buf);
653         else
654                 ret = -EIO;
655
656         unlock_policy_rwsem_read(policy->cpu);
657
658         cpufreq_cpu_put(policy);
659         return ret;
660 }
661
662 static ssize_t store(struct kobject * kobj, struct attribute * attr,
663                      const char * buf, size_t count)
664 {
665         struct cpufreq_policy * policy = to_policy(kobj);
666         struct freq_attr * fattr = to_attr(attr);
667         ssize_t ret;
668         policy = cpufreq_cpu_get(policy->cpu);
669         if (!policy)
670                 return -EINVAL;
671
672         if (lock_policy_rwsem_write(policy->cpu) < 0)
673                 return -EINVAL;
674
675         if (fattr->store)
676                 ret = fattr->store(policy, buf, count);
677         else
678                 ret = -EIO;
679
680         unlock_policy_rwsem_write(policy->cpu);
681
682         cpufreq_cpu_put(policy);
683         return ret;
684 }
685
686 static void cpufreq_sysfs_release(struct kobject * kobj)
687 {
688         struct cpufreq_policy * policy = to_policy(kobj);
689         dprintk("last reference is dropped\n");
690         complete(&policy->kobj_unregister);
691 }
692
/* All cpufreq attributes are read and written through show() / store(). */
static struct sysfs_ops sysfs_ops = {
        .show   = show,
        .store  = store,
};

/* kobject type of a policy: assigned to policy->kobj.ktype in
 * cpufreq_add_dev() */
static struct kobj_type ktype_cpufreq = {
        .sysfs_ops      = &sysfs_ops,
        .default_attrs  = default_attrs,
        .release        = cpufreq_sysfs_release,
};
703
704
705 /**
706  * cpufreq_add_dev - add a CPU device
707  *
708  * Adds the cpufreq interface for a CPU device.
709  */
710 static int cpufreq_add_dev (struct sys_device * sys_dev)
711 {
712         unsigned int cpu = sys_dev->id;
713         int ret = 0;
714         struct cpufreq_policy new_policy;
715         struct cpufreq_policy *policy;
716         struct freq_attr **drv_attr;
717         struct sys_device *cpu_sys_dev;
718         unsigned long flags;
719         unsigned int j;
720 #ifdef CONFIG_SMP
721         struct cpufreq_policy *managed_policy;
722 #endif
723
724         if (cpu_is_offline(cpu))
725                 return 0;
726
727         cpufreq_debug_disable_ratelimit();
728         dprintk("adding CPU %u\n", cpu);
729
730 #ifdef CONFIG_SMP
731         /* check whether a different CPU already registered this
732          * CPU because it is in the same boat. */
733         policy = cpufreq_cpu_get(cpu);
734         if (unlikely(policy)) {
735                 cpufreq_cpu_put(policy);
736                 cpufreq_debug_enable_ratelimit();
737                 return 0;
738         }
739 #endif
740
741         if (!try_module_get(cpufreq_driver->owner)) {
742                 ret = -EINVAL;
743                 goto module_out;
744         }
745
746         policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL);
747         if (!policy) {
748                 ret = -ENOMEM;
749                 goto nomem_out;
750         }
751
752         policy->cpu = cpu;
753         policy->cpus = cpumask_of_cpu(cpu);
754
755         /* Initially set CPU itself as the policy_cpu */
756         per_cpu(policy_cpu, cpu) = cpu;
757         lock_policy_rwsem_write(cpu);
758
759         init_completion(&policy->kobj_unregister);
760         INIT_WORK(&policy->update, handle_update);
761
762         /* call driver. From then on the cpufreq must be able
763          * to accept all calls to ->verify and ->setpolicy for this CPU
764          */
765         ret = cpufreq_driver->init(policy);
766         if (ret) {
767                 dprintk("initialization failed\n");
768                 unlock_policy_rwsem_write(cpu);
769                 goto err_out;
770         }
771
772 #ifdef CONFIG_SMP
773         for_each_cpu_mask(j, policy->cpus) {
774                 if (cpu == j)
775                         continue;
776
777                 /* check for existing affected CPUs.  They may not be aware
778                  * of it due to CPU Hotplug.
779                  */
780                 managed_policy = cpufreq_cpu_get(j);
781                 if (unlikely(managed_policy)) {
782
783                         /* Set proper policy_cpu */
784                         unlock_policy_rwsem_write(cpu);
785                         per_cpu(policy_cpu, cpu) = managed_policy->cpu;
786
787                         if (lock_policy_rwsem_write(cpu) < 0)
788                                 goto err_out_driver_exit;
789
790                         spin_lock_irqsave(&cpufreq_driver_lock, flags);
791                         managed_policy->cpus = policy->cpus;
792                         cpufreq_cpu_data[cpu] = managed_policy;
793                         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
794
795                         dprintk("CPU already managed, adding link\n");
796                         ret = sysfs_create_link(&sys_dev->kobj,
797                                                 &managed_policy->kobj,
798                                                 "cpufreq");
799                         if (ret) {
800                                 unlock_policy_rwsem_write(cpu);
801                                 goto err_out_driver_exit;
802                         }
803
804                         cpufreq_debug_enable_ratelimit();
805                         ret = 0;
806                         unlock_policy_rwsem_write(cpu);
807                         goto err_out_driver_exit; /* call driver->exit() */
808                 }
809         }
810 #endif
811         memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
812
813         /* prepare interface data */
814         policy->kobj.parent = &sys_dev->kobj;
815         policy->kobj.ktype = &ktype_cpufreq;
816         strlcpy(policy->kobj.name, "cpufreq", KOBJ_NAME_LEN);
817
818         ret = kobject_register(&policy->kobj);
819         if (ret) {
820                 unlock_policy_rwsem_write(cpu);
821                 goto err_out_driver_exit;
822         }
823         /* set up files for this cpu device */
824         drv_attr = cpufreq_driver->attr;
825         while ((drv_attr) && (*drv_attr)) {
826                 sysfs_create_file(&policy->kobj, &((*drv_attr)->attr));
827                 drv_attr++;
828         }
829         if (cpufreq_driver->get)
830                 sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr);
831         if (cpufreq_driver->target)
832                 sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr);
833
834         spin_lock_irqsave(&cpufreq_driver_lock, flags);
835         for_each_cpu_mask(j, policy->cpus) {
836                 cpufreq_cpu_data[j] = policy;
837                 per_cpu(policy_cpu, j) = policy->cpu;
838         }
839         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
840
841         /* symlink affected CPUs */
842         for_each_cpu_mask(j, policy->cpus) {
843                 if (j == cpu)
844                         continue;
845                 if (!cpu_online(j))
846                         continue;
847
848                 dprintk("CPU %u already managed, adding link\n", j);
849                 cpufreq_cpu_get(cpu);
850                 cpu_sys_dev = get_cpu_sysdev(j);
851                 ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj,
852                                         "cpufreq");
853                 if (ret) {
854                         unlock_policy_rwsem_write(cpu);
855                         goto err_out_unregister;
856                 }
857         }
858
859         policy->governor = NULL; /* to assure that the starting sequence is
860                                   * run in cpufreq_set_policy */
861         unlock_policy_rwsem_write(cpu);
862
863         /* set default policy */
864         ret = cpufreq_set_policy(&new_policy);
865         if (ret) {
866                 dprintk("setting policy failed\n");
867                 goto err_out_unregister;
868         }
869
870         module_put(cpufreq_driver->owner);
871         dprintk("initialization complete\n");
872         cpufreq_debug_enable_ratelimit();
873
874         return 0;
875
876
877 err_out_unregister:
878         spin_lock_irqsave(&cpufreq_driver_lock, flags);
879         for_each_cpu_mask(j, policy->cpus)
880                 cpufreq_cpu_data[j] = NULL;
881         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
882
883         kobject_unregister(&policy->kobj);
884         wait_for_completion(&policy->kobj_unregister);
885
886 err_out_driver_exit:
887         if (cpufreq_driver->exit)
888                 cpufreq_driver->exit(policy);
889
890 err_out:
891         kfree(policy);
892
893 nomem_out:
894         module_put(cpufreq_driver->owner);
895 module_out:
896         cpufreq_debug_enable_ratelimit();
897         return ret;
898 }
899
900
901 /**
902  * __cpufreq_remove_dev - remove a CPU device
903  *
904  * Removes the cpufreq interface for a CPU device.
905  * Caller should already have policy_rwsem in write mode for this CPU.
906  * This routine frees the rwsem before returning.
907  */
908 static int __cpufreq_remove_dev (struct sys_device * sys_dev)
909 {
910         unsigned int cpu = sys_dev->id;
911         unsigned long flags;
912         struct cpufreq_policy *data;
913 #ifdef CONFIG_SMP
914         struct sys_device *cpu_sys_dev;
915         unsigned int j;
916 #endif
917
918         cpufreq_debug_disable_ratelimit();
919         dprintk("unregistering CPU %u\n", cpu);
920
921         spin_lock_irqsave(&cpufreq_driver_lock, flags);
922         data = cpufreq_cpu_data[cpu];
923
924         if (!data) {
925                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
926                 cpufreq_debug_enable_ratelimit();
927                 unlock_policy_rwsem_write(cpu);
928                 return -EINVAL;
929         }
930         cpufreq_cpu_data[cpu] = NULL;
931
932
933 #ifdef CONFIG_SMP
934         /* if this isn't the CPU which is the parent of the kobj, we
935          * only need to unlink, put and exit
936          */
937         if (unlikely(cpu != data->cpu)) {
938                 dprintk("removing link\n");
939                 cpu_clear(cpu, data->cpus);
940                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
941                 sysfs_remove_link(&sys_dev->kobj, "cpufreq");
942                 cpufreq_cpu_put(data);
943                 cpufreq_debug_enable_ratelimit();
944                 unlock_policy_rwsem_write(cpu);
945                 return 0;
946         }
947 #endif
948
949
950         if (!kobject_get(&data->kobj)) {
951                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
952                 cpufreq_debug_enable_ratelimit();
953                 unlock_policy_rwsem_write(cpu);
954                 return -EFAULT;
955         }
956
957 #ifdef CONFIG_SMP
958         /* if we have other CPUs still registered, we need to unlink them,
959          * or else wait_for_completion below will lock up. Clean the
960          * cpufreq_cpu_data[] while holding the lock, and remove the sysfs
961          * links afterwards.
962          */
963         if (unlikely(cpus_weight(data->cpus) > 1)) {
964                 for_each_cpu_mask(j, data->cpus) {
965                         if (j == cpu)
966                                 continue;
967                         cpufreq_cpu_data[j] = NULL;
968                 }
969         }
970
971         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
972
973         if (unlikely(cpus_weight(data->cpus) > 1)) {
974                 for_each_cpu_mask(j, data->cpus) {
975                         if (j == cpu)
976                                 continue;
977                         dprintk("removing link for cpu %u\n", j);
978                         cpu_sys_dev = get_cpu_sysdev(j);
979                         sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq");
980                         cpufreq_cpu_put(data);
981                 }
982         }
983 #else
984         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
985 #endif
986
987         if (cpufreq_driver->target)
988                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
989
990         unlock_policy_rwsem_write(cpu);
991
992         kobject_unregister(&data->kobj);
993
994         kobject_put(&data->kobj);
995
996         /* we need to make sure that the underlying kobj is actually
997          * not referenced anymore by anybody before we proceed with
998          * unloading.
999          */
1000         dprintk("waiting for dropping of refcount\n");
1001         wait_for_completion(&data->kobj_unregister);
1002         dprintk("wait complete\n");
1003
1004         if (cpufreq_driver->exit)
1005                 cpufreq_driver->exit(data);
1006
1007         kfree(data);
1008
1009         cpufreq_debug_enable_ratelimit();
1010         return 0;
1011 }
1012
1013
1014 static int cpufreq_remove_dev (struct sys_device * sys_dev)
1015 {
1016         unsigned int cpu = sys_dev->id;
1017         int retval;
1018
1019         if (cpu_is_offline(cpu))
1020                 return 0;
1021
1022         if (unlikely(lock_policy_rwsem_write(cpu)))
1023                 BUG();
1024
1025         retval = __cpufreq_remove_dev(sys_dev);
1026         return retval;
1027 }
1028
1029
1030 static void handle_update(struct work_struct *work)
1031 {
1032         struct cpufreq_policy *policy =
1033                 container_of(work, struct cpufreq_policy, update);
1034         unsigned int cpu = policy->cpu;
1035         dprintk("handle_update for cpu %u called\n", cpu);
1036         cpufreq_update_policy(cpu);
1037 }
1038
1039 /**
1040  *      cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble.
1041  *      @cpu: cpu number
1042  *      @old_freq: CPU frequency the kernel thinks the CPU runs at
1043  *      @new_freq: CPU frequency the CPU actually runs at
1044  *
1045  *      We adjust to current frequency first, and need to clean up later. So either call
1046  *      to cpufreq_update_policy() or schedule handle_update()).
1047  */
1048 static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq,
1049                                 unsigned int new_freq)
1050 {
1051         struct cpufreq_freqs freqs;
1052
1053         dprintk("Warning: CPU frequency out of sync: cpufreq and timing "
1054                "core thinks of %u, is %u kHz.\n", old_freq, new_freq);
1055
1056         freqs.cpu = cpu;
1057         freqs.old = old_freq;
1058         freqs.new = new_freq;
1059         cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
1060         cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
1061 }
1062
1063
1064 /**
1065  * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur
1066  * @cpu: CPU number
1067  *
1068  * This is the last known freq, without actually getting it from the driver.
1069  * Return value will be same as what is shown in scaling_cur_freq in sysfs.
1070  */
1071 unsigned int cpufreq_quick_get(unsigned int cpu)
1072 {
1073         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
1074         unsigned int ret_freq = 0;
1075
1076         if (policy) {
1077                 if (unlikely(lock_policy_rwsem_read(cpu)))
1078                         return ret_freq;
1079
1080                 ret_freq = policy->cur;
1081
1082                 unlock_policy_rwsem_read(cpu);
1083                 cpufreq_cpu_put(policy);
1084         }
1085
1086         return (ret_freq);
1087 }
1088 EXPORT_SYMBOL(cpufreq_quick_get);
1089
1090
1091 static unsigned int __cpufreq_get(unsigned int cpu)
1092 {
1093         struct cpufreq_policy *policy = cpufreq_cpu_data[cpu];
1094         unsigned int ret_freq = 0;
1095
1096         if (!cpufreq_driver->get)
1097                 return (ret_freq);
1098
1099         ret_freq = cpufreq_driver->get(cpu);
1100
1101         if (ret_freq && policy->cur &&
1102                 !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1103                 /* verify no discrepancy between actual and
1104                                         saved value exists */
1105                 if (unlikely(ret_freq != policy->cur)) {
1106                         cpufreq_out_of_sync(cpu, policy->cur, ret_freq);
1107                         schedule_work(&policy->update);
1108                 }
1109         }
1110
1111         return (ret_freq);
1112 }
1113
/**
 * cpufreq_get - get the current CPU frequency (in kHz)
 * @cpu: CPU number
 *
 * Get the CPU current (static) CPU frequency. The value is obtained via
 * __cpufreq_get() while holding the policy rwsem for reading; 0 is
 * returned if the CPU has no policy or the rwsem cannot be taken.
 */
unsigned int cpufreq_get(unsigned int cpu)
{
	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
	unsigned int freq = 0;

	if (!policy)
		return freq;

	if (likely(!lock_policy_rwsem_read(cpu))) {
		freq = __cpufreq_get(cpu);
		unlock_policy_rwsem_read(cpu);
	}

	cpufreq_cpu_put(policy);
	return freq;
}
EXPORT_SYMBOL(cpufreq_get);
1141
1142
1143 /**
1144  *      cpufreq_suspend - let the low level driver prepare for suspend
1145  */
1146
1147 static int cpufreq_suspend(struct sys_device * sysdev, pm_message_t pmsg)
1148 {
1149         int cpu = sysdev->id;
1150         int ret = 0;
1151         unsigned int cur_freq = 0;
1152         struct cpufreq_policy *cpu_policy;
1153
1154         dprintk("suspending cpu %u\n", cpu);
1155
1156         if (!cpu_online(cpu))
1157                 return 0;
1158
1159         /* we may be lax here as interrupts are off. Nonetheless
1160          * we need to grab the correct cpu policy, as to check
1161          * whether we really run on this CPU.
1162          */
1163
1164         cpu_policy = cpufreq_cpu_get(cpu);
1165         if (!cpu_policy)
1166                 return -EINVAL;
1167
1168         /* only handle each CPU group once */
1169         if (unlikely(cpu_policy->cpu != cpu)) {
1170                 cpufreq_cpu_put(cpu_policy);
1171                 return 0;
1172         }
1173
1174         if (cpufreq_driver->suspend) {
1175                 ret = cpufreq_driver->suspend(cpu_policy, pmsg);
1176                 if (ret) {
1177                         printk(KERN_ERR "cpufreq: suspend failed in ->suspend "
1178                                         "step on CPU %u\n", cpu_policy->cpu);
1179                         cpufreq_cpu_put(cpu_policy);
1180                         return ret;
1181                 }
1182         }
1183
1184
1185         if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)
1186                 goto out;
1187
1188         if (cpufreq_driver->get)
1189                 cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1190
1191         if (!cur_freq || !cpu_policy->cur) {
1192                 printk(KERN_ERR "cpufreq: suspend failed to assert current "
1193                        "frequency is what timing core thinks it is.\n");
1194                 goto out;
1195         }
1196
1197         if (unlikely(cur_freq != cpu_policy->cur)) {
1198                 struct cpufreq_freqs freqs;
1199
1200                 if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1201                         dprintk("Warning: CPU frequency is %u, "
1202                                "cpufreq assumed %u kHz.\n",
1203                                cur_freq, cpu_policy->cur);
1204
1205                 freqs.cpu = cpu;
1206                 freqs.old = cpu_policy->cur;
1207                 freqs.new = cur_freq;
1208
1209                 srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
1210                                     CPUFREQ_SUSPENDCHANGE, &freqs);
1211                 adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs);
1212
1213                 cpu_policy->cur = cur_freq;
1214         }
1215
1216 out:
1217         cpufreq_cpu_put(cpu_policy);
1218         return 0;
1219 }
1220
1221 /**
1222  *      cpufreq_resume -  restore proper CPU frequency handling after resume
1223  *
1224  *      1.) resume CPUfreq hardware support (cpufreq_driver->resume())
1225  *      2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync
1226  *      3.) schedule call cpufreq_update_policy() ASAP as interrupts are
1227  *          restored.
1228  */
1229 static int cpufreq_resume(struct sys_device * sysdev)
1230 {
1231         int cpu = sysdev->id;
1232         int ret = 0;
1233         struct cpufreq_policy *cpu_policy;
1234
1235         dprintk("resuming cpu %u\n", cpu);
1236
1237         if (!cpu_online(cpu))
1238                 return 0;
1239
1240         /* we may be lax here as interrupts are off. Nonetheless
1241          * we need to grab the correct cpu policy, as to check
1242          * whether we really run on this CPU.
1243          */
1244
1245         cpu_policy = cpufreq_cpu_get(cpu);
1246         if (!cpu_policy)
1247                 return -EINVAL;
1248
1249         /* only handle each CPU group once */
1250         if (unlikely(cpu_policy->cpu != cpu)) {
1251                 cpufreq_cpu_put(cpu_policy);
1252                 return 0;
1253         }
1254
1255         if (cpufreq_driver->resume) {
1256                 ret = cpufreq_driver->resume(cpu_policy);
1257                 if (ret) {
1258                         printk(KERN_ERR "cpufreq: resume failed in ->resume "
1259                                         "step on CPU %u\n", cpu_policy->cpu);
1260                         cpufreq_cpu_put(cpu_policy);
1261                         return ret;
1262                 }
1263         }
1264
1265         if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) {
1266                 unsigned int cur_freq = 0;
1267
1268                 if (cpufreq_driver->get)
1269                         cur_freq = cpufreq_driver->get(cpu_policy->cpu);
1270
1271                 if (!cur_freq || !cpu_policy->cur) {
1272                         printk(KERN_ERR "cpufreq: resume failed to assert "
1273                                         "current frequency is what timing core "
1274                                         "thinks it is.\n");
1275                         goto out;
1276                 }
1277
1278                 if (unlikely(cur_freq != cpu_policy->cur)) {
1279                         struct cpufreq_freqs freqs;
1280
1281                         if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN))
1282                                 dprintk("Warning: CPU frequency"
1283                                        "is %u, cpufreq assumed %u kHz.\n",
1284                                        cur_freq, cpu_policy->cur);
1285
1286                         freqs.cpu = cpu;
1287                         freqs.old = cpu_policy->cur;
1288                         freqs.new = cur_freq;
1289
1290                         srcu_notifier_call_chain(
1291                                         &cpufreq_transition_notifier_list,
1292                                         CPUFREQ_RESUMECHANGE, &freqs);
1293                         adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs);
1294
1295                         cpu_policy->cur = cur_freq;
1296                 }
1297         }
1298
1299 out:
1300         schedule_work(&cpu_policy->update);
1301         cpufreq_cpu_put(cpu_policy);
1302         return ret;
1303 }
1304
1305 static struct sysdev_driver cpufreq_sysdev_driver = {
1306         .add            = cpufreq_add_dev,
1307         .remove         = cpufreq_remove_dev,
1308         .suspend        = cpufreq_suspend,
1309         .resume         = cpufreq_resume,
1310 };
1311
1312
1313 /*********************************************************************
1314  *                     NOTIFIER LISTS INTERFACE                      *
1315  *********************************************************************/
1316
1317 /**
1318  *      cpufreq_register_notifier - register a driver with cpufreq
1319  *      @nb: notifier function to register
1320  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1321  *
1322  *      Add a driver to one of two lists: either a list of drivers that
1323  *      are notified about clock rate changes (once before and once after
1324  *      the transition), or a list of drivers that are notified about
1325  *      changes in cpufreq policy.
1326  *
1327  *      This function may sleep, and has the same return conditions as
1328  *      blocking_notifier_chain_register.
1329  */
1330 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list)
1331 {
1332         int ret;
1333
1334         switch (list) {
1335         case CPUFREQ_TRANSITION_NOTIFIER:
1336                 ret = srcu_notifier_chain_register(
1337                                 &cpufreq_transition_notifier_list, nb);
1338                 break;
1339         case CPUFREQ_POLICY_NOTIFIER:
1340                 ret = blocking_notifier_chain_register(
1341                                 &cpufreq_policy_notifier_list, nb);
1342                 break;
1343         default:
1344                 ret = -EINVAL;
1345         }
1346
1347         return ret;
1348 }
1349 EXPORT_SYMBOL(cpufreq_register_notifier);
1350
1351
1352 /**
1353  *      cpufreq_unregister_notifier - unregister a driver with cpufreq
1354  *      @nb: notifier block to be unregistered
1355  *      @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER
1356  *
1357  *      Remove a driver from the CPU frequency notifier list.
1358  *
1359  *      This function may sleep, and has the same return conditions as
1360  *      blocking_notifier_chain_unregister.
1361  */
1362 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list)
1363 {
1364         int ret;
1365
1366         switch (list) {
1367         case CPUFREQ_TRANSITION_NOTIFIER:
1368                 ret = srcu_notifier_chain_unregister(
1369                                 &cpufreq_transition_notifier_list, nb);
1370                 break;
1371         case CPUFREQ_POLICY_NOTIFIER:
1372                 ret = blocking_notifier_chain_unregister(
1373                                 &cpufreq_policy_notifier_list, nb);
1374                 break;
1375         default:
1376                 ret = -EINVAL;
1377         }
1378
1379         return ret;
1380 }
1381 EXPORT_SYMBOL(cpufreq_unregister_notifier);
1382
1383
1384 /*********************************************************************
1385  *                              GOVERNORS                            *
1386  *********************************************************************/
1387
1388
1389 int __cpufreq_driver_target(struct cpufreq_policy *policy,
1390                             unsigned int target_freq,
1391                             unsigned int relation)
1392 {
1393         int retval = -EINVAL;
1394
1395         dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu,
1396                 target_freq, relation);
1397         if (cpu_online(policy->cpu) && cpufreq_driver->target)
1398                 retval = cpufreq_driver->target(policy, target_freq, relation);
1399
1400         return retval;
1401 }
1402 EXPORT_SYMBOL_GPL(__cpufreq_driver_target);
1403
1404 int cpufreq_driver_target(struct cpufreq_policy *policy,
1405                           unsigned int target_freq,
1406                           unsigned int relation)
1407 {
1408         int ret;
1409
1410         policy = cpufreq_cpu_get(policy->cpu);
1411         if (!policy)
1412                 return -EINVAL;
1413
1414         if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1415                 return -EINVAL;
1416
1417         ret = __cpufreq_driver_target(policy, target_freq, relation);
1418
1419         unlock_policy_rwsem_write(policy->cpu);
1420
1421         cpufreq_cpu_put(policy);
1422         return ret;
1423 }
1424 EXPORT_SYMBOL_GPL(cpufreq_driver_target);
1425
1426 int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
1427 {
1428         int ret = 0;
1429
1430         policy = cpufreq_cpu_get(policy->cpu);
1431         if (!policy)
1432                 return -EINVAL;
1433
1434         if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
1435                 ret = cpufreq_driver->getavg(policy->cpu);
1436
1437         cpufreq_cpu_put(policy);
1438         return ret;
1439 }
1440 EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg);
1441
1442 /*
1443  * when "event" is CPUFREQ_GOV_LIMITS
1444  */
1445
1446 static int __cpufreq_governor(struct cpufreq_policy *policy,
1447                                         unsigned int event)
1448 {
1449         int ret;
1450
1451         if (!try_module_get(policy->governor->owner))
1452                 return -EINVAL;
1453
1454         dprintk("__cpufreq_governor for CPU %u, event %u\n",
1455                                                 policy->cpu, event);
1456         ret = policy->governor->governor(policy, event);
1457
1458         /* we keep one module reference alive for
1459                         each CPU governed by this CPU */
1460         if ((event != CPUFREQ_GOV_START) || ret)
1461                 module_put(policy->governor->owner);
1462         if ((event == CPUFREQ_GOV_STOP) && !ret)
1463                 module_put(policy->governor->owner);
1464
1465         return ret;
1466 }
1467
1468
1469 int cpufreq_register_governor(struct cpufreq_governor *governor)
1470 {
1471         int err;
1472
1473         if (!governor)
1474                 return -EINVAL;
1475
1476         mutex_lock(&cpufreq_governor_mutex);
1477
1478         err = -EBUSY;
1479         if (__find_governor(governor->name) == NULL) {
1480                 err = 0;
1481                 list_add(&governor->governor_list, &cpufreq_governor_list);
1482         }
1483
1484         mutex_unlock(&cpufreq_governor_mutex);
1485         return err;
1486 }
1487 EXPORT_SYMBOL_GPL(cpufreq_register_governor);
1488
1489
1490 void cpufreq_unregister_governor(struct cpufreq_governor *governor)
1491 {
1492         if (!governor)
1493                 return;
1494
1495         mutex_lock(&cpufreq_governor_mutex);
1496         list_del(&governor->governor_list);
1497         mutex_unlock(&cpufreq_governor_mutex);
1498         return;
1499 }
1500 EXPORT_SYMBOL_GPL(cpufreq_unregister_governor);
1501
1502
1503
1504 /*********************************************************************
1505  *                          POLICY INTERFACE                         *
1506  *********************************************************************/
1507
1508 /**
1509  * cpufreq_get_policy - get the current cpufreq_policy
1510  * @policy: struct cpufreq_policy into which the current cpufreq_policy is written
1511  *
1512  * Reads the current cpufreq policy.
1513  */
1514 int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu)
1515 {
1516         struct cpufreq_policy *cpu_policy;
1517         if (!policy)
1518                 return -EINVAL;
1519
1520         cpu_policy = cpufreq_cpu_get(cpu);
1521         if (!cpu_policy)
1522                 return -EINVAL;
1523
1524         memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy));
1525
1526         cpufreq_cpu_put(cpu_policy);
1527         return 0;
1528 }
1529 EXPORT_SYMBOL(cpufreq_get_policy);
1530
1531
1532 /*
1533  * data   : current policy.
1534  * policy : policy to be set.
1535  */
1536 static int __cpufreq_set_policy(struct cpufreq_policy *data,
1537                                 struct cpufreq_policy *policy)
1538 {
1539         int ret = 0;
1540
1541         cpufreq_debug_disable_ratelimit();
1542         dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu,
1543                 policy->min, policy->max);
1544
1545         memcpy(&policy->cpuinfo, &data->cpuinfo,
1546                                 sizeof(struct cpufreq_cpuinfo));
1547
1548         if (policy->min > data->min && policy->min > policy->max) {
1549                 ret = -EINVAL;
1550                 goto error_out;
1551         }
1552
1553         /* verify the cpu speed can be set within this limit */
1554         ret = cpufreq_driver->verify(policy);
1555         if (ret)
1556                 goto error_out;
1557
1558         /* adjust if necessary - all reasons */
1559         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1560                         CPUFREQ_ADJUST, policy);
1561
1562         /* adjust if necessary - hardware incompatibility*/
1563         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1564                         CPUFREQ_INCOMPATIBLE, policy);
1565
1566         /* verify the cpu speed can be set within this limit,
1567            which might be different to the first one */
1568         ret = cpufreq_driver->verify(policy);
1569         if (ret)
1570                 goto error_out;
1571
1572         /* notification of the new policy */
1573         blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
1574                         CPUFREQ_NOTIFY, policy);
1575
1576         data->min = policy->min;
1577         data->max = policy->max;
1578
1579         dprintk("new min and max freqs are %u - %u kHz\n",
1580                                         data->min, data->max);
1581
1582         if (cpufreq_driver->setpolicy) {
1583                 data->policy = policy->policy;
1584                 dprintk("setting range\n");
1585                 ret = cpufreq_driver->setpolicy(policy);
1586         } else {
1587                 if (policy->governor != data->governor) {
1588                         /* save old, working values */
1589                         struct cpufreq_governor *old_gov = data->governor;
1590
1591                         dprintk("governor switch\n");
1592
1593                         /* end old governor */
1594                         if (data->governor)
1595                                 __cpufreq_governor(data, CPUFREQ_GOV_STOP);
1596
1597                         /* start new governor */
1598                         data->governor = policy->governor;
1599                         if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
1600                                 /* new governor failed, so re-start old one */
1601                                 dprintk("starting governor %s failed\n",
1602                                                         data->governor->name);
1603                                 if (old_gov) {
1604                                         data->governor = old_gov;
1605                                         __cpufreq_governor(data,
1606                                                            CPUFREQ_GOV_START);
1607                                 }
1608                                 ret = -EINVAL;
1609                                 goto error_out;
1610                         }
1611                         /* might be a policy change, too, so fall through */
1612                 }
1613                 dprintk("governor: change or update limits\n");
1614                 __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
1615         }
1616
1617 error_out:
1618         cpufreq_debug_enable_ratelimit();
1619         return ret;
1620 }
1621
1622 /**
1623  *      cpufreq_set_policy - set a new CPUFreq policy
1624  *      @policy: policy to be set.
1625  *
1626  *      Sets a new CPU frequency and voltage scaling policy.
1627  */
1628 int cpufreq_set_policy(struct cpufreq_policy *policy)
1629 {
1630         int ret = 0;
1631         struct cpufreq_policy *data;
1632
1633         if (!policy)
1634                 return -EINVAL;
1635
1636         data = cpufreq_cpu_get(policy->cpu);
1637         if (!data)
1638                 return -EINVAL;
1639
1640         if (unlikely(lock_policy_rwsem_write(policy->cpu)))
1641                 return -EINVAL;
1642
1643
1644         ret = __cpufreq_set_policy(data, policy);
1645         data->user_policy.min = data->min;
1646         data->user_policy.max = data->max;
1647         data->user_policy.policy = data->policy;
1648         data->user_policy.governor = data->governor;
1649
1650         unlock_policy_rwsem_write(policy->cpu);
1651
1652         cpufreq_cpu_put(data);
1653
1654         return ret;
1655 }
1656 EXPORT_SYMBOL(cpufreq_set_policy);
1657
1658
1659 /**
1660  *      cpufreq_update_policy - re-evaluate an existing cpufreq policy
1661  *      @cpu: CPU which shall be re-evaluated
1662  *
1663  *      Usefull for policy notifiers which have different necessities
1664  *      at different times.
1665  */
1666 int cpufreq_update_policy(unsigned int cpu)
1667 {
1668         struct cpufreq_policy *data = cpufreq_cpu_get(cpu);
1669         struct cpufreq_policy policy;
1670         int ret = 0;
1671
1672         if (!data)
1673                 return -ENODEV;
1674
1675         if (unlikely(lock_policy_rwsem_write(cpu)))
1676                 return -EINVAL;
1677
1678         dprintk("updating policy for CPU %u\n", cpu);
1679         memcpy(&policy, data, sizeof(struct cpufreq_policy));
1680         policy.min = data->user_policy.min;
1681         policy.max = data->user_policy.max;
1682         policy.policy = data->user_policy.policy;
1683         policy.governor = data->user_policy.governor;
1684
1685         /* BIOS might change freq behind our back
1686           -> ask driver for current freq and notify governors about a change */
1687         if (cpufreq_driver->get) {
1688                 policy.cur = cpufreq_driver->get(cpu);
1689                 if (!data->cur) {
1690                         dprintk("Driver did not initialize current freq");
1691                         data->cur = policy.cur;
1692                 } else {
1693                         if (data->cur != policy.cur)
1694                                 cpufreq_out_of_sync(cpu, data->cur,
1695                                                                 policy.cur);
1696                 }
1697         }
1698
1699         ret = __cpufreq_set_policy(data, &policy);
1700
1701         unlock_policy_rwsem_write(cpu);
1702
1703         cpufreq_cpu_put(data);
1704         return ret;
1705 }
1706 EXPORT_SYMBOL(cpufreq_update_policy);
1707
1708 static int cpufreq_cpu_callback(struct notifier_block *nfb,
1709                                         unsigned long action, void *hcpu)
1710 {
1711         unsigned int cpu = (unsigned long)hcpu;
1712         struct sys_device *sys_dev;
1713         struct cpufreq_policy *policy;
1714
1715         sys_dev = get_cpu_sysdev(cpu);
1716         if (sys_dev) {
1717                 switch (action) {
1718                 case CPU_ONLINE:
1719                         cpufreq_add_dev(sys_dev);
1720                         break;
1721                 case CPU_DOWN_PREPARE:
1722                         if (unlikely(lock_policy_rwsem_write(cpu)))
1723                                 BUG();
1724
1725                         policy = cpufreq_cpu_data[cpu];
1726                         if (policy) {
1727                                 __cpufreq_driver_target(policy, policy->min,
1728                                                 CPUFREQ_RELATION_H);
1729                         }
1730                         __cpufreq_remove_dev(sys_dev);
1731                         break;
1732                 case CPU_DOWN_FAILED:
1733                         cpufreq_add_dev(sys_dev);
1734                         break;
1735                 }
1736         }
1737         return NOTIFY_OK;
1738 }
1739
1740 static struct notifier_block __cpuinitdata cpufreq_cpu_notifier =
1741 {
1742     .notifier_call = cpufreq_cpu_callback,
1743 };
1744
1745 /*********************************************************************
1746  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
1747  *********************************************************************/
1748
1749 /**
1750  * cpufreq_register_driver - register a CPU Frequency driver
1751  * @driver_data: A struct cpufreq_driver containing the values#
1752  * submitted by the CPU Frequency driver.
1753  *
1754  *   Registers a CPU Frequency driver to this core code. This code
1755  * returns zero on success, -EBUSY when another driver got here first
1756  * (and isn't unregistered in the meantime).
1757  *
1758  */
1759 int cpufreq_register_driver(struct cpufreq_driver *driver_data)
1760 {
1761         unsigned long flags;
1762         int ret;
1763
1764         if (!driver_data || !driver_data->verify || !driver_data->init ||
1765             ((!driver_data->setpolicy) && (!driver_data->target)))
1766                 return -EINVAL;
1767
1768         dprintk("trying to register driver %s\n", driver_data->name);
1769
1770         if (driver_data->setpolicy)
1771                 driver_data->flags |= CPUFREQ_CONST_LOOPS;
1772
1773         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1774         if (cpufreq_driver) {
1775                 spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1776                 return -EBUSY;
1777         }
1778         cpufreq_driver = driver_data;
1779         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1780
1781         ret = sysdev_driver_register(&cpu_sysdev_class,&cpufreq_sysdev_driver);
1782
1783         if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) {
1784                 int i;
1785                 ret = -ENODEV;
1786
1787                 /* check for at least one working CPU */
1788                 for (i=0; i<NR_CPUS; i++)
1789                         if (cpufreq_cpu_data[i])
1790                                 ret = 0;
1791
1792                 /* if all ->init() calls failed, unregister */
1793                 if (ret) {
1794                         dprintk("no CPU initialized for driver %s\n",
1795                                                         driver_data->name);
1796                         sysdev_driver_unregister(&cpu_sysdev_class,
1797                                                 &cpufreq_sysdev_driver);
1798
1799                         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1800                         cpufreq_driver = NULL;
1801                         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1802                 }
1803         }
1804
1805         if (!ret) {
1806                 register_hotcpu_notifier(&cpufreq_cpu_notifier);
1807                 dprintk("driver %s up and running\n", driver_data->name);
1808                 cpufreq_debug_enable_ratelimit();
1809         }
1810
1811         return (ret);
1812 }
1813 EXPORT_SYMBOL_GPL(cpufreq_register_driver);
1814
1815
1816 /**
1817  * cpufreq_unregister_driver - unregister the current CPUFreq driver
1818  *
1819  *    Unregister the current CPUFreq driver. Only call this if you have
1820  * the right to do so, i.e. if you have succeeded in initialising before!
1821  * Returns zero if successful, and -EINVAL if the cpufreq_driver is
1822  * currently not initialised.
1823  */
1824 int cpufreq_unregister_driver(struct cpufreq_driver *driver)
1825 {
1826         unsigned long flags;
1827
1828         cpufreq_debug_disable_ratelimit();
1829
1830         if (!cpufreq_driver || (driver != cpufreq_driver)) {
1831                 cpufreq_debug_enable_ratelimit();
1832                 return -EINVAL;
1833         }
1834
1835         dprintk("unregistering driver %s\n", driver->name);
1836
1837         sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver);
1838         unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
1839
1840         spin_lock_irqsave(&cpufreq_driver_lock, flags);
1841         cpufreq_driver = NULL;
1842         spin_unlock_irqrestore(&cpufreq_driver_lock, flags);
1843
1844         return 0;
1845 }
1846 EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
1847
1848 static int __init cpufreq_core_init(void)
1849 {
1850         int cpu;
1851
1852         for_each_possible_cpu(cpu) {
1853                 per_cpu(policy_cpu, cpu) = -1;
1854                 init_rwsem(&per_cpu(cpu_policy_rwsem, cpu));
1855         }
1856         return 0;
1857 }
1858
1859 core_initcall(cpufreq_core_init);