Merge /spare/repo/linux-2.6/
[linux-2.6] / arch / ppc64 / kernel / idle.c
/*
 * Idle daemon for PowerPC.  Idle daemon will handle any action
 * that needs to be taken when the system becomes idle.
 *
 * Originally Written by Cort Dougan (cort@cs.nmt.edu)
 *
 * iSeries support added by Mike Corrigan <mikejc@us.ibm.com>
 *
 * Additional shared processor, SMT, and firmware support
 *    Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
17
18 #include <linux/config.h>
19 #include <linux/sched.h>
20 #include <linux/kernel.h>
21 #include <linux/smp.h>
22 #include <linux/cpu.h>
23 #include <linux/module.h>
24 #include <linux/sysctl.h>
25 #include <linux/smp.h>
26
27 #include <asm/system.h>
28 #include <asm/processor.h>
29 #include <asm/mmu.h>
30 #include <asm/cputable.h>
31 #include <asm/time.h>
32 #include <asm/iSeries/HvCall.h>
33 #include <asm/iSeries/ItLpQueue.h>
34 #include <asm/plpar_wrappers.h>
35 #include <asm/systemcfg.h>
36
/*
 * Declared here, defined outside this file.  native_idle() calls it
 * whenever no reschedule is pending.
 */
extern void power4_idle(void);

/*
 * The idle loop implementation run by cpu_idle().  idle_setup() picks one
 * of the loops defined in this file and stores it here.
 */
static int (*idle_loop)(void);
40
41 #ifdef CONFIG_PPC_ISERIES
/*
 * Longest and shortest duration, in get_tb() units, measured around a single
 * HvCall_yieldProcessor() call in yield_shared_processor().  Within this file
 * they are only written, never read.
 */
static unsigned long maxYieldTime = 0;
static unsigned long minYieldTime = 0xffffffffffffffffUL;
44
/*
 * Load 0x5555 into r0 and execute the "sc" instruction.  r0 and r3 are
 * declared as clobbered.
 *
 * NOTE(review): 0x5555 is presumably a dummy system call number whose only
 * purpose is to make the kernel entry/exit path process pending iSeries LP
 * events -- confirm against the system call entry code.
 */
static inline void process_iSeries_events(void)
{
        asm volatile ("li 0,0x5555; sc" : : : "r0", "r3");
}
49
50 static void yield_shared_processor(void)
51 {
52         unsigned long tb;
53         unsigned long yieldTime;
54
55         HvCall_setEnabledInterrupts(HvCall_MaskIPI |
56                                     HvCall_MaskLpEvent |
57                                     HvCall_MaskLpProd |
58                                     HvCall_MaskTimeout);
59
60         tb = get_tb();
61         /* Compute future tb value when yield should expire */
62         HvCall_yieldProcessor(HvCall_YieldTimed, tb+tb_ticks_per_jiffy);
63
64         yieldTime = get_tb() - tb;
65         if (yieldTime > maxYieldTime)
66                 maxYieldTime = yieldTime;
67
68         if (yieldTime < minYieldTime)
69                 minYieldTime = yieldTime;
70         
71         /*
72          * The decrementer stops during the yield.  Force a fake decrementer
73          * here and let the timer_interrupt code sort out the actual time.
74          */
75         get_paca()->lppaca.int_dword.fields.decr_int = 1;
76         process_iSeries_events();
77 }
78
79 static int iSeries_idle(void)
80 {
81         struct paca_struct *lpaca;
82         long oldval;
83
84         /* ensure iSeries run light will be out when idle */
85         ppc64_runlatch_off();
86
87         lpaca = get_paca();
88
89         while (1) {
90                 if (lpaca->lppaca.shared_proc) {
91                         if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
92                                 process_iSeries_events();
93                         if (!need_resched())
94                                 yield_shared_processor();
95                 } else {
96                         oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
97
98                         if (!oldval) {
99                                 set_thread_flag(TIF_POLLING_NRFLAG);
100
101                                 while (!need_resched()) {
102                                         HMT_medium();
103                                         if (ItLpQueue_isLpIntPending(lpaca->lpqueue_ptr))
104                                                 process_iSeries_events();
105                                         HMT_low();
106                                 }
107
108                                 HMT_medium();
109                                 clear_thread_flag(TIF_POLLING_NRFLAG);
110                         } else {
111                                 set_need_resched();
112                         }
113                 }
114
115                 ppc64_runlatch_on();
116                 schedule();
117                 ppc64_runlatch_off();
118         }
119
120         return 0;
121 }
122
123 #else
124
125 static int default_idle(void)
126 {
127         long oldval;
128         unsigned int cpu = smp_processor_id();
129
130         while (1) {
131                 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
132
133                 if (!oldval) {
134                         set_thread_flag(TIF_POLLING_NRFLAG);
135
136                         while (!need_resched() && !cpu_is_offline(cpu)) {
137                                 barrier();
138                                 /*
139                                  * Go into low thread priority and possibly
140                                  * low power mode.
141                                  */
142                                 HMT_low();
143                                 HMT_very_low();
144                         }
145
146                         HMT_medium();
147                         clear_thread_flag(TIF_POLLING_NRFLAG);
148                 } else {
149                         set_need_resched();
150                 }
151
152                 schedule();
153                 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
154                         cpu_die();
155         }
156
157         return 0;
158 }
159
160 #ifdef CONFIG_PPC_PSERIES
161
162 DECLARE_PER_CPU(unsigned long, smt_snooze_delay);
163
164 int dedicated_idle(void)
165 {
166         long oldval;
167         struct paca_struct *lpaca = get_paca(), *ppaca;
168         unsigned long start_snooze;
169         unsigned long *smt_snooze_delay = &__get_cpu_var(smt_snooze_delay);
170         unsigned int cpu = smp_processor_id();
171
172         ppaca = &paca[cpu ^ 1];
173
174         while (1) {
175                 /*
176                  * Indicate to the HV that we are idle. Now would be
177                  * a good time to find other work to dispatch.
178                  */
179                 lpaca->lppaca.idle = 1;
180
181                 oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);
182                 if (!oldval) {
183                         set_thread_flag(TIF_POLLING_NRFLAG);
184                         start_snooze = __get_tb() +
185                                 *smt_snooze_delay * tb_ticks_per_usec;
186                         while (!need_resched() && !cpu_is_offline(cpu)) {
187                                 /*
188                                  * Go into low thread priority and possibly
189                                  * low power mode.
190                                  */
191                                 HMT_low();
192                                 HMT_very_low();
193
194                                 if (*smt_snooze_delay == 0 ||
195                                     __get_tb() < start_snooze)
196                                         continue;
197
198                                 HMT_medium();
199
200                                 if (!(ppaca->lppaca.idle)) {
201                                         local_irq_disable();
202
203                                         /*
204                                          * We are about to sleep the thread
205                                          * and so wont be polling any
206                                          * more.
207                                          */
208                                         clear_thread_flag(TIF_POLLING_NRFLAG);
209
210                                         /*
211                                          * SMT dynamic mode. Cede will result
212                                          * in this thread going dormant, if the
213                                          * partner thread is still doing work.
214                                          * Thread wakes up if partner goes idle,
215                                          * an interrupt is presented, or a prod
216                                          * occurs.  Returning from the cede
217                                          * enables external interrupts.
218                                          */
219                                         if (!need_resched())
220                                                 cede_processor();
221                                         else
222                                                 local_irq_enable();
223                                 } else {
224                                         /*
225                                          * Give the HV an opportunity at the
226                                          * processor, since we are not doing
227                                          * any work.
228                                          */
229                                         poll_pending();
230                                 }
231                         }
232
233                         clear_thread_flag(TIF_POLLING_NRFLAG);
234                 } else {
235                         set_need_resched();
236                 }
237
238                 HMT_medium();
239                 lpaca->lppaca.idle = 0;
240                 schedule();
241                 if (cpu_is_offline(cpu) && system_state == SYSTEM_RUNNING)
242                         cpu_die();
243         }
244         return 0;
245 }
246
247 static int shared_idle(void)
248 {
249         struct paca_struct *lpaca = get_paca();
250         unsigned int cpu = smp_processor_id();
251
252         while (1) {
253                 /*
254                  * Indicate to the HV that we are idle. Now would be
255                  * a good time to find other work to dispatch.
256                  */
257                 lpaca->lppaca.idle = 1;
258
259                 while (!need_resched() && !cpu_is_offline(cpu)) {
260                         local_irq_disable();
261
262                         /*
263                          * Yield the processor to the hypervisor.  We return if
264                          * an external interrupt occurs (which are driven prior
265                          * to returning here) or if a prod occurs from another 
266                          * processor. When returning here, external interrupts
267                          * are enabled.
268                          *
269                          * Check need_resched() again with interrupts disabled
270                          * to avoid a race.
271                          */
272                         if (!need_resched())
273                                 cede_processor();
274                         else
275                                 local_irq_enable();
276                 }
277
278                 HMT_medium();
279                 lpaca->lppaca.idle = 0;
280                 schedule();
281                 if (cpu_is_offline(smp_processor_id()) &&
282                     system_state == SYSTEM_RUNNING)
283                         cpu_die();
284         }
285
286         return 0;
287 }
288
289 #endif /* CONFIG_PPC_PSERIES */
290
291 static int native_idle(void)
292 {
293         while(1) {
294                 /* check CPU type here */
295                 if (!need_resched())
296                         power4_idle();
297                 if (need_resched())
298                         schedule();
299
300                 if (cpu_is_offline(raw_smp_processor_id()) &&
301                     system_state == SYSTEM_RUNNING)
302                         cpu_die();
303         }
304         return 0;
305 }
306
307 #endif /* CONFIG_PPC_ISERIES */
308
309 void cpu_idle(void)
310 {
311         idle_loop();
312 }
313
/*
 * Integer exposed through the "powersave-nap" sysctl entry when
 * CONFIG_SYSCTL is set.  It is not read anywhere in this file.
 */
int powersave_nap;
315
316 #ifdef CONFIG_SYSCTL
317 /*
318  * Register the sysctl to set/clear powersave_nap.
319  */
320 static ctl_table powersave_nap_ctl_table[]={
321         {
322                 .ctl_name       = KERN_PPC_POWERSAVE_NAP,
323                 .procname       = "powersave-nap",
324                 .data           = &powersave_nap,
325                 .maxlen         = sizeof(int),
326                 .mode           = 0644,
327                 .proc_handler   = &proc_dointvec,
328         },
329         { 0, },
330 };
331 static ctl_table powersave_nap_sysctl_root[] = {
332         { 1, "kernel", NULL, 0, 0755, powersave_nap_ctl_table, },
333         { 0,},
334 };
335
336 static int __init
337 register_powersave_nap_sysctl(void)
338 {
339         register_sysctl_table(powersave_nap_sysctl_root, 0);
340
341         return 0;
342 }
343 __initcall(register_powersave_nap_sysctl);
344 #endif
345
346 int idle_setup(void)
347 {
348         /*
349          * Move that junk to each platform specific file, eventually define
350          * a pSeries_idle for shared processor stuff
351          */
352 #ifdef CONFIG_PPC_ISERIES
353         idle_loop = iSeries_idle;
354         return 1;
355 #else
356         idle_loop = default_idle;
357 #endif
358 #ifdef CONFIG_PPC_PSERIES
359         if (systemcfg->platform & PLATFORM_PSERIES) {
360                 if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) {
361                         if (get_paca()->lppaca.shared_proc) {
362                                 printk(KERN_INFO "Using shared processor idle loop\n");
363                                 idle_loop = shared_idle;
364                         } else {
365                                 printk(KERN_INFO "Using dedicated idle loop\n");
366                                 idle_loop = dedicated_idle;
367                         }
368                 } else {
369                         printk(KERN_INFO "Using default idle loop\n");
370                         idle_loop = default_idle;
371                 }
372         }
373 #endif /* CONFIG_PPC_PSERIES */
374 #ifndef CONFIG_PPC_ISERIES
375         if (systemcfg->platform == PLATFORM_POWERMAC ||
376             systemcfg->platform == PLATFORM_MAPLE) {
377                 printk(KERN_INFO "Using native/NAP idle loop\n");
378                 idle_loop = native_idle;
379         }
380 #endif /* CONFIG_PPC_ISERIES */
381
382         return 1;
383 }