Merge branch 'master' of git://oak/home/sfr/kernels/iseries/work
[linux-2.6] / drivers / sbus / char / bbc_envctrl.c
1 /* $Id: bbc_envctrl.c,v 1.4 2001/04/06 16:48:08 davem Exp $
2  * bbc_envctrl.c: UltraSPARC-III environment control driver.
3  *
4  * Copyright (C) 2001 David S. Miller (davem@redhat.com)
5  */
6
7 #include <linux/kernel.h>
8 #include <linux/kthread.h>
9 #include <linux/sched.h>
10 #include <linux/slab.h>
11 #include <linux/delay.h>
12 #include <asm/oplib.h>
13 #include <asm/ebus.h>
14
15 #include "bbc_i2c.h"
16 #include "max1617.h"
17
18 #undef ENVCTRL_TRACE
19
20 /* WARNING: Making changes to this driver is very dangerous.
21  *          If you misprogram the sensor chips they can
22  *          cut the power on you instantly.
23  */
24
25 /* Two temperature sensors exist in the SunBLADE-1000 enclosure.
26  * Both are implemented using max1617 i2c devices.  Each max1617
27  * monitors 2 temperatures, one for one of the cpu dies and the other
28  * for the ambient temperature.
29  *
30  * The max1617 is capable of being programmed with power-off
31  * temperature values, one low limit and one high limit.  These
32  * can be controlled independently for the cpu or ambient temperature.
33  * If a limit is violated, the power is simply shut off.  The frequency
34  * with which the max1617 does temperature sampling can be controlled
35  * as well.
36  *
37  * Three fans exist inside the machine, all three are controlled with
38  * an i2c digital to analog converter.  There is a fan directed at the
39  * two processor slots, another for the rest of the enclosure, and the
40  * third is for the power supply.  The first two fans may be speed
41  * controlled by changing the voltage fed to them.  The third fan may
42  * only be completely off or on.  The third fan is meant to only be
43  * disabled/enabled when entering/exiting the lowest power-saving
44  * mode of the machine.
45  *
46  * An environmental control kernel thread periodically monitors all
47  * temperature sensors.  Based upon the samples it will adjust the
48  * fan speeds to try and keep the system within a certain temperature
49  * range (the goal being to make the fans as quiet as possible without
50  * allowing the system to get too hot).
51  *
52  * If the temperature begins to rise/fall outside of the acceptable
53  * operating range, a periodic warning will be sent to the kernel log.
54  * The fans will be put on full blast to attempt to deal with this
55  * situation.  After exceeding the acceptable operating range by a
56  * certain threshold, the kernel thread will shut down the system.
57  * Here, the thread is attempting to shut the machine down cleanly
58  * before the hardware based power-off event is triggered.
59  */
60
61 /* These settings are in Celsius.  We use these defaults only
62  * if we cannot interrogate the cpu-fru SEEPROM.
63  */
64 struct temp_limits {
65         s8 high_pwroff, high_shutdown, high_warn;
66         s8 low_warn, low_shutdown, low_pwroff;
67 };
68
69 static struct temp_limits cpu_temp_limits[2] = {
70         { 100, 85, 80, 5, -5, -10 },
71         { 100, 85, 80, 5, -5, -10 },
72 };
73
74 static struct temp_limits amb_temp_limits[2] = {
75         { 65, 55, 40, 5, -5, -10 },
76         { 65, 55, 40, 5, -5, -10 },
77 };
78
/* What a temperature sensor would like done with a fan.  FAN_STATE_MAX
 * is not an action; it is used as the "nothing decided yet" marker.
 */
enum fan_action {
        FAN_SLOWER,
        FAN_SAME,
        FAN_FASTER,
        FAN_FULLBLAST,
        FAN_STATE_MAX
};
80
81 struct bbc_cpu_temperature {
82         struct bbc_cpu_temperature      *next;
83
84         struct bbc_i2c_client           *client;
85         int                             index;
86
87         /* Current readings, and history. */
88         s8                              curr_cpu_temp;
89         s8                              curr_amb_temp;
90         s8                              prev_cpu_temp;
91         s8                              prev_amb_temp;
92         s8                              avg_cpu_temp;
93         s8                              avg_amb_temp;
94
95         int                             sample_tick;
96
97         enum fan_action                 fan_todo[2];
98 #define FAN_AMBIENT     0
99 #define FAN_CPU         1
100 };
101
102 struct bbc_cpu_temperature *all_bbc_temps;
103
/* State kept for one fan controller; controllers are chained through
 * 'next' on a singly linked list.
 */
struct bbc_fan_control {
        struct bbc_fan_control  *next;

        struct bbc_i2c_client   *client;        /* i2c handle for the controller */
        int                     index;

        /* Software copy of the values last chosen for the hardware. */
        int                     psupply_fan_on;         /* non-zero: supply fan on */
        int                     cpu_fan_speed;
        int                     system_fan_speed;
};

/* Head of the list of all attached fan controllers. */
struct bbc_fan_control *all_bbc_fans;
116
/* Offsets handed to bbc_i2c_writeb() for each of the three fans. */
#define CPU_FAN_REG             0xf0
#define SYS_FAN_REG             0xf2
#define PSUPPLY_FAN_REG         0xf4

/* Range that the cpu/system fan speed values are clamped to. */
#define FAN_SPEED_MIN           0x0c
#define FAN_SPEED_MAX           0x3f

/* The power supply fan is only ever written one of these two values. */
#define PSUPPLY_FAN_ON          0x1f
#define PSUPPLY_FAN_OFF         0x00
126
127 static void set_fan_speeds(struct bbc_fan_control *fp)
128 {
129         /* Put temperatures into range so we don't mis-program
130          * the hardware.
131          */
132         if (fp->cpu_fan_speed < FAN_SPEED_MIN)
133                 fp->cpu_fan_speed = FAN_SPEED_MIN;
134         if (fp->cpu_fan_speed > FAN_SPEED_MAX)
135                 fp->cpu_fan_speed = FAN_SPEED_MAX;
136         if (fp->system_fan_speed < FAN_SPEED_MIN)
137                 fp->system_fan_speed = FAN_SPEED_MIN;
138         if (fp->system_fan_speed > FAN_SPEED_MAX)
139                 fp->system_fan_speed = FAN_SPEED_MAX;
140 #ifdef ENVCTRL_TRACE
141         printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n",
142                fp->index,
143                fp->cpu_fan_speed, fp->system_fan_speed);
144 #endif
145
146         bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG);
147         bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG);
148         bbc_i2c_writeb(fp->client,
149                        (fp->psupply_fan_on ?
150                         PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF),
151                        PSUPPLY_FAN_REG);
152 }
153
154 static void get_current_temps(struct bbc_cpu_temperature *tp)
155 {
156         tp->prev_amb_temp = tp->curr_amb_temp;
157         bbc_i2c_readb(tp->client,
158                       (unsigned char *) &tp->curr_amb_temp,
159                       MAX1617_AMB_TEMP);
160         tp->prev_cpu_temp = tp->curr_cpu_temp;
161         bbc_i2c_readb(tp->client,
162                       (unsigned char *) &tp->curr_cpu_temp,
163                       MAX1617_CPU_TEMP);
164 #ifdef ENVCTRL_TRACE
165         printk("temp%d: cpu(%d C) amb(%d C)\n",
166                tp->index,
167                (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp);
168 #endif
169 }
170
171
172 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
173 {
174         static int shutting_down = 0;
175         static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
176         char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
177         char *type = "???";
178         s8 val = -1;
179
180         if (shutting_down != 0)
181                 return;
182
183         if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
184             tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
185                 type = "ambient";
186                 val = tp->curr_amb_temp;
187         } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
188                    tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
189                 type = "CPU";
190                 val = tp->curr_cpu_temp;
191         }
192
193         printk(KERN_CRIT "temp%d: Outside of safe %s "
194                "operating temperature, %d C.\n",
195                tp->index, type, val);
196
197         printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
198
199         shutting_down = 1;
200         if (kernel_execve("/sbin/shutdown", argv, envp) < 0)
201                 printk(KERN_CRIT "envctrl: shutdown execution failed\n");
202 }
203
204 #define WARN_INTERVAL   (30 * HZ)
205
206 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
207 {
208         int ret = 0;
209
210         if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
211                 if (tp->curr_amb_temp >=
212                     amb_temp_limits[tp->index].high_warn) {
213                         printk(KERN_WARNING "temp%d: "
214                                "Above safe ambient operating temperature, %d C.\n",
215                                tp->index, (int) tp->curr_amb_temp);
216                         ret = 1;
217                 } else if (tp->curr_amb_temp <
218                            amb_temp_limits[tp->index].low_warn) {
219                         printk(KERN_WARNING "temp%d: "
220                                "Below safe ambient operating temperature, %d C.\n",
221                                tp->index, (int) tp->curr_amb_temp);
222                         ret = 1;
223                 }
224                 if (ret)
225                         *last_warn = jiffies;
226         } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn ||
227                    tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn)
228                 ret = 1;
229
230         /* Now check the shutdown limits. */
231         if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
232             tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
233                 do_envctrl_shutdown(tp);
234                 ret = 1;
235         }
236
237         if (ret) {
238                 tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST;
239         } else if ((tick & (8 - 1)) == 0) {
240                 s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10;
241                 s8 amb_goal_lo;
242
243                 amb_goal_lo = amb_goal_hi - 3;
244
245                 /* We do not try to avoid 'too cold' events.  Basically we
246                  * only try to deal with over-heating and fan noise reduction.
247                  */
248                 if (tp->avg_amb_temp < amb_goal_hi) {
249                         if (tp->avg_amb_temp >= amb_goal_lo)
250                                 tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
251                         else
252                                 tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER;
253                 } else {
254                         tp->fan_todo[FAN_AMBIENT] = FAN_FASTER;
255                 }
256         } else {
257                 tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
258         }
259 }
260
261 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
262 {
263         int ret = 0;
264
265         if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
266                 if (tp->curr_cpu_temp >=
267                     cpu_temp_limits[tp->index].high_warn) {
268                         printk(KERN_WARNING "temp%d: "
269                                "Above safe CPU operating temperature, %d C.\n",
270                                tp->index, (int) tp->curr_cpu_temp);
271                         ret = 1;
272                 } else if (tp->curr_cpu_temp <
273                            cpu_temp_limits[tp->index].low_warn) {
274                         printk(KERN_WARNING "temp%d: "
275                                "Below safe CPU operating temperature, %d C.\n",
276                                tp->index, (int) tp->curr_cpu_temp);
277                         ret = 1;
278                 }
279                 if (ret)
280                         *last_warn = jiffies;
281         } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn ||
282                    tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn)
283                 ret = 1;
284
285         /* Now check the shutdown limits. */
286         if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
287             tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
288                 do_envctrl_shutdown(tp);
289                 ret = 1;
290         }
291
292         if (ret) {
293                 tp->fan_todo[FAN_CPU] = FAN_FULLBLAST;
294         } else if ((tick & (8 - 1)) == 0) {
295                 s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10;
296                 s8 cpu_goal_lo;
297
298                 cpu_goal_lo = cpu_goal_hi - 3;
299
300                 /* We do not try to avoid 'too cold' events.  Basically we
301                  * only try to deal with over-heating and fan noise reduction.
302                  */
303                 if (tp->avg_cpu_temp < cpu_goal_hi) {
304                         if (tp->avg_cpu_temp >= cpu_goal_lo)
305                                 tp->fan_todo[FAN_CPU] = FAN_SAME;
306                         else
307                                 tp->fan_todo[FAN_CPU] = FAN_SLOWER;
308                 } else {
309                         tp->fan_todo[FAN_CPU] = FAN_FASTER;
310                 }
311         } else {
312                 tp->fan_todo[FAN_CPU] = FAN_SAME;
313         }
314 }
315
316 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn)
317 {
318         tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2);
319         tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2);
320
321         analyze_ambient_temp(tp, last_warn, tp->sample_tick);
322         analyze_cpu_temp(tp, last_warn, tp->sample_tick);
323
324         tp->sample_tick++;
325 }
326
327 static enum fan_action prioritize_fan_action(int which_fan)
328 {
329         struct bbc_cpu_temperature *tp;
330         enum fan_action decision = FAN_STATE_MAX;
331
332         /* Basically, prioritize what the temperature sensors
333          * recommend we do, and perform that action on all the
334          * fans.
335          */
336         for (tp = all_bbc_temps; tp; tp = tp->next) {
337                 if (tp->fan_todo[which_fan] == FAN_FULLBLAST) {
338                         decision = FAN_FULLBLAST;
339                         break;
340                 }
341                 if (tp->fan_todo[which_fan] == FAN_SAME &&
342                     decision != FAN_FASTER)
343                         decision = FAN_SAME;
344                 else if (tp->fan_todo[which_fan] == FAN_FASTER)
345                         decision = FAN_FASTER;
346                 else if (decision != FAN_FASTER &&
347                          decision != FAN_SAME &&
348                          tp->fan_todo[which_fan] == FAN_SLOWER)
349                         decision = FAN_SLOWER;
350         }
351         if (decision == FAN_STATE_MAX)
352                 decision = FAN_SAME;
353
354         return decision;
355 }
356
357 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp)
358 {
359         enum fan_action decision = prioritize_fan_action(FAN_AMBIENT);
360         int ret;
361
362         if (decision == FAN_SAME)
363                 return 0;
364
365         ret = 1;
366         if (decision == FAN_FULLBLAST) {
367                 if (fp->system_fan_speed >= FAN_SPEED_MAX)
368                         ret = 0;
369                 else
370                         fp->system_fan_speed = FAN_SPEED_MAX;
371         } else {
372                 if (decision == FAN_FASTER) {
373                         if (fp->system_fan_speed >= FAN_SPEED_MAX)
374                                 ret = 0;
375                         else
376                                 fp->system_fan_speed += 2;
377                 } else {
378                         int orig_speed = fp->system_fan_speed;
379
380                         if (orig_speed <= FAN_SPEED_MIN ||
381                             orig_speed <= (fp->cpu_fan_speed - 3))
382                                 ret = 0;
383                         else
384                                 fp->system_fan_speed -= 1;
385                 }
386         }
387
388         return ret;
389 }
390
391 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp)
392 {
393         enum fan_action decision = prioritize_fan_action(FAN_CPU);
394         int ret;
395
396         if (decision == FAN_SAME)
397                 return 0;
398
399         ret = 1;
400         if (decision == FAN_FULLBLAST) {
401                 if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
402                         ret = 0;
403                 else
404                         fp->cpu_fan_speed = FAN_SPEED_MAX;
405         } else {
406                 if (decision == FAN_FASTER) {
407                         if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
408                                 ret = 0;
409                         else {
410                                 fp->cpu_fan_speed += 2;
411                                 if (fp->system_fan_speed <
412                                     (fp->cpu_fan_speed - 3))
413                                         fp->system_fan_speed =
414                                                 fp->cpu_fan_speed - 3;
415                         }
416                 } else {
417                         if (fp->cpu_fan_speed <= FAN_SPEED_MIN)
418                                 ret = 0;
419                         else
420                                 fp->cpu_fan_speed -= 1;
421                 }
422         }
423
424         return ret;
425 }
426
/* Re-evaluate both fan speeds for @fp and reprogram the controller
 * only if at least one of them changed.
 */
static void maybe_new_fan_speeds(struct bbc_fan_control *fp)
{
        /* Both are always evaluated, ambient first. */
        int sys_changed = maybe_new_ambient_fan_speed(fp);
        int cpu_changed = maybe_new_cpu_fan_speed(fp);

        if (sys_changed || cpu_changed)
                set_fan_speeds(fp);
}
437
438 static void fans_full_blast(void)
439 {
440         struct bbc_fan_control *fp;
441
442         /* Since we will not be monitoring things anymore, put
443          * the fans on full blast.
444          */
445         for (fp = all_bbc_fans; fp; fp = fp->next) {
446                 fp->cpu_fan_speed = FAN_SPEED_MAX;
447                 fp->system_fan_speed = FAN_SPEED_MAX;
448                 fp->psupply_fan_on = 1;
449                 set_fan_speeds(fp);
450         }
451 }
452
453 #define POLL_INTERVAL   (5 * 1000)
454 static unsigned long last_warning_jiffies;
455 static struct task_struct *kenvctrld_task;
456
457 static int kenvctrld(void *__unused)
458 {
459         printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
460         last_warning_jiffies = jiffies - WARN_INTERVAL;
461         for (;;) {
462                 struct bbc_cpu_temperature *tp;
463                 struct bbc_fan_control *fp;
464
465                 msleep_interruptible(POLL_INTERVAL);
466                 if (kthread_should_stop())
467                         break;
468
469                 for (tp = all_bbc_temps; tp; tp = tp->next) {
470                         get_current_temps(tp);
471                         analyze_temps(tp, &last_warning_jiffies);
472                 }
473                 for (fp = all_bbc_fans; fp; fp = fp->next)
474                         maybe_new_fan_speeds(fp);
475         }
476         printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n");
477
478         fans_full_blast();
479
480         return 0;
481 }
482
483 static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx)
484 {
485         struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL);
486
487         if (!tp)
488                 return;
489         memset(tp, 0, sizeof(*tp));
490         tp->client = bbc_i2c_attach(echild);
491         if (!tp->client) {
492                 kfree(tp);
493                 return;
494         }
495
496         tp->index = temp_idx;
497         {
498                 struct bbc_cpu_temperature **tpp = &all_bbc_temps;
499                 while (*tpp)
500                         tpp = &((*tpp)->next);
501                 tp->next = NULL;
502                 *tpp = tp;
503         }
504
505         /* Tell it to convert once every 5 seconds, clear all cfg
506          * bits.
507          */
508         bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE);
509         bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE);
510
511         /* Program the hard temperature limits into the chip. */
512         bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff,
513                        MAX1617_WR_AMB_HIGHLIM);
514         bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff,
515                        MAX1617_WR_AMB_LOWLIM);
516         bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff,
517                        MAX1617_WR_CPU_HIGHLIM);
518         bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff,
519                        MAX1617_WR_CPU_LOWLIM);
520
521         get_current_temps(tp);
522         tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp;
523         tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp;
524
525         tp->fan_todo[FAN_AMBIENT] = FAN_SAME;
526         tp->fan_todo[FAN_CPU] = FAN_SAME;
527 }
528
529 static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx)
530 {
531         struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL);
532
533         if (!fp)
534                 return;
535         memset(fp, 0, sizeof(*fp));
536         fp->client = bbc_i2c_attach(echild);
537         if (!fp->client) {
538                 kfree(fp);
539                 return;
540         }
541
542         fp->index = fan_idx;
543
544         {
545                 struct bbc_fan_control **fpp = &all_bbc_fans;
546                 while (*fpp)
547                         fpp = &((*fpp)->next);
548                 fp->next = NULL;
549                 *fpp = fp;
550         }
551
552         /* The i2c device controlling the fans is write-only.
553          * So the only way to keep track of the current power
554          * level fed to the fans is via software.  Choose half
555          * power for cpu/system and 'on' fo the powersupply fan
556          * and set it now.
557          */
558         fp->psupply_fan_on = 1;
559         fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
560         fp->cpu_fan_speed += FAN_SPEED_MIN;
561         fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2;
562         fp->system_fan_speed += FAN_SPEED_MIN;
563
564         set_fan_speeds(fp);
565 }
566
567 int bbc_envctrl_init(void)
568 {
569         struct linux_ebus_child *echild;
570         int temp_index = 0;
571         int fan_index = 0;
572         int devidx = 0;
573
574         while ((echild = bbc_i2c_getdev(devidx++)) != NULL) {
575                 if (!strcmp(echild->prom_node->name, "temperature"))
576                         attach_one_temp(echild, temp_index++);
577                 if (!strcmp(echild->prom_node->name, "fan-control"))
578                         attach_one_fan(echild, fan_index++);
579         }
580         if (temp_index != 0 && fan_index != 0) {
581                 kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
582                 if (IS_ERR(kenvctrld_task))
583                         return PTR_ERR(kenvctrld_task);
584         }
585
586         return 0;
587 }
588
589 static void destroy_one_temp(struct bbc_cpu_temperature *tp)
590 {
591         bbc_i2c_detach(tp->client);
592         kfree(tp);
593 }
594
595 static void destroy_one_fan(struct bbc_fan_control *fp)
596 {
597         bbc_i2c_detach(fp->client);
598         kfree(fp);
599 }
600
601 void bbc_envctrl_cleanup(void)
602 {
603         struct bbc_cpu_temperature *tp;
604         struct bbc_fan_control *fp;
605
606         kthread_stop(kenvctrld_task);
607
608         tp = all_bbc_temps;
609         while (tp != NULL) {
610                 struct bbc_cpu_temperature *next = tp->next;
611                 destroy_one_temp(tp);
612                 tp = next;
613         }
614         all_bbc_temps = NULL;
615
616         fp = all_bbc_fans;
617         while (fp != NULL) {
618                 struct bbc_fan_control *next = fp->next;
619                 destroy_one_fan(fp);
620                 fp = next;
621         }
622         all_bbc_fans = NULL;
623 }