Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6] / arch / sh / kernel / cpu / sh4 / fpu.c
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
10  *
11  * FIXME! These routines have not been tested for big endian case.
12  */
13 #include <linux/sched.h>
14 #include <linux/signal.h>
15 #include <linux/io.h>
16 #include <cpu/fpu.h>
17 #include <asm/processor.h>
18 #include <asm/system.h>
19 #include <asm/fpu.h>
20
/*
 * frchg is only defined while the PR (precision) bit of the FP Status
 * Register is clear; some SH4 implementations trap if it is executed with
 * PR set.  FPSCR_RCHG is the FPSCR value loaded before every frchg in this
 * file to guarantee that.
 */
#define FPSCR_RCHG 0x00000000

/*
 * Soft-float arithmetic helpers.  Operands and results are raw IEEE bit
 * patterns carried in integers: 64-bit for double, unsigned long for single.
 */
extern unsigned long long float64_add(unsigned long long a,
                                      unsigned long long b);
extern unsigned long long float64_sub(unsigned long long a,
                                      unsigned long long b);
extern unsigned long long float64_mul(unsigned long long a,
                                      unsigned long long b);
extern unsigned long long float64_div(unsigned long long a,
                                      unsigned long long b);

extern unsigned long float32_add(unsigned long a, unsigned long b);
extern unsigned long float32_sub(unsigned long a, unsigned long b);
extern unsigned long float32_mul(unsigned long a, unsigned long b);
extern unsigned long float32_div(unsigned long a, unsigned long b);

extern unsigned long float64_to_float32(unsigned long long a);

/* Exception bits accumulated through float_raise() during one emulation. */
static unsigned int fpu_exception_flags;
41
/*
 * Save FPU registers onto task structure.
 * Assume called with FPU enabled (SR.FD=0).
 *
 * @tsk:  task whose thread.fpu.hard save area is written
 * @regs: exception register frame, handed to release_fpu() at the end
 *
 * Clears TIF_USEDFPU on @tsk before dumping.  The dump is written with a
 * pre-decrementing pointer that starts at &tsk->thread.fpu.hard.status, in
 * this order: fpul, fpscr, fr15..fr0 as visible after the first frchg, then
 * fr15..fr0 as visible after the second frchg.  FPSCR is left holding
 * FPSCR_INIT afterwards, so the live FPSCR no longer carries the value that
 * was just saved.
 *
 * NOTE(review): this presumes that fpul, fpscr and both register banks sit
 * contiguously just below `status` in the save area - confirm against the
 * structure definition.
 */
void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
        /* Receives the final value of the store pointer; never read. */
        unsigned long dummy;

        clear_tsk_thread_flag(tsk, TIF_USEDFPU);
        enable_fpu();
        /*
         * Operands: %0 = descending store pointer (tied to dummy),
         *           %2 = FPSCR_RCHG, %3 = FPSCR_INIT.
         */
        asm volatile ("sts.l    fpul, @-%0\n\t"
                      "sts.l    fpscr, @-%0\n\t"
                      /* FPSCR_RCHG has PR clear, as frchg requires */
                      "lds      %2, fpscr\n\t"
                      "frchg\n\t"
                      "fmov.s   fr15, @-%0\n\t"
                      "fmov.s   fr14, @-%0\n\t"
                      "fmov.s   fr13, @-%0\n\t"
                      "fmov.s   fr12, @-%0\n\t"
                      "fmov.s   fr11, @-%0\n\t"
                      "fmov.s   fr10, @-%0\n\t"
                      "fmov.s   fr9, @-%0\n\t"
                      "fmov.s   fr8, @-%0\n\t"
                      "fmov.s   fr7, @-%0\n\t"
                      "fmov.s   fr6, @-%0\n\t"
                      "fmov.s   fr5, @-%0\n\t"
                      "fmov.s   fr4, @-%0\n\t"
                      "fmov.s   fr3, @-%0\n\t"
                      "fmov.s   fr2, @-%0\n\t"
                      "fmov.s   fr1, @-%0\n\t"
                      "fmov.s   fr0, @-%0\n\t"
                      /* switch back and dump the other bank */
                      "frchg\n\t"
                      "fmov.s   fr15, @-%0\n\t"
                      "fmov.s   fr14, @-%0\n\t"
                      "fmov.s   fr13, @-%0\n\t"
                      "fmov.s   fr12, @-%0\n\t"
                      "fmov.s   fr11, @-%0\n\t"
                      "fmov.s   fr10, @-%0\n\t"
                      "fmov.s   fr9, @-%0\n\t"
                      "fmov.s   fr8, @-%0\n\t"
                      "fmov.s   fr7, @-%0\n\t"
                      "fmov.s   fr6, @-%0\n\t"
                      "fmov.s   fr5, @-%0\n\t"
                      "fmov.s   fr4, @-%0\n\t"
                      "fmov.s   fr3, @-%0\n\t"
                      "fmov.s   fr2, @-%0\n\t"
                      "fmov.s   fr1, @-%0\n\t"
                      "fmov.s   fr0, @-%0\n\t"
                      /* leave the hardware with the default FPSCR */
                      "lds      %3, fpscr\n\t":"=r" (dummy)
                      :"0"((char *)(&tsk->thread.fpu.hard.status)),
                      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
                      :"memory");

        disable_fpu();
        release_fpu(regs);
}
97
/*
 * Reload the FPU from the save area of a task.
 *
 * @tsk: task whose thread.fpu area is read
 *
 * Reads with a post-incrementing pointer that starts at &tsk->thread.fpu,
 * in this order: fr0..fr15, then (after frchg) fr0..fr15 again, then (after
 * a second frchg) fpscr and fpul.  This is the reverse of the order in
 * which save_fpu() stores them.  The FPU is enabled for the duration of the
 * reload and disabled again before returning.
 */
static void restore_fpu(struct task_struct *tsk)
{
        /* Receives the final value of the load pointer; never read. */
        unsigned long dummy;

        enable_fpu();
        /* Operands: %0 = ascending load pointer, %2 = FPSCR_RCHG. */
        asm volatile (/* FPSCR_RCHG has PR clear, as frchg requires */
                      "lds      %2, fpscr\n\t"
                      "fmov.s   @%0+, fr0\n\t"
                      "fmov.s   @%0+, fr1\n\t"
                      "fmov.s   @%0+, fr2\n\t"
                      "fmov.s   @%0+, fr3\n\t"
                      "fmov.s   @%0+, fr4\n\t"
                      "fmov.s   @%0+, fr5\n\t"
                      "fmov.s   @%0+, fr6\n\t"
                      "fmov.s   @%0+, fr7\n\t"
                      "fmov.s   @%0+, fr8\n\t"
                      "fmov.s   @%0+, fr9\n\t"
                      "fmov.s   @%0+, fr10\n\t"
                      "fmov.s   @%0+, fr11\n\t"
                      "fmov.s   @%0+, fr12\n\t"
                      "fmov.s   @%0+, fr13\n\t"
                      "fmov.s   @%0+, fr14\n\t"
                      "fmov.s   @%0+, fr15\n\t"
                      /* switch bank and fill the other sixteen registers */
                      "frchg\n\t"
                      "fmov.s   @%0+, fr0\n\t"
                      "fmov.s   @%0+, fr1\n\t"
                      "fmov.s   @%0+, fr2\n\t"
                      "fmov.s   @%0+, fr3\n\t"
                      "fmov.s   @%0+, fr4\n\t"
                      "fmov.s   @%0+, fr5\n\t"
                      "fmov.s   @%0+, fr6\n\t"
                      "fmov.s   @%0+, fr7\n\t"
                      "fmov.s   @%0+, fr8\n\t"
                      "fmov.s   @%0+, fr9\n\t"
                      "fmov.s   @%0+, fr10\n\t"
                      "fmov.s   @%0+, fr11\n\t"
                      "fmov.s   @%0+, fr12\n\t"
                      "fmov.s   @%0+, fr13\n\t"
                      "fmov.s   @%0+, fr14\n\t"
                      "fmov.s   @%0+, fr15\n\t"
                      /* switch back before the saved FPSCR is reloaded */
                      "frchg\n\t"
                      "lds.l    @%0+, fpscr\n\t"
                      "lds.l    @%0+, fpul\n\t"
                      :"=r" (dummy)
                      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
                      :"memory");
        disable_fpu();
}
145
/*
 * Put the FPU into its initial state for a first-time user: every fr
 * register of both banks is written with the value held in fpul, and FPSCR
 * is left holding FPSCR_INIT.
 *
 * NOTE(review): this comment used to read "Load the FPU with signalling
 * NANS", describing a bit pattern that is a signalling NaN whether it is
 * considered as single or as double precision.  The value actually loaded
 * into fpul below is 0 (operand %0), so the registers end up all-zero, not
 * NaN.  Confirm which of the two is intended.
 */

static void fpu_init(void)
{
        enable_fpu();
        /* Operands: %0 = 0 (fill value), %1 = FPSCR_RCHG, %2 = FPSCR_INIT. */
        asm volatile (  "lds    %0, fpul\n\t"
                        /* FPSCR_RCHG has PR clear, as frchg requires */
                        "lds    %1, fpscr\n\t"
                        "fsts   fpul, fr0\n\t"
                        "fsts   fpul, fr1\n\t"
                        "fsts   fpul, fr2\n\t"
                        "fsts   fpul, fr3\n\t"
                        "fsts   fpul, fr4\n\t"
                        "fsts   fpul, fr5\n\t"
                        "fsts   fpul, fr6\n\t"
                        "fsts   fpul, fr7\n\t"
                        "fsts   fpul, fr8\n\t"
                        "fsts   fpul, fr9\n\t"
                        "fsts   fpul, fr10\n\t"
                        "fsts   fpul, fr11\n\t"
                        "fsts   fpul, fr12\n\t"
                        "fsts   fpul, fr13\n\t"
                        "fsts   fpul, fr14\n\t"
                        "fsts   fpul, fr15\n\t"
                        /* switch bank and fill the other sixteen registers */
                        "frchg\n\t"
                        "fsts   fpul, fr0\n\t"
                        "fsts   fpul, fr1\n\t"
                        "fsts   fpul, fr2\n\t"
                        "fsts   fpul, fr3\n\t"
                        "fsts   fpul, fr4\n\t"
                        "fsts   fpul, fr5\n\t"
                        "fsts   fpul, fr6\n\t"
                        "fsts   fpul, fr7\n\t"
                        "fsts   fpul, fr8\n\t"
                        "fsts   fpul, fr9\n\t"
                        "fsts   fpul, fr10\n\t"
                        "fsts   fpul, fr11\n\t"
                        "fsts   fpul, fr12\n\t"
                        "fsts   fpul, fr13\n\t"
                        "fsts   fpul, fr14\n\t"
                        "fsts   fpul, fr15\n\t"
                        /* switch back, then install the default FPSCR */
                        "frchg\n\t"
                        "lds    %2, fpscr\n\t"
                        :       /* no output */
                        :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
        disable_fpu();
}
196
197 /**
198  *      denormal_to_double - Given denormalized float number,
199  *                           store double float
200  *
201  *      @fpu: Pointer to sh_fpu_hard structure
202  *      @n: Index to FP register
203  */
204 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
205 {
206         unsigned long du, dl;
207         unsigned long x = fpu->fpul;
208         int exp = 1023 - 126;
209
210         if (x != 0 && (x & 0x7f800000) == 0) {
211                 du = (x & 0x80000000);
212                 while ((x & 0x00800000) == 0) {
213                         x <<= 1;
214                         exp--;
215                 }
216                 x &= 0x007fffff;
217                 du |= (exp << 20) | (x >> 3);
218                 dl = x << 29;
219
220                 fpu->fp_regs[n] = du;
221                 fpu->fp_regs[n + 1] = dl;
222         }
223 }
224
225 /**
226  *      ieee_fpe_handler - Handle denormalized number exception
227  *
228  *      @regs: Pointer to register structure
229  *
230  *      Returns 1 when it's handled (should not cause exception).
231  */
232 static int ieee_fpe_handler(struct pt_regs *regs)
233 {
234         unsigned short insn = *(unsigned short *)regs->pc;
235         unsigned short finsn;
236         unsigned long nextpc;
237         int nib[4] = {
238                 (insn >> 12) & 0xf,
239                 (insn >> 8) & 0xf,
240                 (insn >> 4) & 0xf,
241                 insn & 0xf
242         };
243
244         if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
245                 regs->pr = regs->pc + 4;  /* bsr & jsr */
246
247         if (nib[0] == 0xa || nib[0] == 0xb) {
248                 /* bra & bsr */
249                 nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
250                 finsn = *(unsigned short *)(regs->pc + 2);
251         } else if (nib[0] == 0x8 && nib[1] == 0xd) {
252                 /* bt/s */
253                 if (regs->sr & 1)
254                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
255                 else
256                         nextpc = regs->pc + 4;
257                 finsn = *(unsigned short *)(regs->pc + 2);
258         } else if (nib[0] == 0x8 && nib[1] == 0xf) {
259                 /* bf/s */
260                 if (regs->sr & 1)
261                         nextpc = regs->pc + 4;
262                 else
263                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
264                 finsn = *(unsigned short *)(regs->pc + 2);
265         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
266                    (nib[2] == 0x0 || nib[2] == 0x2)) {
267                 /* jmp & jsr */
268                 nextpc = regs->regs[nib[1]];
269                 finsn = *(unsigned short *)(regs->pc + 2);
270         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
271                    (nib[2] == 0x0 || nib[2] == 0x2)) {
272                 /* braf & bsrf */
273                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
274                 finsn = *(unsigned short *)(regs->pc + 2);
275         } else if (insn == 0x000b) {
276                 /* rts */
277                 nextpc = regs->pr;
278                 finsn = *(unsigned short *)(regs->pc + 2);
279         } else {
280                 nextpc = regs->pc + instruction_size(insn);
281                 finsn = insn;
282         }
283
284         if ((finsn & 0xf1ff) == 0xf0ad) {
285                 /* fcnvsd */
286                 struct task_struct *tsk = current;
287
288                 save_fpu(tsk, regs);
289                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
290                         /* FPU error */
291                         denormal_to_double(&tsk->thread.fpu.hard,
292                                            (finsn >> 8) & 0xf);
293                 else
294                         return 0;
295
296                 regs->pc = nextpc;
297                 return 1;
298         } else if ((finsn & 0xf00f) == 0xf002) {
299                 /* fmul */
300                 struct task_struct *tsk = current;
301                 int fpscr;
302                 int n, m, prec;
303                 unsigned int hx, hy;
304
305                 n = (finsn >> 8) & 0xf;
306                 m = (finsn >> 4) & 0xf;
307                 hx = tsk->thread.fpu.hard.fp_regs[n];
308                 hy = tsk->thread.fpu.hard.fp_regs[m];
309                 fpscr = tsk->thread.fpu.hard.fpscr;
310                 prec = fpscr & FPSCR_DBL_PRECISION;
311
312                 if ((fpscr & FPSCR_CAUSE_ERROR)
313                     && (prec && ((hx & 0x7fffffff) < 0x00100000
314                                  || (hy & 0x7fffffff) < 0x00100000))) {
315                         long long llx, lly;
316
317                         /* FPU error because of denormal (doubles) */
318                         llx = ((long long)hx << 32)
319                             | tsk->thread.fpu.hard.fp_regs[n + 1];
320                         lly = ((long long)hy << 32)
321                             | tsk->thread.fpu.hard.fp_regs[m + 1];
322                         llx = float64_mul(llx, lly);
323                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
324                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
325                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
326                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
327                                          || (hy & 0x7fffffff) < 0x00800000))) {
328                         /* FPU error because of denormal (floats) */
329                         hx = float32_mul(hx, hy);
330                         tsk->thread.fpu.hard.fp_regs[n] = hx;
331                 } else
332                         return 0;
333
334                 regs->pc = nextpc;
335                 return 1;
336         } else if ((finsn & 0xf00e) == 0xf000) {
337                 /* fadd, fsub */
338                 struct task_struct *tsk = current;
339                 int fpscr;
340                 int n, m, prec;
341                 unsigned int hx, hy;
342
343                 n = (finsn >> 8) & 0xf;
344                 m = (finsn >> 4) & 0xf;
345                 hx = tsk->thread.fpu.hard.fp_regs[n];
346                 hy = tsk->thread.fpu.hard.fp_regs[m];
347                 fpscr = tsk->thread.fpu.hard.fpscr;
348                 prec = fpscr & FPSCR_DBL_PRECISION;
349
350                 if ((fpscr & FPSCR_CAUSE_ERROR)
351                     && (prec && ((hx & 0x7fffffff) < 0x00100000
352                                  || (hy & 0x7fffffff) < 0x00100000))) {
353                         long long llx, lly;
354
355                         /* FPU error because of denormal (doubles) */
356                         llx = ((long long)hx << 32)
357                             | tsk->thread.fpu.hard.fp_regs[n + 1];
358                         lly = ((long long)hy << 32)
359                             | tsk->thread.fpu.hard.fp_regs[m + 1];
360                         if ((finsn & 0xf00f) == 0xf000)
361                                 llx = float64_add(llx, lly);
362                         else
363                                 llx = float64_sub(llx, lly);
364                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
365                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
366                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
367                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
368                                          || (hy & 0x7fffffff) < 0x00800000))) {
369                         /* FPU error because of denormal (floats) */
370                         if ((finsn & 0xf00f) == 0xf000)
371                                 hx = float32_add(hx, hy);
372                         else
373                                 hx = float32_sub(hx, hy);
374                         tsk->thread.fpu.hard.fp_regs[n] = hx;
375                 } else
376                         return 0;
377
378                 regs->pc = nextpc;
379                 return 1;
380         } else if ((finsn & 0xf003) == 0xf003) {
381                 /* fdiv */
382                 struct task_struct *tsk = current;
383                 int fpscr;
384                 int n, m, prec;
385                 unsigned int hx, hy;
386
387                 n = (finsn >> 8) & 0xf;
388                 m = (finsn >> 4) & 0xf;
389                 hx = tsk->thread.fpu.hard.fp_regs[n];
390                 hy = tsk->thread.fpu.hard.fp_regs[m];
391                 fpscr = tsk->thread.fpu.hard.fpscr;
392                 prec = fpscr & FPSCR_DBL_PRECISION;
393
394                 if ((fpscr & FPSCR_CAUSE_ERROR)
395                     && (prec && ((hx & 0x7fffffff) < 0x00100000
396                                  || (hy & 0x7fffffff) < 0x00100000))) {
397                         long long llx, lly;
398
399                         /* FPU error because of denormal (doubles) */
400                         llx = ((long long)hx << 32)
401                             | tsk->thread.fpu.hard.fp_regs[n + 1];
402                         lly = ((long long)hy << 32)
403                             | tsk->thread.fpu.hard.fp_regs[m + 1];
404
405                         llx = float64_div(llx, lly);
406
407                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
408                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
409                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
410                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
411                                          || (hy & 0x7fffffff) < 0x00800000))) {
412                         /* FPU error because of denormal (floats) */
413                         hx = float32_div(hx, hy);
414                         tsk->thread.fpu.hard.fp_regs[n] = hx;
415                 } else
416                         return 0;
417
418                 regs->pc = nextpc;
419                 return 1;
420         } else if ((finsn & 0xf0bd) == 0xf0bd) {
421                 /* fcnvds - double to single precision convert */
422                 struct task_struct *tsk = current;
423                 int m;
424                 unsigned int hx;
425
426                 m = (finsn >> 8) & 0x7;
427                 hx = tsk->thread.fpu.hard.fp_regs[m];
428
429                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
430                         && ((hx & 0x7fffffff) < 0x00100000)) {
431                         /* subnormal double to float conversion */
432                         long long llx;
433
434                         llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
435                             | tsk->thread.fpu.hard.fp_regs[m + 1];
436
437                         tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
438                 } else
439                         return 0;
440
441                 regs->pc = nextpc;
442                 return 1;
443         }
444
445         return 0;
446 }
447
448 void float_raise(unsigned int flags)
449 {
450         fpu_exception_flags |= flags;
451 }
452
453 int float_rounding_mode(void)
454 {
455         struct task_struct *tsk = current;
456         int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
457         return roundingMode;
458 }
459
460 BUILD_TRAP_HANDLER(fpu_error)
461 {
462         struct task_struct *tsk = current;
463         TRAP_HANDLER_DECL;
464
465         save_fpu(tsk, regs);
466         fpu_exception_flags = 0;
467         if (ieee_fpe_handler(regs)) {
468                 tsk->thread.fpu.hard.fpscr &=
469                     ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
470                 tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
471                 /* Set the FPSCR flag as well as cause bits - simply
472                  * replicate the cause */
473                 tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
474                 grab_fpu(regs);
475                 restore_fpu(tsk);
476                 set_tsk_thread_flag(tsk, TIF_USEDFPU);
477                 if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
478                      (fpu_exception_flags >> 2)) == 0) {
479                         return;
480                 }
481         }
482
483         force_sig(SIGFPE, tsk);
484 }
485
486 BUILD_TRAP_HANDLER(fpu_state_restore)
487 {
488         struct task_struct *tsk = current;
489         TRAP_HANDLER_DECL;
490
491         grab_fpu(regs);
492         if (!user_mode(regs)) {
493                 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
494                 return;
495         }
496
497         if (used_math()) {
498                 /* Using the FPU again.  */
499                 restore_fpu(tsk);
500         } else {
501                 /* First time FPU user.  */
502                 fpu_init();
503                 set_used_math();
504         }
505         set_tsk_thread_flag(tsk, TIF_USEDFPU);
506 }