git.oblomov.eu Git - linux-2.6/blob - arch/arm/vfp/vfpsingle.c

   1 /*
   2  *  linux/arch/arm/vfp/vfpsingle.c
   3  *
   4  * This code is derived in part from John R. Hauser's softfloat library, which
   5  * carries the following notice:
   6  *
   7  * ===========================================================================
   8  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
   9  * Arithmetic Package, Release 2.
  10  *
  11  * Written by John R. Hauser.  This work was made possible in part by the
  12  * International Computer Science Institute, located at Suite 600, 1947 Center
  13  * Street, Berkeley, California 94704.  Funding was partially provided by the
  14  * National Science Foundation under grant MIP-9311980.  The original version
  15  * of this code was written as part of a project to build a fixed-point vector
  16  * processor in collaboration with the University of California at Berkeley,
  17  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  18  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  19  * arithmetic/softfloat.html'.
  20  *
  21  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  22  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  23  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  24  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  25  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  26  *
  27  * Derivative works are acceptable, even for commercial purposes, so long as
  28  * (1) they include prominent notice that the work is derivative, and (2) they
  29  * include prominent notice akin to these three paragraphs for those parts of
  30  * this code that are retained.
  31  * ===========================================================================
  32  */
  33 #include <linux/kernel.h>
  34 #include <linux/bitops.h>
  35
  36 #include <asm/div64.h>
  37 #include <asm/ptrace.h>
  38 #include <asm/vfp.h>
  39
  40 #include "vfpinstr.h"
  41 #include "vfp.h"
  42
/*
 * Template for the default quiet NaN: sign clear, exponent 255 and the
 * quiet-NaN significand pattern.  It is copied (or packed directly)
 * wherever this file needs to produce a default NaN result.
 */
static struct vfp_single vfp_single_default_qnan = {
        .exponent       = 255,
        .sign           = 0,
        .significand    = VFP_SINGLE_SIGNIFICAND_QNAN,
};
  48
  49 static void vfp_single_dump(const char *str, struct vfp_single *s)
  50 {
  51         pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
  52                  str, s->sign != 0, s->exponent, s->significand);
  53 }
  54
  55 static void vfp_single_normalise_denormal(struct vfp_single *vs)
  56 {
  57         int bits = 31 - fls(vs->significand);
  58
  59         vfp_single_dump("normalise_denormal: in", vs);
  60
  61         if (bits) {
  62                 vs->exponent -= bits - 1;
  63                 vs->significand <<= bits;
  64         }
  65
  66         vfp_single_dump("normalise_denormal: out", vs);
  67 }
  68
  69 #ifndef DEBUG
  70 #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
  71 u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
  72 #else
  73 u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
  74 #endif
  75 {
  76         u32 significand, incr, rmode;
  77         int exponent, shift, underflow;
  78
  79         vfp_single_dump("pack: in", vs);
  80
  81         /*
  82          * Infinities and NaNs are a special case.
  83          */
  84         if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
  85                 goto pack;
  86
  87         /*
  88          * Special-case zero.
  89          */
  90         if (vs->significand == 0) {
  91                 vs->exponent = 0;
  92                 goto pack;
  93         }
  94
  95         exponent = vs->exponent;
  96         significand = vs->significand;
  97
  98         /*
  99          * Normalise first.  Note that we shift the significand up to
 100          * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
 101          * significant bit.
 102          */
 103         shift = 32 - fls(significand);
 104         if (shift < 32 && shift) {
 105                 exponent -= shift;
 106                 significand <<= shift;
 107         }
 108
 109 #ifdef DEBUG
 110         vs->exponent = exponent;
 111         vs->significand = significand;
 112         vfp_single_dump("pack: normalised", vs);
 113 #endif
 114
 115         /*
 116          * Tiny number?
 117          */
 118         underflow = exponent < 0;
 119         if (underflow) {
 120                 significand = vfp_shiftright32jamming(significand, -exponent);
 121                 exponent = 0;
 122 #ifdef DEBUG
 123                 vs->exponent = exponent;
 124                 vs->significand = significand;
 125                 vfp_single_dump("pack: tiny number", vs);
 126 #endif
 127                 if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
 128                         underflow = 0;
 129         }
 130
 131         /*
 132          * Select rounding increment.
 133          */
 134         incr = 0;
 135         rmode = fpscr & FPSCR_RMODE_MASK;
 136
 137         if (rmode == FPSCR_ROUND_NEAREST) {
 138                 incr = 1 << VFP_SINGLE_LOW_BITS;
 139                 if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
 140                         incr -= 1;
 141         } else if (rmode == FPSCR_ROUND_TOZERO) {
 142                 incr = 0;
 143         } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
 144                 incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;
 145
 146         pr_debug("VFP: rounding increment = 0x%08x\n", incr);
 147
 148         /*
 149          * Is our rounding going to overflow?
 150          */
 151         if ((significand + incr) < significand) {
 152                 exponent += 1;
 153                 significand = (significand >> 1) | (significand & 1);
 154                 incr >>= 1;
 155 #ifdef DEBUG
 156                 vs->exponent = exponent;
 157                 vs->significand = significand;
 158                 vfp_single_dump("pack: overflow", vs);
 159 #endif
 160         }
 161
 162         /*
 163          * If any of the low bits (which will be shifted out of the
 164          * number) are non-zero, the result is inexact.
 165          */
 166         if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
 167                 exceptions |= FPSCR_IXC;
 168
 169         /*
 170          * Do our rounding.
 171          */
 172         significand += incr;
 173
 174         /*
 175          * Infinity?
 176          */
 177         if (exponent >= 254) {
 178                 exceptions |= FPSCR_OFC | FPSCR_IXC;
 179                 if (incr == 0) {
 180                         vs->exponent = 253;
 181                         vs->significand = 0x7fffffff;
 182                 } else {
 183                         vs->exponent = 255;             /* infinity */
 184                         vs->significand = 0;
 185                 }
 186         } else {
 187                 if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
 188                         exponent = 0;
 189                 if (exponent || significand > 0x80000000)
 190                         underflow = 0;
 191                 if (underflow)
 192                         exceptions |= FPSCR_UFC;
 193                 vs->exponent = exponent;
 194                 vs->significand = significand >> 1;
 195         }
 196
 197  pack:
 198         vfp_single_dump("pack: final", vs);
 199         {
 200                 s32 d = vfp_single_pack(vs);
 201                 pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
 202                          sd, d, exceptions);
 203                 vfp_put_float(d, sd);
 204         }
 205
 206         return exceptions;
 207 }
 208
 209 /*
 210  * Propagate the NaN, setting exceptions if it is signalling.
 211  * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 212  */
 213 static u32
 214 vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
 215                   struct vfp_single *vsm, u32 fpscr)
 216 {
 217         struct vfp_single *nan;
 218         int tn, tm = 0;
 219
 220         tn = vfp_single_type(vsn);
 221
 222         if (vsm)
 223                 tm = vfp_single_type(vsm);
 224
 225         if (fpscr & FPSCR_DEFAULT_NAN)
 226                 /*
 227                  * Default NaN mode - always returns a quiet NaN
 228                  */
 229                 nan = &vfp_single_default_qnan;
 230         else {
 231                 /*
 232                  * Contemporary mode - select the first signalling
 233                  * NAN, or if neither are signalling, the first
 234                  * quiet NAN.
 235                  */
 236                 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
 237                         nan = vsn;
 238                 else
 239                         nan = vsm;
 240                 /*
 241                  * Make the NaN quiet.
 242                  */
 243                 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
 244         }
 245
 246         *vsd = *nan;
 247
 248         /*
 249          * If one was a signalling NAN, raise invalid operation.
 250          */
 251         return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
 252 }
 253
 254
 255 /*
 256  * Extended operations
 257  */
 258 static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
 259 {
 260         vfp_put_float(vfp_single_packed_abs(m), sd);
 261         return 0;
 262 }
 263
 264 static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
 265 {
 266         vfp_put_float(m, sd);
 267         return 0;
 268 }
 269
 270 static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
 271 {
 272         vfp_put_float(vfp_single_packed_negate(m), sd);
 273         return 0;
 274 }
 275
/*
 * Correction terms subtracted from the initial square-root estimate when
 * the exponent is odd.  Indexed by a 4-bit field taken from the top of
 * the doubled significand (see vfp_estimate_sqrt_significand()).
 */
static const u16 sqrt_oddadjust[] = {
        0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
        0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
};
 280
/*
 * Correction terms subtracted from the initial square-root estimate when
 * the exponent is even.  Indexed the same way as sqrt_oddadjust[].
 */
static const u16 sqrt_evenadjust[] = {
        0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
        0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
};
 285
 286 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
 287 {
 288         int index;
 289         u32 z, a;
 290
 291         if ((significand & 0xc0000000) != 0x40000000) {
 292                 printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
 293         }
 294
 295         a = significand << 1;
 296         index = (a >> 27) & 15;
 297         if (exponent & 1) {
 298                 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
 299                 z = ((a / z) << 14) + (z << 15);
 300                 a >>= 1;
 301         } else {
 302                 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
 303                 z = a / z + z;
 304                 z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
 305                 if (z <= a)
 306                         return (s32)a >> 1;
 307         }
 308         {
 309                 u64 v = (u64)a << 31;
 310                 do_div(v, z);
 311                 return v + (z >> 1);
 312         }
 313 }
 314
/*
 * FSQRT: compute the square root of the packed operand 'm' and store it
 * in register 'sd'.  Returns the exception flags raised.
 *
 * NaNs are propagated; +infinity and +/-0 are copied through unchanged;
 * -infinity and negative non-zero numbers give the default quiet NaN
 * with FPSCR_IOC.
 */
static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vsm, vsd;
        int ret, tm;

        vfp_single_unpack(&vsm, m);
        tm = vfp_single_type(&vsm);
        if (tm & (VFP_NAN|VFP_INFINITY)) {
                struct vfp_single *vsp = &vsd;

                if (tm & VFP_NAN)
                        ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
                else if (vsm.sign == 0) {
                        /*
                         * Also the target of a goto from the zero case
                         * below: return the operand unchanged.
                         */
 sqrt_copy:
                        vsp = &vsm;
                        ret = 0;
                } else {
                        /*
                         * Also the target of a goto from the negative
                         * number case below: invalid operation.
                         */
 sqrt_invalid:
                        vsp = &vfp_single_default_qnan;
                        ret = FPSCR_IOC;
                }
                vfp_put_float(vfp_single_pack(vsp), sd);
                return ret;
        }

        /*
         * sqrt(+/- 0) == +/- 0
         */
        if (tm & VFP_ZERO)
                goto sqrt_copy;

        /*
         * Normalise a denormalised number
         */
        if (tm & VFP_DENORMAL)
                vfp_single_normalise_denormal(&vsm);

        /*
         * sqrt(<0) = invalid
         */
        if (vsm.sign)
                goto sqrt_invalid;

        vfp_single_dump("sqrt", &vsm);

        /*
         * Estimate the square root.  The result exponent is half the
         * unbiased input exponent, re-biased by 127.
         */
        vsd.sign = 0;
        vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
        vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

        vfp_single_dump("sqrt estimate", &vsd);

        /*
         * And now adjust.  Only when the low bits of the estimate are
         * small (<= 5) is it corrected: the remainder of operand minus
         * estimate squared is formed in 64 bits, the estimate is stepped
         * down until the remainder is non-negative, and a non-zero
         * remainder is folded into the lowest bit as a sticky bit.
         */
        if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
                if (vsd.significand < 2) {
                        vsd.significand = 0xffffffff;
                } else {
                        u64 term;
                        s64 rem;
                        /* Double the operand when its exponent is even. */
                        vsm.significand <<= !(vsm.exponent & 1);
                        term = (u64)vsd.significand * vsd.significand;
                        rem = ((u64)vsm.significand << 32) - term;

                        pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

                        while (rem < 0) {
                                vsd.significand -= 1;
                                rem += ((u64)vsd.significand << 1) | 1;
                        }
                        vsd.significand |= rem != 0;
                }
        }
        vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

        return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
}
 395
 396 /*
 397  * Equal        := ZC
 398  * Less than    := N
 399  * Greater than := C
 400  * Unordered    := CV
 401  */
 402 static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
 403 {
 404         s32 d;
 405         u32 ret = 0;
 406
 407         d = vfp_get_float(sd);
 408         if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
 409                 ret |= FPSCR_C | FPSCR_V;
 410                 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 411                         /*
 412                          * Signalling NaN, or signalling on quiet NaN
 413                          */
 414                         ret |= FPSCR_IOC;
 415         }
 416
 417         if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
 418                 ret |= FPSCR_C | FPSCR_V;
 419                 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 420                         /*
 421                          * Signalling NaN, or signalling on quiet NaN
 422                          */
 423                         ret |= FPSCR_IOC;
 424         }
 425
 426         if (ret == 0) {
 427                 if (d == m || vfp_single_packed_abs(d | m) == 0) {
 428                         /*
 429                          * equal
 430                          */
 431                         ret |= FPSCR_Z | FPSCR_C;
 432                 } else if (vfp_single_packed_sign(d ^ m)) {
 433                         /*
 434                          * different signs
 435                          */
 436                         if (vfp_single_packed_sign(d))
 437                                 /*
 438                                  * d is negative, so d < m
 439                                  */
 440                                 ret |= FPSCR_N;
 441                         else
 442                                 /*
 443                                  * d is positive, so d > m
 444                                  */
 445                                 ret |= FPSCR_C;
 446                 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
 447                         /*
 448                          * d < m
 449                          */
 450                         ret |= FPSCR_N;
 451                 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
 452                         /*
 453                          * d > m
 454                          */
 455                         ret |= FPSCR_C;
 456                 }
 457         }
 458         return ret;
 459 }
 460
 461 static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
 462 {
 463         return vfp_compare(sd, 0, m, fpscr);
 464 }
 465
 466 static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
 467 {
 468         return vfp_compare(sd, 1, m, fpscr);
 469 }
 470
 471 static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
 472 {
 473         return vfp_compare(sd, 0, 0, fpscr);
 474 }
 475
 476 static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
 477 {
 478         return vfp_compare(sd, 1, 0, fpscr);
 479 }
 480
 481 static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
 482 {
 483         struct vfp_single vsm;
 484         struct vfp_double vdd;
 485         int tm;
 486         u32 exceptions = 0;
 487
 488         vfp_single_unpack(&vsm, m);
 489
 490         tm = vfp_single_type(&vsm);
 491
 492         /*
 493          * If we have a signalling NaN, signal invalid operation.
 494          */
 495         if (tm == VFP_SNAN)
 496                 exceptions = FPSCR_IOC;
 497
 498         if (tm & VFP_DENORMAL)
 499                 vfp_single_normalise_denormal(&vsm);
 500
 501         vdd.sign = vsm.sign;
 502         vdd.significand = (u64)vsm.significand << 32;
 503
 504         /*
 505          * If we have an infinity or NaN, the exponent must be 2047.
 506          */
 507         if (tm & (VFP_INFINITY|VFP_NAN)) {
 508                 vdd.exponent = 2047;
 509                 if (tm == VFP_QNAN)
 510                         vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
 511                 goto pack_nan;
 512         } else if (tm & VFP_ZERO)
 513                 vdd.exponent = 0;
 514         else
 515                 vdd.exponent = vsm.exponent + (1023 - 127);
 516
 517         return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");
 518
 519  pack_nan:
 520         vfp_put_double(vfp_double_pack(&vdd), dd);
 521         return exceptions;
 522 }
 523
 524 static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
 525 {
 526         struct vfp_single vs;
 527
 528         vs.sign = 0;
 529         vs.exponent = 127 + 31 - 1;
 530         vs.significand = (u32)m;
 531
 532         return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
 533 }
 534
 535 static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
 536 {
 537         struct vfp_single vs;
 538
 539         vs.sign = (m & 0x80000000) >> 16;
 540         vs.exponent = 127 + 31 - 1;
 541         vs.significand = vs.sign ? -m : m;
 542
 543         return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
 544 }
 545
/*
 * FTOUI: convert the packed single 'm' to an unsigned 32-bit integer
 * using the rounding mode in 'fpscr', and store the integer bit pattern
 * in register 'sd'.  Returns the exception flags raised (IDC for a
 * denormal input, IOC when the result is out of range, IXC when inexact).
 */
static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vsm;
        u32 d, exceptions = 0;
        int rmode = fpscr & FPSCR_RMODE_MASK;
        int tm;

        vfp_single_unpack(&vsm, m);
        vfp_single_dump("VSM", &vsm);

        /*
         * Do we have a denormalised number?
         */
        tm = vfp_single_type(&vsm);
        if (tm & VFP_DENORMAL)
                exceptions |= FPSCR_IDC;

        /* NaNs are handled below as if they were positive. */
        if (tm & VFP_NAN)
                vsm.sign = 0;

        if (vsm.exponent >= 127 + 32) {
                /*
                 * Too large in magnitude: saturate to 0 for negative
                 * values and to 0xffffffff otherwise.  Note that this
                 * assignment replaces any flags gathered so far.
                 */
                d = vsm.sign ? 0 : 0xffffffff;
                exceptions = FPSCR_IOC;
        } else if (vsm.exponent >= 127 - 1) {
                int shift = 127 + 31 - vsm.exponent;
                u32 rem, incr = 0;

                /*
                 * 2^0 <= m < 2^32-2^8
                 *
                 * 'd' is the integer part and 'rem' the bits shifted
                 * out below it.
                 *
                 * NOTE(review): 'shift' is 32 when the exponent is 126,
                 * and '33 - shift' is 32 or 33 when the exponent is 157
                 * or 158.  Shifting a u32 by its width or more is
                 * undefined in C; confirm the intended result (zero?)
                 * is what the compiler actually generates.
                 */
                d = (vsm.significand << 1) >> shift;
                rem = vsm.significand << (33 - shift);

                if (rmode == FPSCR_ROUND_NEAREST) {
                        /* Half, less one when 'd' is even (ties to even). */
                        incr = 0x80000000;
                        if ((d & 1) == 0)
                                incr -= 1;
                } else if (rmode == FPSCR_ROUND_TOZERO) {
                        incr = 0;
                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
                        incr = ~0;
                }

                /* A carry out of rem + incr means "round up". */
                if ((rem + incr) < rem) {
                        if (d < 0xffffffff)
                                d += 1;
                        else
                                exceptions |= FPSCR_IOC;
                }

                /* A negative value rounding to non-zero is invalid. */
                if (d && vsm.sign) {
                        d = 0;
                        exceptions |= FPSCR_IOC;
                } else if (rem)
                        exceptions |= FPSCR_IXC;
        } else {
                /*
                 * Smaller exponents: the result is 0 unless a directed
                 * rounding mode pushes a non-zero value away from zero.
                 */
                d = 0;
                if (vsm.exponent | vsm.significand) {
                        exceptions |= FPSCR_IXC;
                        if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
                                d = 1;
                        else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
                                d = 0;
                                exceptions |= FPSCR_IOC;
                        }
                }
        }

        pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

        vfp_put_float(d, sd);

        return exceptions;
}
 620
 621 static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
 622 {
 623         return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
 624 }
 625
/*
 * FTOSI: convert the packed single 'm' to a signed 32-bit integer using
 * the rounding mode in 'fpscr', and store the integer bit pattern in
 * register 'sd'.  Returns the exception flags raised (IDC for a denormal
 * input, IOC for NaN or out-of-range results, IXC when inexact).
 */
static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
{
        struct vfp_single vsm;
        u32 d, exceptions = 0;
        int rmode = fpscr & FPSCR_RMODE_MASK;
        int tm;

        vfp_single_unpack(&vsm, m);
        vfp_single_dump("VSM", &vsm);

        /*
         * Do we have a denormalised number?
         * (The type is evaluated a second time here rather than
         * reusing 'tm'.)
         */
        tm = vfp_single_type(&vsm);
        if (vfp_single_type(&vsm) & VFP_DENORMAL)
                exceptions |= FPSCR_IDC;

        if (tm & VFP_NAN) {
                /* NaN converts to 0 and is invalid. */
                d = 0;
                exceptions |= FPSCR_IOC;
        } else if (vsm.exponent >= 127 + 32) {
                /*
                 * Too large in magnitude: saturate to 0x7fffffff, or to
                 * its complement (0x80000000) for negative values.
                 */
                d = 0x7fffffff;
                if (vsm.sign)
                        d = ~d;
                exceptions |= FPSCR_IOC;
        } else if (vsm.exponent >= 127 - 1) {
                int shift = 127 + 31 - vsm.exponent;
                u32 rem, incr = 0;

                /*
                 * 'd' is the integer part of the magnitude and 'rem'
                 * the bits shifted out below it.
                 *
                 * NOTE(review): 'shift' is 32 when the exponent is 126,
                 * and '33 - shift' is 32 or 33 when the exponent is 157
                 * or 158.  Shifting a u32 by its width or more is
                 * undefined in C; confirm the intended result (zero?)
                 * is what the compiler actually generates.
                 */
                d = (vsm.significand << 1) >> shift;
                rem = vsm.significand << (33 - shift);

                if (rmode == FPSCR_ROUND_NEAREST) {
                        /* Half, less one when 'd' is even (ties to even). */
                        incr = 0x80000000;
                        if ((d & 1) == 0)
                                incr -= 1;
                } else if (rmode == FPSCR_ROUND_TOZERO) {
                        incr = 0;
                } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
                        incr = ~0;
                }

                /* A carry out of rem + incr means "round up". */
                if ((rem + incr) < rem && d < 0xffffffff)
                        d += 1;

                /*
                 * Clamp the magnitude to 0x7fffffff for positive values
                 * and 0x80000000 for negative ones.
                 */
                if (d > 0x7fffffff + (vsm.sign != 0)) {
                        d = 0x7fffffff + (vsm.sign != 0);
                        exceptions |= FPSCR_IOC;
                } else if (rem)
                        exceptions |= FPSCR_IXC;

                if (vsm.sign)
                        d = -d;
        } else {
                /*
                 * Smaller exponents: the result is 0 unless a directed
                 * rounding mode pushes a non-zero value to +1 or -1.
                 */
                d = 0;
                if (vsm.exponent | vsm.significand) {
                        exceptions |= FPSCR_IXC;
                        if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
                                d = 1;
                        else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
                                d = -1;
                }
        }

        pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);

        vfp_put_float((s32)d, sd);

        return exceptions;
}
 699
 700 static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
 701 {
 702         return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
 703 }
 704
/*
 * Dispatch table for the extended operations, indexed by FEXT_TO_IDX()
 * of the operation code.  Every handler takes the destination register
 * number, an unused slot, the packed source operand and the FPSCR value,
 * and returns exception flags.  Slots not listed here are NULL.
 */
static u32 (* const fop_extfns[32])(int sd, int unused, s32 m, u32 fpscr) = {
        [FEXT_TO_IDX(FEXT_FCPY)]        = vfp_single_fcpy,
        [FEXT_TO_IDX(FEXT_FABS)]        = vfp_single_fabs,
        [FEXT_TO_IDX(FEXT_FNEG)]        = vfp_single_fneg,
        [FEXT_TO_IDX(FEXT_FSQRT)]       = vfp_single_fsqrt,
        [FEXT_TO_IDX(FEXT_FCMP)]        = vfp_single_fcmp,
        [FEXT_TO_IDX(FEXT_FCMPE)]       = vfp_single_fcmpe,
        [FEXT_TO_IDX(FEXT_FCMPZ)]       = vfp_single_fcmpz,
        [FEXT_TO_IDX(FEXT_FCMPEZ)]      = vfp_single_fcmpez,
        [FEXT_TO_IDX(FEXT_FCVT)]        = vfp_single_fcvtd,
        [FEXT_TO_IDX(FEXT_FUITO)]       = vfp_single_fuito,
        [FEXT_TO_IDX(FEXT_FSITO)]       = vfp_single_fsito,
        [FEXT_TO_IDX(FEXT_FTOUI)]       = vfp_single_ftoui,
        [FEXT_TO_IDX(FEXT_FTOUIZ)]      = vfp_single_ftouiz,
        [FEXT_TO_IDX(FEXT_FTOSI)]       = vfp_single_ftosi,
        [FEXT_TO_IDX(FEXT_FTOSIZ)]      = vfp_single_ftosiz,
};
 722
 723
 724
 725
 726
 727 static u32
 728 vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
 729                           struct vfp_single *vsm, u32 fpscr)
 730 {
 731         struct vfp_single *vsp;
 732         u32 exceptions = 0;
 733         int tn, tm;
 734
 735         tn = vfp_single_type(vsn);
 736         tm = vfp_single_type(vsm);
 737
 738         if (tn & tm & VFP_INFINITY) {
 739                 /*
 740                  * Two infinities.  Are they different signs?
 741                  */
 742                 if (vsn->sign ^ vsm->sign) {
 743                         /*
 744                          * different signs -> invalid
 745                          */
 746                         exceptions = FPSCR_IOC;
 747                         vsp = &vfp_single_default_qnan;
 748                 } else {
 749                         /*
 750                          * same signs -> valid
 751                          */
 752                         vsp = vsn;
 753                 }
 754         } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
 755                 /*
 756                  * One infinity and one number -> infinity
 757                  */
 758                 vsp = vsn;
 759         } else {
 760                 /*
 761                  * 'n' is a NaN of some type
 762                  */
 763                 return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 764         }
 765         *vsd = *vsp;
 766         return exceptions;
 767 }
 768
 769 static u32
 770 vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
 771                struct vfp_single *vsm, u32 fpscr)
 772 {
 773         u32 exp_diff, m_sig;
 774
 775         if (vsn->significand & 0x80000000 ||
 776             vsm->significand & 0x80000000) {
 777                 pr_info("VFP: bad FP values in %s\n", __func__);
 778                 vfp_single_dump("VSN", vsn);
 779                 vfp_single_dump("VSM", vsm);
 780         }
 781
 782         /*
 783          * Ensure that 'n' is the largest magnitude number.  Note that
 784          * if 'n' and 'm' have equal exponents, we do not swap them.
 785          * This ensures that NaN propagation works correctly.
 786          */
 787         if (vsn->exponent < vsm->exponent) {
 788                 struct vfp_single *t = vsn;
 789                 vsn = vsm;
 790                 vsm = t;
 791         }
 792
 793         /*
 794          * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
 795          * infinity or a NaN here.
 796          */
 797         if (vsn->exponent == 255)
 798                 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
 799
 800         /*
 801          * We have two proper numbers, where 'vsn' is the larger magnitude.
 802          *
 803          * Copy 'n' to 'd' before doing the arithmetic.
 804          */
 805         *vsd = *vsn;
 806
 807         /*
 808          * Align both numbers.
 809          */
 810         exp_diff = vsn->exponent - vsm->exponent;
 811         m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
 812
 813         /*
 814          * If the signs are different, we are really subtracting.
 815          */
 816         if (vsn->sign ^ vsm->sign) {
 817                 m_sig = vsn->significand - m_sig;
 818                 if ((s32)m_sig < 0) {
 819                         vsd->sign = vfp_sign_negate(vsd->sign);
 820                         m_sig = -m_sig;
 821                 } else if (m_sig == 0) {
 822                         vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
 823                                       FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 824                 }
 825         } else {
 826                 m_sig = vsn->significand + m_sig;
 827         }
 828         vsd->significand = m_sig;
 829
 830         return 0;
 831 }
 832
 833 static u32
 834 vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
 835 {
 836         vfp_single_dump("VSN", vsn);
 837         vfp_single_dump("VSM", vsm);
 838
 839         /*
 840          * Ensure that 'n' is the largest magnitude number.  Note that
 841          * if 'n' and 'm' have equal exponents, we do not swap them.
 842          * This ensures that NaN propagation works correctly.
 843          */
 844         if (vsn->exponent < vsm->exponent) {
 845                 struct vfp_single *t = vsn;
 846                 vsn = vsm;
 847                 vsm = t;
 848                 pr_debug("VFP: swapping M <-> N\n");
 849         }
 850
 851         vsd->sign = vsn->sign ^ vsm->sign;
 852
 853         /*
 854          * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
 855          */
 856         if (vsn->exponent == 255) {
 857                 if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
 858                         return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 859                 if ((vsm->exponent | vsm->significand) == 0) {
 860                         *vsd = vfp_single_default_qnan;
 861                         return FPSCR_IOC;
 862                 }
 863                 vsd->exponent = vsn->exponent;
 864                 vsd->significand = 0;
 865                 return 0;
 866         }
 867
 868         /*
 869          * If 'm' is zero, the result is always zero.  In this case,
 870          * 'n' may be zero or a number, but it doesn't matter which.
 871          */
 872         if ((vsm->exponent | vsm->significand) == 0) {
 873                 vsd->exponent = 0;
 874                 vsd->significand = 0;
 875                 return 0;
 876         }
 877
 878         /*
 879          * We add 2 to the destination exponent for the same reason as
 880          * the addition case - though this time we have +1 from each
 881          * input operand.
 882          */
 883         vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
 884         vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
 885
 886         vfp_single_dump("VSD", vsd);
 887         return 0;
 888 }
 889
 890 #define NEG_MULTIPLY    (1 << 0)
 891 #define NEG_SUBTRACT    (1 << 1)
 892
 893 static u32
 894 vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
 895 {
 896         struct vfp_single vsd, vsp, vsn, vsm;
 897         u32 exceptions;
 898         s32 v;
 899
 900         v = vfp_get_float(sn);
 901         pr_debug("VFP: s%u = %08x\n", sn, v);
 902         vfp_single_unpack(&vsn, v);
 903         if (vsn.exponent == 0 && vsn.significand)
 904                 vfp_single_normalise_denormal(&vsn);
 905
 906         vfp_single_unpack(&vsm, m);
 907         if (vsm.exponent == 0 && vsm.significand)
 908                 vfp_single_normalise_denormal(&vsm);
 909
 910         exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
 911         if (negate & NEG_MULTIPLY)
 912                 vsp.sign = vfp_sign_negate(vsp.sign);
 913
 914         v = vfp_get_float(sd);
 915         pr_debug("VFP: s%u = %08x\n", sd, v);
 916         vfp_single_unpack(&vsn, v);
 917         if (negate & NEG_SUBTRACT)
 918                 vsn.sign = vfp_sign_negate(vsn.sign);
 919
 920         exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
 921
 922         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
 923 }
 924
 925 /*
 926  * Standard operations
 927  */
 928
 929 /*
 930  * sd = sd + (sn * sm)
 931  */
 932 static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
 933 {
 934         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
 935 }
 936
 937 /*
 938  * sd = sd - (sn * sm)
 939  */
 940 static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
 941 {
 942         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
 943 }
 944
 945 /*
 946  * sd = -sd + (sn * sm)
 947  */
 948 static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
 949 {
 950         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
 951 }
 952
 953 /*
 954  * sd = -sd - (sn * sm)
 955  */
 956 static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
 957 {
 958         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
 959 }
 960
 961 /*
 962  * sd = sn * sm
 963  */
 964 static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
 965 {
 966         struct vfp_single vsd, vsn, vsm;
 967         u32 exceptions;
 968         s32 n = vfp_get_float(sn);
 969
 970         pr_debug("VFP: s%u = %08x\n", sn, n);
 971
 972         vfp_single_unpack(&vsn, n);
 973         if (vsn.exponent == 0 && vsn.significand)
 974                 vfp_single_normalise_denormal(&vsn);
 975
 976         vfp_single_unpack(&vsm, m);
 977         if (vsm.exponent == 0 && vsm.significand)
 978                 vfp_single_normalise_denormal(&vsm);
 979
 980         exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
 981         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
 982 }
 983
 984 /*
 985  * sd = -(sn * sm)
 986  */
 987 static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
 988 {
 989         struct vfp_single vsd, vsn, vsm;
 990         u32 exceptions;
 991         s32 n = vfp_get_float(sn);
 992
 993         pr_debug("VFP: s%u = %08x\n", sn, n);
 994
 995         vfp_single_unpack(&vsn, n);
 996         if (vsn.exponent == 0 && vsn.significand)
 997                 vfp_single_normalise_denormal(&vsn);
 998
 999         vfp_single_unpack(&vsm, m);
1000         if (vsm.exponent == 0 && vsm.significand)
1001                 vfp_single_normalise_denormal(&vsm);
1002
1003         exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1004         vsd.sign = vfp_sign_negate(vsd.sign);
1005         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
1006 }
1007
1008 /*
1009  * sd = sn + sm
1010  */
1011 static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1012 {
1013         struct vfp_single vsd, vsn, vsm;
1014         u32 exceptions;
1015         s32 n = vfp_get_float(sn);
1016
1017         pr_debug("VFP: s%u = %08x\n", sn, n);
1018
1019         /*
1020          * Unpack and normalise denormals.
1021          */
1022         vfp_single_unpack(&vsn, n);
1023         if (vsn.exponent == 0 && vsn.significand)
1024                 vfp_single_normalise_denormal(&vsn);
1025
1026         vfp_single_unpack(&vsm, m);
1027         if (vsm.exponent == 0 && vsm.significand)
1028                 vfp_single_normalise_denormal(&vsm);
1029
1030         exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1031
1032         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1033 }
1034
1035 /*
1036  * sd = sn - sm
1037  */
1038 static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1039 {
1040         /*
1041          * Subtraction is addition with one sign inverted.
1042          */
1043         return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1044 }
1045
1046 /*
1047  * sd = sn / sm
1048  */
1049 static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
1050 {
1051         struct vfp_single vsd, vsn, vsm;
1052         u32 exceptions = 0;
1053         s32 n = vfp_get_float(sn);
1054         int tm, tn;
1055
1056         pr_debug("VFP: s%u = %08x\n", sn, n);
1057
1058         vfp_single_unpack(&vsn, n);
1059         vfp_single_unpack(&vsm, m);
1060
1061         vsd.sign = vsn.sign ^ vsm.sign;
1062
1063         tn = vfp_single_type(&vsn);
1064         tm = vfp_single_type(&vsm);
1065
1066         /*
1067          * Is n a NAN?
1068          */
1069         if (tn & VFP_NAN)
1070                 goto vsn_nan;
1071
1072         /*
1073          * Is m a NAN?
1074          */
1075         if (tm & VFP_NAN)
1076                 goto vsm_nan;
1077
1078         /*
1079          * If n and m are infinity, the result is invalid
1080          * If n and m are zero, the result is invalid
1081          */
1082         if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1083                 goto invalid;
1084
1085         /*
1086          * If n is infinity, the result is infinity
1087          */
1088         if (tn & VFP_INFINITY)
1089                 goto infinity;
1090
1091         /*
1092          * If m is zero, raise div0 exception
1093          */
1094         if (tm & VFP_ZERO)
1095                 goto divzero;
1096
1097         /*
1098          * If m is infinity, or n is zero, the result is zero
1099          */
1100         if (tm & VFP_INFINITY || tn & VFP_ZERO)
1101                 goto zero;
1102
1103         if (tn & VFP_DENORMAL)
1104                 vfp_single_normalise_denormal(&vsn);
1105         if (tm & VFP_DENORMAL)
1106                 vfp_single_normalise_denormal(&vsm);
1107
1108         /*
1109          * Ok, we have two numbers, we can perform division.
1110          */
1111         vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1112         vsm.significand <<= 1;
1113         if (vsm.significand <= (2 * vsn.significand)) {
1114                 vsn.significand >>= 1;
1115                 vsd.exponent++;
1116         }
1117         {
1118                 u64 significand = (u64)vsn.significand << 32;
1119                 do_div(significand, vsm.significand);
1120                 vsd.significand = significand;
1121         }
1122         if ((vsd.significand & 0x3f) == 0)
1123                 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1124
1125         return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
1126
1127  vsn_nan:
1128         exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1129  pack:
1130         vfp_put_float(vfp_single_pack(&vsd), sd);
1131         return exceptions;
1132
1133  vsm_nan:
1134         exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1135         goto pack;
1136
1137  zero:
1138         vsd.exponent = 0;
1139         vsd.significand = 0;
1140         goto pack;
1141
1142  divzero:
1143         exceptions = FPSCR_DZC;
1144  infinity:
1145         vsd.exponent = 255;
1146         vsd.significand = 0;
1147         goto pack;
1148
1149  invalid:
1150         vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
1151         return FPSCR_IOC;
1152 }
1153
/*
 * Handlers for the non-extension operations, indexed by FOP_TO_IDX(op).
 * Slots without an initialiser are left NULL; vfp_single_cpdo() treats
 * a NULL handler as an invalid instruction.
 */
static u32 (* const fop_fns[16])(int sd, int sn, s32 m, u32 fpscr) = {
        [FOP_TO_IDX(FOP_FMAC)]  = vfp_single_fmac,
        [FOP_TO_IDX(FOP_FNMAC)] = vfp_single_fnmac,
        [FOP_TO_IDX(FOP_FMSC)]  = vfp_single_fmsc,
        [FOP_TO_IDX(FOP_FNMSC)] = vfp_single_fnmsc,
        [FOP_TO_IDX(FOP_FMUL)]  = vfp_single_fmul,
        [FOP_TO_IDX(FOP_FNMUL)] = vfp_single_fnmul,
        [FOP_TO_IDX(FOP_FADD)]  = vfp_single_fadd,
        [FOP_TO_IDX(FOP_FSUB)]  = vfp_single_fsub,
        [FOP_TO_IDX(FOP_FDIV)]  = vfp_single_fdiv,
};
1165
1166 #define FREG_BANK(x)    ((x) & 0x18)
1167 #define FREG_IDX(x)     ((x) & 7)
1168
1169 u32 vfp_single_cpdo(u32 inst, u32 fpscr)
1170 {
1171         u32 op = inst & FOP_MASK;
1172         u32 exceptions = 0;
1173         unsigned int dest;
1174         unsigned int sn = vfp_get_sn(inst);
1175         unsigned int sm = vfp_get_sm(inst);
1176         unsigned int vecitr, veclen, vecstride;
1177         u32 (*fop)(int, int, s32, u32);
1178
1179         veclen = fpscr & FPSCR_LENGTH_MASK;
1180         vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
1181
1182         /*
1183          * fcvtsd takes a dN register number as destination, not sN.
1184          * Technically, if bit 0 of dd is set, this is an invalid
1185          * instruction.  However, we ignore this for efficiency.
1186          * It also only operates on scalars.
1187          */
1188         if ((inst & FEXT_MASK) == FEXT_FCVT) {
1189                 veclen = 0;
1190                 dest = vfp_get_dd(inst);
1191         } else
1192                 dest = vfp_get_sd(inst);
1193
1194         /*
1195          * If destination bank is zero, vector length is always '1'.
1196          * ARM DDI0100F C5.1.3, C5.3.2.
1197          */
1198         if (FREG_BANK(dest) == 0)
1199                 veclen = 0;
1200
1201         pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1202                  (veclen >> FPSCR_LENGTH_BIT) + 1);
1203
1204         fop = (op == FOP_EXT) ? fop_extfns[FEXT_TO_IDX(inst)] : fop_fns[FOP_TO_IDX(op)];
1205         if (!fop)
1206                 goto invalid;
1207
1208         for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1209                 s32 m = vfp_get_float(sm);
1210                 u32 except;
1211
1212                 if (op == FOP_EXT && (inst & FEXT_MASK) == FEXT_FCVT)
1213                         pr_debug("VFP: itr%d (d%u) = op[%u] (s%u=%08x)\n",
1214                                  vecitr >> FPSCR_LENGTH_BIT, dest, sn, sm, m);
1215                 else if (op == FOP_EXT)
1216                         pr_debug("VFP: itr%d (s%u) = op[%u] (s%u=%08x)\n",
1217                                  vecitr >> FPSCR_LENGTH_BIT, dest, sn, sm, m);
1218                 else
1219                         pr_debug("VFP: itr%d (s%u) = (s%u) op[%u] (s%u=%08x)\n",
1220                                  vecitr >> FPSCR_LENGTH_BIT, dest, sn,
1221                                  FOP_TO_IDX(op), sm, m);
1222
1223                 except = fop(dest, sn, m, fpscr);
1224                 pr_debug("VFP: itr%d: exceptions=%08x\n",
1225                          vecitr >> FPSCR_LENGTH_BIT, except);
1226
1227                 exceptions |= except;
1228
1229                 /*
1230                  * This ensures that comparisons only operate on scalars;
1231                  * comparisons always return with one FPSCR status bit set.
1232                  */
1233                 if (except & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
1234                         break;
1235
1236                 /*
1237                  * CHECK: It appears to be undefined whether we stop when
1238                  * we encounter an exception.  We continue.
1239                  */
1240
1241                 dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
1242                 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
1243                 if (FREG_BANK(sm) != 0)
1244                         sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
1245         }
1246         return exceptions;
1247
1248  invalid:
1249         return (u32)-1;
1250 }