2 * Floating point emulation support for subnormalised numbers on SH4
3 * architecture. This file is derived from the SoftFloat IEC/IEEE
4 * Floating-point Arithmetic Package, Release 2, the original license of
5 * which is reproduced below.
7 * ========================================================================
9 * This C source file is part of the SoftFloat IEC/IEEE Floating-point
10 * Arithmetic Package, Release 2.
12 * Written by John R. Hauser. This work was made possible in part by the
13 * International Computer Science Institute, located at Suite 600, 1947 Center
14 * Street, Berkeley, California 94704. Funding was partially provided by the
15 * National Science Foundation under grant MIP-9311980. The original version
16 * of this code was written as part of a project to build a fixed-point vector
17 * processor in collaboration with the University of California at Berkeley,
18 * overseen by Profs. Nelson Morgan and John Wawrzynek. More information
19 * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
20 * arithmetic/softfloat.html'.
22 * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
23 * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
24 * TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
25 * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
26 * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
28 * Derivative works are acceptable, even for commercial purposes, so long as
29 * (1) they include prominent notice that the work is derivative, and (2) they
30 * include prominent notice akin to these three paragraphs for those parts of
31 * this code that are retained.
33 * ========================================================================
35 * SH4 modifications by Ismail Dhaoui <ismail.dhaoui@st.com>
36 * and Kamel Khelifi <kamel.khelifi@st.com>
38 #include <linux/kernel.h>
/* Pastes an LL suffix onto an integer literal, for writing 64-bit constants. */
41 #define LIT64( a ) a##LL
/* Integer aliases used by the routines in this file. */
44 typedef unsigned char uint8;
45 typedef signed char int8;
48 typedef unsigned int uint32;
49 typedef signed int int32;
51 typedef unsigned long long int bits64;
52 typedef signed long long int sbits64;
54 typedef unsigned char bits8;
55 typedef signed char sbits8;
56 typedef unsigned short int bits16;
57 typedef signed short int sbits16;
58 typedef unsigned int bits32;
59 typedef signed int sbits32;
61 typedef unsigned long long int uint64;
62 typedef signed long long int int64;
/*
 * float32/float64 are unsigned integer types: the arithmetic below works on
 * raw bit patterns, not on C floating-point values.
 * NOTE(review): float32 is `unsigned long int`, which presumably relies on
 * long being 32 bits wide on the target - confirm.
 */
64 typedef unsigned long int float32;
65 typedef unsigned long long float64;
/* Hooks implemented outside this file (see the per-line notes). */
67 extern void float_raise(unsigned int flags); /* in fpu.c */
68 extern int float_rounding_mode(void); /* in fpu.c */
/*
 * Forward declarations for the field accessors, pack/shift helpers,
 * 128-bit helpers and arithmetic routines used in this file.
 */
70 inline bits64 extractFloat64Frac(float64 a);
71 inline flag extractFloat64Sign(float64 a);
72 inline int16 extractFloat64Exp(float64 a);
73 inline int16 extractFloat32Exp(float32 a);
74 inline flag extractFloat32Sign(float32 a);
75 inline bits32 extractFloat32Frac(float32 a);
76 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig);
77 inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr);
78 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig);
79 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr);
80 float64 float64_sub(float64 a, float64 b);
81 float32 float32_sub(float32 a, float32 b);
82 float32 float32_add(float32 a, float32 b);
83 float64 float64_add(float64 a, float64 b);
84 float64 float64_div(float64 a, float64 b);
85 float32 float32_div(float32 a, float32 b);
86 float32 float32_mul(float32 a, float32 b);
87 float64 float64_mul(float64 a, float64 b);
88 float32 float64_to_float32(float64 a);
89 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
91 inline void sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
93 inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr);
95 static int8 countLeadingZeros32(bits32 a);
96 static int8 countLeadingZeros64(bits64 a);
97 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp,
99 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign);
100 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign);
101 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig);
102 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp,
104 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig);
105 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign);
106 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign);
107 static void normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr,
109 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b);
110 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
/* Returns the low 52 bits of `a`: the fraction field of a double-precision bit pattern. */
113 inline bits64 extractFloat64Frac(float64 a)
115 return a & LIT64(0x000FFFFFFFFFFFFF);
/* Sign accessor for a double-precision bit pattern; the result is a `flag`. */
118 inline flag extractFloat64Sign(float64 a)
/* Returns the 11-bit exponent field held in bits 52..62 of `a`. */
123 inline int16 extractFloat64Exp(float64 a)
125 return (a >> 52) & 0x7FF;
/* Returns the 8-bit exponent field held in bits 23..30 of `a`. */
128 inline int16 extractFloat32Exp(float32 a)
130 return (a >> 23) & 0xFF;
/* Sign accessor for a single-precision bit pattern; the result is a `flag`. */
133 inline flag extractFloat32Sign(float32 a)
/* Returns the low 23 bits of `a`: the fraction field of a single-precision bit pattern. */
138 inline bits32 extractFloat32Frac(float32 a)
140 return a & 0x007FFFFF;
/*
 * Assembles a double-precision bit pattern: zSign at bit 63, zExp starting at
 * bit 52, zSig in the low bits. The fields are combined with `+` rather than
 * `|`, so any bits of zSig at or above bit 52 carry into the exponent field.
 */
143 inline float64 packFloat64(flag zSign, int16 zExp, bits64 zSig)
145 return (((bits64) zSign) << 63) + (((bits64) zExp) << 52) + zSig;
/* Right shift of the 64-bit value `a` by `count` that keeps a sticky bit. */
148 inline void shift64RightJamming(bits64 a, int16 count, bits64 * zPtr)
154 } else if (count < 64) {
/* OR a 1 into the least significant bit if any shifted-out bit was non-zero. */
155 z = (a >> count) | ((a << ((-count) & 63)) != 0);
/* Leading-zero count helper for a 32-bit value, built around a per-byte lookup table. */
162 static int8 countLeadingZeros32(bits32 a)
/* Entry i is the number of leading zero bits in the 8-bit value i (8 for 0, 0 for 128..255). */
164 static const int8 countLeadingZerosHigh[] = {
165 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
166 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
169 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
170 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
173 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Add the table value for the most significant byte of the current `a`. */
193 shiftCount += countLeadingZerosHigh[a >> 24];
/* 64-bit leading-zero count helper, implemented on top of countLeadingZeros32(). */
198 static int8 countLeadingZeros64(bits64 a)
/* True when the upper 32 bits of `a` are all zero. */
203 if (a < ((bits64) 1) << 32) {
/* The callee takes a bits32, so only the low 32 bits of `a` are examined here. */
208 shiftCount += countLeadingZeros32(a);
/*
 * Computes a shift count of (leading zeros of zSig) - 1 and hands off to
 * roundAndPackFloat64() with the exponent reduced by that count.
 */
213 static float64 normalizeRoundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
217 shiftCount = countLeadingZeros64(zSig) - 1;
218 return roundAndPackFloat64(zSign, zExp - shiftCount,
/*
 * Magnitude-subtraction helper for double precision. float64_add() and
 * float64_sub() call it with the sign of `a` as zSign.
 */
223 static float64 subFloat64Sigs(float64 a, float64 b, flag zSign)
225 int16 aExp, bExp, zExp;
226 bits64 aSig, bSig, zSig;
/* Split both operands into fraction and exponent fields. */
229 aSig = extractFloat64Frac(a);
230 aExp = extractFloat64Exp(a);
231 bSig = extractFloat64Frac(b);
232 bExp = extractFloat64Exp(b);
233 expDiff = aExp - bExp;
/*
 * Returns a zero whose sign bit is set only when the rounding mode is
 * FPSCR_RM_ZERO.
 * NOTE(review): if this is the exact-cancellation result, IEEE 754 gives it a
 * negative sign only under round-toward-negative; confirm that -0 under
 * round-to-zero is intended (or matches the SH4 hardware).
 */
248 return packFloat64(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
/* Infinity (exponent 0x7FF, zero fraction) with the opposite sign. */
251 return packFloat64(zSign ^ 1, 0x7FF, 0);
/* Set bit 62 in aSig, then shift it right by -expDiff places with a sticky bit. */
256 aSig |= LIT64(0x4000000000000000);
258 shift64RightJamming(aSig, -expDiff, &aSig);
259 bSig |= LIT64(0x4000000000000000);
264 goto normalizeRoundAndPack;
/* Mirror case: bSig gets bit 62 set and is shifted right by expDiff places. */
272 bSig |= LIT64(0x4000000000000000);
274 shift64RightJamming(bSig, expDiff, &bSig);
275 aSig |= LIT64(0x4000000000000000);
/* Common exit: normalise, round and pack the difference. */
279 normalizeRoundAndPack:
281 return normalizeRoundAndPackFloat64(zSign, zExp, zSig);
/*
 * Magnitude-addition helper for double precision. float64_add() and
 * float64_sub() call it with the sign of `a` as zSign.
 */
284 static float64 addFloat64Sigs(float64 a, float64 b, flag zSign)
286 int16 aExp, bExp, zExp;
287 bits64 aSig, bSig, zSig;
/* Split both operands into fraction and exponent fields. */
290 aSig = extractFloat64Frac(a);
291 aExp = extractFloat64Exp(a);
292 bSig = extractFloat64Frac(b);
293 bExp = extractFloat64Exp(b);
294 expDiff = aExp - bExp;
/* Set bit 61 in bSig, then shift it right by expDiff places with a sticky bit. */
304 bSig |= LIT64(0x2000000000000000);
306 shift64RightJamming(bSig, expDiff, &bSig);
308 } else if (expDiff < 0) {
/* Infinity (exponent 0x7FF, zero fraction) with sign zSign. */
310 return packFloat64(zSign, 0x7FF, 0);
/* Mirror case: aSig gets bit 61 set and is shifted right by -expDiff places. */
315 aSig |= LIT64(0x2000000000000000);
317 shift64RightJamming(aSig, -expDiff, &aSig);
/* Result packed directly with a zero exponent field; the sum is shifted down by 9 bits. */
324 return packFloat64(zSign, 0, (aSig + bSig) >> 9);
325 zSig = LIT64(0x4000000000000000) + aSig + bSig;
329 aSig |= LIT64(0x2000000000000000);
330 zSig = (aSig + bSig) << 1;
/* True when bit 63 of zSig is set. */
332 if ((sbits64) zSig < 0) {
337 return roundAndPackFloat64(zSign, zExp, zSig);
/*
 * Assembles a single-precision bit pattern: zSign at bit 31, zExp starting at
 * bit 23, zSig in the low bits. The fields are combined with `+` rather than
 * `|`, so any bits of zSig at or above bit 23 carry into the exponent field.
 */
341 inline float32 packFloat32(flag zSign, int16 zExp, bits32 zSig)
343 return (((bits32) zSign) << 31) + (((bits32) zExp) << 23) + zSig;
/* Right shift of the 32-bit value `a` by `count` that keeps a sticky bit. */
346 inline void shift32RightJamming(bits32 a, int16 count, bits32 * zPtr)
351 } else if (count < 32) {
/* OR a 1 into the least significant bit if any shifted-out bit was non-zero. */
352 z = (a >> count) | ((a << ((-count) & 31)) != 0);
/*
 * Rounds the significand zSig according to the current rounding mode and packs
 * it with zSign and zExp into a single-precision bit pattern. Exception causes
 * are reported through float_raise().
 */
359 static float32 roundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
361 flag roundNearestEven;
362 int8 roundIncrement, roundBits;
365 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
366 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
/* 0x40 is half of the range of the 7 low bits that are discarded below (mask 0x7F, shift by 7). */
367 roundIncrement = 0x40;
368 if (!roundNearestEven) {
371 roundBits = zSig & 0x7F;
/* The cast to the unsigned bits16 makes negative exponents compare as large values here. */
372 if (0xFD <= (bits16) zExp) {
375 && ((sbits32) (zSig + roundIncrement) < 0))
/* Overflow: report it and return infinity, or the pattern one below infinity when roundIncrement is 0. */
377 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
378 return packFloat32(zSign, 0xFF,
379 0) - (roundIncrement == 0);
383 || (zSig + roundIncrement < 0x80000000);
/* Shift the significand right by -zExp places with a sticky bit and re-read the rounding bits. */
384 shift32RightJamming(zSig, -zExp, &zSig);
386 roundBits = zSig & 0x7F;
/* Underflow is raised only when the result is tiny and non-zero bits are being discarded. */
387 if (isTiny && roundBits)
388 float_raise(FPSCR_CAUSE_UNDERFLOW);
392 float_raise(FPSCR_CAUSE_INEXACT);
/* Apply the increment and drop the 7 rounding bits. */
393 zSig = (zSig + roundIncrement) >> 7;
/* Ties-to-even: clear the LSB when the discarded bits were exactly 0x40 in round-to-nearest mode. */
394 zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
397 return packFloat32(zSign, zExp, zSig);
/*
 * Computes a shift count of (leading zeros of zSig) - 1 and hands off to
 * roundAndPackFloat32() with the exponent reduced by that count.
 */
401 static float32 normalizeRoundAndPackFloat32(flag zSign, int16 zExp, bits32 zSig)
405 shiftCount = countLeadingZeros32(zSig) - 1;
406 return roundAndPackFloat32(zSign, zExp - shiftCount,
/*
 * Rounds the significand zSig according to the current rounding mode and packs
 * it with zSign and zExp into a double-precision bit pattern. Exception causes
 * are reported through float_raise().
 */
410 static float64 roundAndPackFloat64(flag zSign, int16 zExp, bits64 zSig)
412 flag roundNearestEven;
413 int16 roundIncrement, roundBits;
416 /* SH4 has only 2 rounding modes - round to nearest and round to zero */
417 roundNearestEven = (float_rounding_mode() == FPSCR_RM_NEAREST);
/* 0x200 is half of the range of the 10 low bits that are discarded below (mask 0x3FF, shift by 10). */
418 roundIncrement = 0x200;
419 if (!roundNearestEven) {
422 roundBits = zSig & 0x3FF;
/* The cast to the unsigned bits16 makes negative exponents compare as large values here. */
423 if (0x7FD <= (bits16) zExp) {
426 && ((sbits64) (zSig + roundIncrement) < 0))
/* Overflow: report it and return infinity, or the pattern one below infinity when roundIncrement is 0. */
428 float_raise(FPSCR_CAUSE_OVERFLOW | FPSCR_CAUSE_INEXACT);
429 return packFloat64(zSign, 0x7FF,
430 0) - (roundIncrement == 0);
434 || (zSig + roundIncrement <
435 LIT64(0x8000000000000000));
/* Shift the significand right by -zExp places with a sticky bit and re-read the rounding bits. */
436 shift64RightJamming(zSig, -zExp, &zSig);
438 roundBits = zSig & 0x3FF;
/* Underflow is raised only when the result is tiny and non-zero bits are being discarded. */
439 if (isTiny && roundBits)
440 float_raise(FPSCR_CAUSE_UNDERFLOW);
444 float_raise(FPSCR_CAUSE_INEXACT);
/* Apply the increment and drop the 10 rounding bits. */
445 zSig = (zSig + roundIncrement) >> 10;
/* Ties-to-even: clear the LSB when the discarded bits were exactly 0x200 in round-to-nearest mode. */
446 zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
449 return packFloat64(zSign, zExp, zSig);
/*
 * Magnitude-subtraction helper for single precision. float32_add() and
 * float32_sub() call it with the sign of `a` as zSign.
 */
453 static float32 subFloat32Sigs(float32 a, float32 b, flag zSign)
455 int16 aExp, bExp, zExp;
456 bits32 aSig, bSig, zSig;
/* Split both operands into fraction and exponent fields. */
459 aSig = extractFloat32Frac(a);
460 aExp = extractFloat32Exp(a);
461 bSig = extractFloat32Frac(b);
462 bExp = extractFloat32Exp(b);
463 expDiff = aExp - bExp;
/*
 * Returns a zero whose sign bit is set only when the rounding mode is
 * FPSCR_RM_ZERO.
 * NOTE(review): if this is the exact-cancellation result, IEEE 754 gives it a
 * negative sign only under round-toward-negative; confirm that -0 under
 * round-to-zero is intended (or matches the SH4 hardware).
 */
478 return packFloat32(float_rounding_mode() == FPSCR_RM_ZERO, 0, 0);
/* Infinity (exponent 0xFF, zero fraction) with the opposite sign. */
481 return packFloat32(zSign ^ 1, 0xFF, 0);
/* Shift aSig right by -expDiff places with a sticky bit. */
488 shift32RightJamming(aSig, -expDiff, &aSig);
494 goto normalizeRoundAndPack;
/* Mirror case: shift bSig right by expDiff places with a sticky bit. */
504 shift32RightJamming(bSig, expDiff, &bSig);
/* Common exit: normalise, round and pack the difference. */
509 normalizeRoundAndPack:
511 return normalizeRoundAndPackFloat32(zSign, zExp, zSig);
/*
 * Magnitude-addition helper for single precision. float32_add() and
 * float32_sub() call it with the sign of `a` as zSign.
 */
515 static float32 addFloat32Sigs(float32 a, float32 b, flag zSign)
517 int16 aExp, bExp, zExp;
518 bits32 aSig, bSig, zSig;
/* Split both operands into fraction and exponent fields. */
521 aSig = extractFloat32Frac(a);
522 aExp = extractFloat32Exp(a);
523 bSig = extractFloat32Frac(b);
524 bExp = extractFloat32Exp(b);
525 expDiff = aExp - bExp;
/* Shift bSig right by expDiff places with a sticky bit. */
537 shift32RightJamming(bSig, expDiff, &bSig);
539 } else if (expDiff < 0) {
/* Infinity (exponent 0xFF, zero fraction) with sign zSign. */
541 return packFloat32(zSign, 0xFF, 0);
/* Mirror case: shift aSig right by -expDiff places with a sticky bit. */
548 shift32RightJamming(aSig, -expDiff, &aSig);
/* Result packed directly with a zero exponent field; the sum is shifted down by 6 bits. */
555 return packFloat32(zSign, 0, (aSig + bSig) >> 6);
556 zSig = 0x40000000 + aSig + bSig;
561 zSig = (aSig + bSig) << 1;
/* True when bit 31 of zSig is set. */
563 if ((sbits32) zSig < 0) {
568 return roundAndPackFloat32(zSign, zExp, zSig);
/*
 * Double-precision a - b on raw bit patterns. Operands with equal signs go to
 * the magnitude-subtraction helper; the other return uses the
 * magnitude-addition helper. Both receive the sign of `a`.
 */
572 float64 float64_sub(float64 a, float64 b)
576 aSign = extractFloat64Sign(a);
577 bSign = extractFloat64Sign(b);
578 if (aSign == bSign) {
579 return subFloat64Sigs(a, b, aSign);
581 return addFloat64Sigs(a, b, aSign);
/*
 * Single-precision a - b on raw bit patterns. Operands with equal signs go to
 * the magnitude-subtraction helper; the other return uses the
 * magnitude-addition helper. Both receive the sign of `a`.
 */
586 float32 float32_sub(float32 a, float32 b)
590 aSign = extractFloat32Sign(a);
591 bSign = extractFloat32Sign(b);
592 if (aSign == bSign) {
593 return subFloat32Sigs(a, b, aSign);
595 return addFloat32Sigs(a, b, aSign);
/*
 * Single-precision a + b on raw bit patterns. Operands with equal signs go to
 * the magnitude-addition helper; the other return uses the
 * magnitude-subtraction helper. Both receive the sign of `a`.
 */
600 float32 float32_add(float32 a, float32 b)
604 aSign = extractFloat32Sign(a);
605 bSign = extractFloat32Sign(b);
606 if (aSign == bSign) {
607 return addFloat32Sigs(a, b, aSign);
609 return subFloat32Sigs(a, b, aSign);
/*
 * Double-precision a + b on raw bit patterns. Operands with equal signs go to
 * the magnitude-addition helper; the other return uses the
 * magnitude-subtraction helper. Both receive the sign of `a`.
 */
614 float64 float64_add(float64 a, float64 b)
618 aSign = extractFloat64Sign(a);
619 bSign = extractFloat64Sign(b);
620 if (aSign == bSign) {
621 return addFloat64Sigs(a, b, aSign);
623 return subFloat64Sigs(a, b, aSign);
/*
 * Normalises a double-precision subnormal significand. aSig is shifted left
 * by (leading zeros - 11), which puts its highest set bit at bit 52; the
 * shifted value goes to *zSigPtr and the exponent 1 - shiftCount to *zExpPtr.
 */
628 normalizeFloat64Subnormal(bits64 aSig, int16 * zExpPtr, bits64 * zSigPtr)
632 shiftCount = countLeadingZeros64(aSig) - 11;
633 *zSigPtr = aSig << shiftCount;
634 *zExpPtr = 1 - shiftCount;
/* 128-bit addition helper working on (high, low) pairs of 64-bit words. */
637 inline void add128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
/* High word: a0 + b0 plus 1 when z1 (presumably the low-word sum) is below a1, i.e. it wrapped. */
644 *z0Ptr = a0 + b0 + (z1 < a1);
/* 128-bit subtraction helper working on (high, low) pairs of 64-bit words. */
648 sub128(bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 * z0Ptr,
/* High word: a0 - b0 minus a borrow of 1 when the low word a1 is smaller than b1. */
652 *z0Ptr = a0 - b0 - (a1 < b1);
/*
 * Estimates the 64-bit quotient of the 128-bit value (a0, a1) divided by b,
 * building the result 32 bits at a time.
 */
655 static bits64 estimateDiv128To64(bits64 a0, bits64 a1, bits64 b)
658 bits64 rem0, rem1, term0, term1;
/* Saturated result: all 64 bits set. */
661 return LIT64(0xFFFFFFFFFFFFFFFF);
/* First guess for the upper 32 quotient bits, clamped to 0xFFFFFFFF. */
663 z = (b0 << 32 <= a0) ? LIT64(0xFFFFFFFF00000000) : (a0 / b0) << 32;
/* Multiply the guess back and subtract it from the dividend to get the remainder. */
664 mul64To128(b, z, &term0, &term1);
665 sub128(a0, a1, term0, term1, &rem0, &rem1);
/* While the remainder is negative, step the guess down by one unit of the upper half and add back. */
666 while (((sbits64) rem0) < 0) {
667 z -= LIT64(0x100000000);
669 add128(rem0, rem1, b0, b1, &rem0, &rem1);
/* Lower 32 quotient bits from the remainder shifted up by 32, again clamped to 0xFFFFFFFF. */
671 rem0 = (rem0 << 32) | (rem1 >> 32);
672 z |= (b0 << 32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;
/*
 * 64 x 64 -> 128-bit multiplication helper, built from four 32 x 32 partial
 * products of the high and low halves of `a` and `b`.
 */
676 inline void mul64To128(bits64 a, bits64 b, bits64 * z0Ptr, bits64 * z1Ptr)
678 bits32 aHigh, aLow, bHigh, bLow;
679 bits64 z0, zMiddleA, zMiddleB, z1;
/* The four partial products: low*low, the two cross terms, and high*high. */
685 z1 = ((bits64) aLow) * bLow;
686 zMiddleA = ((bits64) aLow) * bHigh;
687 zMiddleB = ((bits64) aHigh) * bLow;
688 z0 = ((bits64) aHigh) * bHigh;
/* Sum the cross terms; a wrap (sum < addend) is carried into bit 32 of z0. */
689 zMiddleA += zMiddleB;
690 z0 += (((bits64) (zMiddleA < zMiddleB)) << 32) + (zMiddleA >> 32);
/* Adds 1 to z0 when z1 is below zMiddleA. */
693 z0 += (z1 < zMiddleA);
/*
 * Normalises a single-precision subnormal significand. aSig is shifted left
 * by (leading zeros - 8), which puts its highest set bit at bit 23; the
 * shifted value goes to *zSigPtr and the exponent 1 - shiftCount to *zExpPtr.
 */
699 static void normalizeFloat32Subnormal(bits32 aSig, int16 * zExpPtr,
704 shiftCount = countLeadingZeros32(aSig) - 8;
705 *zSigPtr = aSig << shiftCount;
706 *zExpPtr = 1 - shiftCount;
/* Double-precision a / b on raw bit patterns. */
710 float64 float64_div(float64 a, float64 b)
712 flag aSign, bSign, zSign;
713 int16 aExp, bExp, zExp;
714 bits64 aSig, bSig, zSig;
/* Split both operands into fraction, exponent and sign. */
718 aSig = extractFloat64Frac(a);
719 aExp = extractFloat64Exp(a);
720 aSign = extractFloat64Sign(a);
721 bSig = extractFloat64Frac(b);
722 bExp = extractFloat64Exp(b);
723 bSign = extractFloat64Sign(b);
/* The result sign is the XOR of the operand signs. */
724 zSign = aSign ^ bSign;
/* Special-case results: signed infinity (exponent 0x7FF) and signed zero. */
728 return packFloat64(zSign, 0x7FF, 0);
731 return packFloat64(zSign, 0, 0);
/* `a` has zero exponent and zero fraction: raise the INVALID cause. */
735 if ((aExp | aSig) == 0) {
736 float_raise(FPSCR_CAUSE_INVALID);
738 return packFloat64(zSign, 0x7FF, 0);
740 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
744 return packFloat64(zSign, 0, 0);
745 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
747 zExp = aExp - bExp + 0x3FD;
/* Set bit 52 in each significand, then shift a up by 10 and b up by 11. */
748 aSig = (aSig | LIT64(0x0010000000000000)) << 10;
749 bSig = (bSig | LIT64(0x0010000000000000)) << 11;
750 if (bSig <= (aSig + aSig)) {
754 zSig = estimateDiv128To64(aSig, 0, bSig);
/* Only when the low 9 bits of the estimate are 0, 1 or 2 is the exact remainder computed. */
755 if ((zSig & 0x1FF) <= 2) {
756 mul64To128(bSig, zSig, &term0, &term1);
757 sub128(aSig, 0, term0, term1, &rem0, &rem1);
/* Add bSig back to the remainder for as long as it is negative. */
758 while ((sbits64) rem0 < 0) {
760 add128(rem0, rem1, 0, bSig, &rem0, &rem1);
764 return roundAndPackFloat64(zSign, zExp, zSig);
/* Single-precision a / b on raw bit patterns. */
768 float32 float32_div(float32 a, float32 b)
770 flag aSign, bSign, zSign;
771 int16 aExp, bExp, zExp;
772 bits32 aSig, bSig, zSig;
/* Split both operands into fraction, exponent and sign. */
774 aSig = extractFloat32Frac(a);
775 aExp = extractFloat32Exp(a);
776 aSign = extractFloat32Sign(a);
777 bSig = extractFloat32Frac(b);
778 bExp = extractFloat32Exp(b);
779 bSign = extractFloat32Sign(b);
/* The result sign is the XOR of the operand signs. */
780 zSign = aSign ^ bSign;
/* Special-case results: signed infinity (exponent 0xFF) and signed zero. */
784 return packFloat32(zSign, 0xFF, 0);
787 return packFloat32(zSign, 0, 0);
791 return packFloat32(zSign, 0xFF, 0);
793 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
797 return packFloat32(zSign, 0, 0);
798 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
800 zExp = aExp - bExp + 0x7D;
/* Set bit 23 in each significand, then shift a up by 7 and b up by 8. */
801 aSig = (aSig | 0x00800000) << 7;
802 bSig = (bSig | 0x00800000) << 8;
803 if (bSig <= (aSig + aSig)) {
/* Quotient of the significands via one 64-bit division. */
807 zSig = (((bits64) aSig) << 32) / bSig;
/* When the low 6 quotient bits are zero, set the LSB if the division left a remainder. */
808 if ((zSig & 0x3F) == 0) {
809 zSig |= (((bits64) bSig) * zSig != ((bits64) aSig) << 32);
811 return roundAndPackFloat32(zSign, zExp, zSig);
/* Single-precision a * b on raw bit patterns. */
815 float32 float32_mul(float32 a, float32 b)
817 char aSign, bSign, zSign;
818 int aExp, bExp, zExp;
819 unsigned int aSig, bSig;
820 unsigned long long zSig64;
/* Split both operands into fraction, exponent and sign. */
823 aSig = extractFloat32Frac(a);
824 aExp = extractFloat32Exp(a);
825 aSign = extractFloat32Sign(a);
826 bSig = extractFloat32Frac(b);
827 bExp = extractFloat32Exp(b);
828 bSign = extractFloat32Sign(b);
/* The result sign is the XOR of the operand signs. */
829 zSign = aSign ^ bSign;
832 return packFloat32(zSign, 0, 0);
833 normalizeFloat32Subnormal(aSig, &aExp, &aSig);
837 return packFloat32(zSign, 0, 0);
838 normalizeFloat32Subnormal(bSig, &bExp, &bSig);
/*
 * Either operand is an infinity (exponent 0xff, zero fraction).
 * NOTE(review): roundAndPackFloat32() sends exponents >= 0xFD towards its
 * overflow handling (float_raise(OVERFLOW | INEXACT), and the pattern one
 * below infinity when roundIncrement is 0), whereas float32_div() builds its
 * infinity results with packFloat32(zSign, 0xFF, 0) directly - confirm which
 * behaviour is intended here.
 */
840 if ((bExp == 0xff && bSig == 0) || (aExp == 0xff && aSig == 0))
841 return roundAndPackFloat32(zSign, 0xff, 0);
843 zExp = aExp + bExp - 0x7F;
/* Set bit 23 in each significand, then shift a up by 7 and b up by 8. */
844 aSig = (aSig | 0x00800000) << 7;
845 bSig = (bSig | 0x00800000) << 8;
/* 64-bit product reduced to its upper 32 bits, with the lower half folded into a sticky bit. */
846 shift64RightJamming(((unsigned long long)aSig) * bSig, 32, &zSig64);
/* True when bit 30 of zSig is clear. */
848 if (0 <= (signed int)(zSig << 1)) {
852 return roundAndPackFloat32(zSign, zExp, zSig);
/* Double-precision a * b on raw bit patterns. */
856 float64 float64_mul(float64 a, float64 b)
858 char aSign, bSign, zSign;
859 int aExp, bExp, zExp;
860 unsigned long long int aSig, bSig, zSig0, zSig1;
/* Split both operands into fraction, exponent and sign. */
862 aSig = extractFloat64Frac(a);
863 aExp = extractFloat64Exp(a);
864 aSign = extractFloat64Sign(a);
865 bSig = extractFloat64Frac(b);
866 bExp = extractFloat64Exp(b);
867 bSign = extractFloat64Sign(b);
/* The result sign is the XOR of the operand signs. */
868 zSign = aSign ^ bSign;
872 return packFloat64(zSign, 0, 0);
873 normalizeFloat64Subnormal(aSig, &aExp, &aSig);
877 return packFloat64(zSign, 0, 0);
878 normalizeFloat64Subnormal(bSig, &bExp, &bSig);
/*
 * Either operand is an infinity (exponent 0x7ff, zero fraction).
 * NOTE(review): roundAndPackFloat64() sends exponents >= 0x7FD towards its
 * overflow handling (float_raise(OVERFLOW | INEXACT), and the pattern one
 * below infinity when roundIncrement is 0), whereas float64_div() builds its
 * infinity results with packFloat64(zSign, 0x7FF, 0) directly - confirm which
 * behaviour is intended here.
 */
880 if ((aExp == 0x7ff && aSig == 0) || (bExp == 0x7ff && bSig == 0))
881 return roundAndPackFloat64(zSign, 0x7ff, 0);
883 zExp = aExp + bExp - 0x3FF;
/* Set bit 52 in each significand, then shift a up by 10 and b up by 11. */
884 aSig = (aSig | 0x0010000000000000LL) << 10;
885 bSig = (bSig | 0x0010000000000000LL) << 11;
/* Full 128-bit product; a non-zero low word is folded into the LSB of the high word. */
886 mul64To128(aSig, bSig, &zSig0, &zSig1);
887 zSig0 |= (zSig1 != 0);
/* True when bit 62 of zSig0 is clear. */
888 if (0 <= (signed long long int)(zSig0 << 1)) {
892 return roundAndPackFloat64(zSign, zExp, zSig0);
896 * -------------------------------------------------------------------------------
897 * Returns the result of converting the double-precision floating-point value
898 * `a' to the single-precision floating-point format. The conversion is
899 * performed according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
901 * -------------------------------------------------------------------------------
/* Narrows a double-precision bit pattern to single precision; rounding is delegated to roundAndPackFloat32(). */
903 float32 float64_to_float32(float64 a)
/* Split the operand into fraction, exponent and sign. */
910 aSig = extractFloat64Frac( a );
911 aExp = extractFloat64Exp( a );
912 aSign = extractFloat64Sign( a );
/* Drop the 22 low fraction bits, folding any non-zero ones into a sticky bit. */
914 shift64RightJamming( aSig, 22, &aSig );
/* Taken when the exponent field or the reduced significand is non-zero. */
916 if ( aExp || zSig ) {
920 return roundAndPackFloat32(aSign, aExp, zSig);