Merge refs/heads/upstream from master.kernel.org:/pub/scm/linux/kernel/git/jgarzik...
[linux-2.6] / arch / parisc / math-emu / fmpyfadd.c
1 /*
2  * Linux/PA-RISC Project (http://www.parisc-linux.org/)
3  *
4  * Floating-point emulation code
5  *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
6  *
7  *    This program is free software; you can redistribute it and/or modify
8  *    it under the terms of the GNU General Public License as published by
9  *    the Free Software Foundation; either version 2, or (at your option)
10  *    any later version.
11  *
12  *    This program is distributed in the hope that it will be useful,
13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *    GNU General Public License for more details.
16  *
17  *    You should have received a copy of the GNU General Public License
18  *    along with this program; if not, write to the Free Software
19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  */
21 /*
22  * BEGIN_DESC
23  *
24  *  File:
25  *      @(#)    pa/spmath/fmpyfadd.c            $Revision: 1.1 $
26  *
27  *  Purpose:
28  *      Double Floating-point Multiply Fused Add
29  *      Double Floating-point Multiply Negate Fused Add
30  *      Single Floating-point Multiply Fused Add
31  *      Single Floating-point Multiply Negate Fused Add
32  *
33  *  External Interfaces:
34  *      dbl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
35  *      dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
36  *      sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
37  *      sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
38  *
39  *  Internal Interfaces:
40  *
41  *  Theory:
42  *      <<please update with a overview of the operation of this file>>
43  *
44  * END_DESC
45 */
46
47
48 #include "float.h"
49 #include "sgl_float.h"
50 #include "dbl_float.h"
51
52
53 /*
54  *  Double Floating-point Multiply Fused Add
55  */
56
57 int
58 dbl_fmpyfadd(
59             dbl_floating_point *src1ptr,
60             dbl_floating_point *src2ptr,
61             dbl_floating_point *src3ptr,
62             unsigned int *status,
63             dbl_floating_point *dstptr)
64 {
65         unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
66         register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
67         unsigned int rightp1, rightp2, rightp3, rightp4;
68         unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
69         register int mpy_exponent, add_exponent, count;
70         boolean inexact = FALSE, is_tiny = FALSE;
71
72         unsigned int signlessleft1, signlessright1, save;
73         register int result_exponent, diff_exponent;
74         int sign_save, jumpsize;
75         
76         Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
77         Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
78         Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
79
80         /* 
81          * set sign bit of result of multiply
82          */
83         if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
84                 Dbl_setnegativezerop1(resultp1); 
85         else Dbl_setzerop1(resultp1);
86
87         /*
88          * Generate multiply exponent 
89          */
90         mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
91
92         /*
93          * check first operand for NaN's or infinity
94          */
95         if (Dbl_isinfinity_exponent(opnd1p1)) {
96                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
97                         if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
98                             Dbl_isnotnan(opnd3p1,opnd3p2)) {
99                                 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
100                                         /* 
101                                          * invalid since operands are infinity 
102                                          * and zero 
103                                          */
104                                         if (Is_invalidtrap_enabled())
105                                                 return(OPC_2E_INVALIDEXCEPTION);
106                                         Set_invalidflag();
107                                         Dbl_makequietnan(resultp1,resultp2);
108                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
109                                         return(NOEXCEPTION);
110                                 }
111                                 /*
112                                  * Check third operand for infinity with a
113                                  *  sign opposite of the multiply result
114                                  */
115                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
116                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
117                                         /* 
118                                          * invalid since attempting a magnitude
119                                          * subtraction of infinities
120                                          */
121                                         if (Is_invalidtrap_enabled())
122                                                 return(OPC_2E_INVALIDEXCEPTION);
123                                         Set_invalidflag();
124                                         Dbl_makequietnan(resultp1,resultp2);
125                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
126                                         return(NOEXCEPTION);
127                                 }
128
129                                 /*
130                                  * return infinity
131                                  */
132                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
133                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
134                                 return(NOEXCEPTION);
135                         }
136                 }
137                 else {
138                         /*
139                          * is NaN; signaling or quiet?
140                          */
141                         if (Dbl_isone_signaling(opnd1p1)) {
142                                 /* trap if INVALIDTRAP enabled */
143                                 if (Is_invalidtrap_enabled()) 
144                                         return(OPC_2E_INVALIDEXCEPTION);
145                                 /* make NaN quiet */
146                                 Set_invalidflag();
147                                 Dbl_set_quiet(opnd1p1);
148                         }
149                         /* 
150                          * is second operand a signaling NaN? 
151                          */
152                         else if (Dbl_is_signalingnan(opnd2p1)) {
153                                 /* trap if INVALIDTRAP enabled */
154                                 if (Is_invalidtrap_enabled())
155                                         return(OPC_2E_INVALIDEXCEPTION);
156                                 /* make NaN quiet */
157                                 Set_invalidflag();
158                                 Dbl_set_quiet(opnd2p1);
159                                 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
160                                 return(NOEXCEPTION);
161                         }
162                         /* 
163                          * is third operand a signaling NaN? 
164                          */
165                         else if (Dbl_is_signalingnan(opnd3p1)) {
166                                 /* trap if INVALIDTRAP enabled */
167                                 if (Is_invalidtrap_enabled())
168                                         return(OPC_2E_INVALIDEXCEPTION);
169                                 /* make NaN quiet */
170                                 Set_invalidflag();
171                                 Dbl_set_quiet(opnd3p1);
172                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
173                                 return(NOEXCEPTION);
174                         }
175                         /*
176                          * return quiet NaN
177                          */
178                         Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
179                         return(NOEXCEPTION);
180                 }
181         }
182
183         /*
184          * check second operand for NaN's or infinity
185          */
186         if (Dbl_isinfinity_exponent(opnd2p1)) {
187                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
188                         if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
189                                 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
190                                         /* 
191                                          * invalid since multiply operands are
192                                          * zero & infinity
193                                          */
194                                         if (Is_invalidtrap_enabled())
195                                                 return(OPC_2E_INVALIDEXCEPTION);
196                                         Set_invalidflag();
197                                         Dbl_makequietnan(opnd2p1,opnd2p2);
198                                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
199                                         return(NOEXCEPTION);
200                                 }
201
202                                 /*
203                                  * Check third operand for infinity with a
204                                  *  sign opposite of the multiply result
205                                  */
206                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
207                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
208                                         /* 
209                                          * invalid since attempting a magnitude
210                                          * subtraction of infinities
211                                          */
212                                         if (Is_invalidtrap_enabled())
213                                                 return(OPC_2E_INVALIDEXCEPTION);
214                                         Set_invalidflag();
215                                         Dbl_makequietnan(resultp1,resultp2);
216                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
217                                         return(NOEXCEPTION);
218                                 }
219
220                                 /*
221                                  * return infinity
222                                  */
223                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
224                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
225                                 return(NOEXCEPTION);
226                         }
227                 }
228                 else {
229                         /*
230                          * is NaN; signaling or quiet?
231                          */
232                         if (Dbl_isone_signaling(opnd2p1)) {
233                                 /* trap if INVALIDTRAP enabled */
234                                 if (Is_invalidtrap_enabled())
235                                         return(OPC_2E_INVALIDEXCEPTION);
236                                 /* make NaN quiet */
237                                 Set_invalidflag();
238                                 Dbl_set_quiet(opnd2p1);
239                         }
240                         /* 
241                          * is third operand a signaling NaN? 
242                          */
243                         else if (Dbl_is_signalingnan(opnd3p1)) {
244                                 /* trap if INVALIDTRAP enabled */
245                                 if (Is_invalidtrap_enabled())
246                                                 return(OPC_2E_INVALIDEXCEPTION);
247                                 /* make NaN quiet */
248                                 Set_invalidflag();
249                                 Dbl_set_quiet(opnd3p1);
250                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
251                                 return(NOEXCEPTION);
252                         }
253                         /*
254                          * return quiet NaN
255                          */
256                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
257                         return(NOEXCEPTION);
258                 }
259         }
260
261         /*
262          * check third operand for NaN's or infinity
263          */
264         if (Dbl_isinfinity_exponent(opnd3p1)) {
265                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
266                         /* return infinity */
267                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
268                         return(NOEXCEPTION);
269                 } else {
270                         /*
271                          * is NaN; signaling or quiet?
272                          */
273                         if (Dbl_isone_signaling(opnd3p1)) {
274                                 /* trap if INVALIDTRAP enabled */
275                                 if (Is_invalidtrap_enabled())
276                                         return(OPC_2E_INVALIDEXCEPTION);
277                                 /* make NaN quiet */
278                                 Set_invalidflag();
279                                 Dbl_set_quiet(opnd3p1);
280                         }
281                         /*
282                          * return quiet NaN
283                          */
284                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
285                         return(NOEXCEPTION);
286                 }
287         }
288
289         /*
290          * Generate multiply mantissa
291          */
292         if (Dbl_isnotzero_exponent(opnd1p1)) {
293                 /* set hidden bit */
294                 Dbl_clear_signexponent_set_hidden(opnd1p1);
295         }
296         else {
297                 /* check for zero */
298                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
299                         /*
300                          * Perform the add opnd3 with zero here.
301                          */
302                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
303                                 if (Is_rounding_mode(ROUNDMINUS)) {
304                                         Dbl_or_signs(opnd3p1,resultp1);
305                                 } else {
306                                         Dbl_and_signs(opnd3p1,resultp1);
307                                 }
308                         }
309                         /*
310                          * Now let's check for trapped underflow case.
311                          */
312                         else if (Dbl_iszero_exponent(opnd3p1) &&
313                                  Is_underflowtrap_enabled()) {
314                                 /* need to normalize results mantissa */
315                                 sign_save = Dbl_signextendedsign(opnd3p1);
316                                 result_exponent = 0;
317                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
318                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
319                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
320                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
321                                                         unfl);
322                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
323                                 /* inexact = FALSE */
324                                 return(OPC_2E_UNDERFLOWEXCEPTION);
325                         }
326                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
327                         return(NOEXCEPTION);
328                 }
329                 /* is denormalized, adjust exponent */
330                 Dbl_clear_signexponent(opnd1p1);
331                 Dbl_leftshiftby1(opnd1p1,opnd1p2);
332                 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
333         }
334         /* opnd2 needs to have hidden bit set with msb in hidden bit */
335         if (Dbl_isnotzero_exponent(opnd2p1)) {
336                 Dbl_clear_signexponent_set_hidden(opnd2p1);
337         }
338         else {
339                 /* check for zero */
340                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
341                         /*
342                          * Perform the add opnd3 with zero here.
343                          */
344                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
345                                 if (Is_rounding_mode(ROUNDMINUS)) {
346                                         Dbl_or_signs(opnd3p1,resultp1);
347                                 } else {
348                                         Dbl_and_signs(opnd3p1,resultp1);
349                                 }
350                         }
351                         /*
352                          * Now let's check for trapped underflow case.
353                          */
354                         else if (Dbl_iszero_exponent(opnd3p1) &&
355                             Is_underflowtrap_enabled()) {
356                                 /* need to normalize results mantissa */
357                                 sign_save = Dbl_signextendedsign(opnd3p1);
358                                 result_exponent = 0;
359                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
360                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
361                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
362                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
363                                                         unfl);
364                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
365                                 /* inexact = FALSE */
366                                 return(OPC_2E_UNDERFLOWEXCEPTION);
367                         }
368                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
369                         return(NOEXCEPTION);
370                 }
371                 /* is denormalized; want to normalize */
372                 Dbl_clear_signexponent(opnd2p1);
373                 Dbl_leftshiftby1(opnd2p1,opnd2p2);
374                 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
375         }
376
377         /* Multiply the first two source mantissas together */
378
379         /* 
380          * The intermediate result will be kept in tmpres,
381          * which needs enough room for 106 bits of mantissa,
382          * so lets call it a Double extended.
383          */
384         Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
385
386         /* 
387          * Four bits at a time are inspected in each loop, and a 
388          * simple shift and add multiply algorithm is used. 
389          */ 
390         for (count = DBL_P-1; count >= 0; count -= 4) {
391                 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
392                 if (Dbit28p2(opnd1p2)) {
393                         /* Fourword_add should be an ADD followed by 3 ADDC's */
394                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
395                          opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
396                 }
397                 if (Dbit29p2(opnd1p2)) {
398                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
399                          opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
400                 }
401                 if (Dbit30p2(opnd1p2)) {
402                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
403                          opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
404                 }
405                 if (Dbit31p2(opnd1p2)) {
406                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
407                          opnd2p1, opnd2p2, 0, 0);
408                 }
409                 Dbl_rightshiftby4(opnd1p1,opnd1p2);
410         }
411         if (Is_dexthiddenoverflow(tmpresp1)) {
412                 /* result mantissa >= 2 (mantissa overflow) */
413                 mpy_exponent++;
414                 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
415         }
416
417         /*
418          * Restore the sign of the mpy result which was saved in resultp1.
419          * The exponent will continue to be kept in mpy_exponent.
420          */
421         Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
422
423         /* 
424          * No rounding is required, since the result of the multiply
425          * is exact in the extended format.
426          */
427
428         /*
429          * Now we are ready to perform the add portion of the operation.
430          *
431          * The exponents need to be kept as integers for now, since the
432          * multiply result might not fit into the exponent field.  We
433          * can't overflow or underflow because of this yet, since the
434          * add could bring the final result back into range.
435          */
436         add_exponent = Dbl_exponent(opnd3p1);
437
438         /*
439          * Check for denormalized or zero add operand.
440          */
441         if (add_exponent == 0) {
442                 /* check for zero */
443                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
444                         /* right is zero */
445                         /* Left can't be zero and must be result.
446                          *
447                          * The final result is now in tmpres and mpy_exponent,
448                          * and needs to be rounded and squeezed back into
449                          * double precision format from double extended.
450                          */
451                         result_exponent = mpy_exponent;
452                         Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
453                                 resultp1,resultp2,resultp3,resultp4);
454                         sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
455                         goto round;
456                 }
457
458                 /* 
459                  * Neither are zeroes.  
460                  * Adjust exponent and normalize add operand.
461                  */
462                 sign_save = Dbl_signextendedsign(opnd3p1);      /* save sign */
463                 Dbl_clear_signexponent(opnd3p1);
464                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
465                 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
466                 Dbl_set_sign(opnd3p1,sign_save);        /* restore sign */
467         } else {
468                 Dbl_clear_exponent_set_hidden(opnd3p1);
469         }
470         /*
471          * Copy opnd3 to the double extended variable called right.
472          */
473         Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
474
475         /*
476          * A zero "save" helps discover equal operands (for later),
477          * and is used in swapping operands (if needed).
478          */
479         Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
480
481         /*
482          * Compare magnitude of operands.
483          */
484         Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
485         Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
486         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
487             Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
488                 /*
489                  * Set the left operand to the larger one by XOR swap.
490                  * First finish the first word "save".
491                  */
492                 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
493                 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
494                 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
495                         rightp2,rightp3,rightp4);
496                 /* also setup exponents used in rest of routine */
497                 diff_exponent = add_exponent - mpy_exponent;
498                 result_exponent = add_exponent;
499         } else {
500                 /* also setup exponents used in rest of routine */
501                 diff_exponent = mpy_exponent - add_exponent;
502                 result_exponent = mpy_exponent;
503         }
504         /* Invariant: left is not smaller than right. */
505
506         /*
507          * Special case alignment of operands that would force alignment
508          * beyond the extent of the extension.  A further optimization
509          * could special case this but only reduces the path length for
510          * this infrequent case.
511          */
512         if (diff_exponent > DBLEXT_THRESHOLD) {
513                 diff_exponent = DBLEXT_THRESHOLD;
514         }
515
516         /* Align right operand by shifting it to the right */
517         Dblext_clear_sign(rightp1);
518         Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
519                 /*shifted by*/diff_exponent);
520         
521         /* Treat sum and difference of the operands separately. */
522         if ((int)save < 0) {
523                 /*
524                  * Difference of the two operands.  Overflow can occur if the
525                  * multiply overflowed.  A borrow can occur out of the hidden
526                  * bit and force a post normalization phase.
527                  */
528                 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
529                         rightp1,rightp2,rightp3,rightp4,
530                         resultp1,resultp2,resultp3,resultp4);
531                 sign_save = Dbl_signextendedsign(resultp1);
532                 if (Dbl_iszero_hidden(resultp1)) {
533                         /* Handle normalization */
534                 /* A straight foward algorithm would now shift the
535                  * result and extension left until the hidden bit
536                  * becomes one.  Not all of the extension bits need
537                  * participate in the shift.  Only the two most 
538                  * significant bits (round and guard) are needed.
539                  * If only a single shift is needed then the guard
540                  * bit becomes a significant low order bit and the
541                  * extension must participate in the rounding.
542                  * If more than a single shift is needed, then all
543                  * bits to the right of the guard bit are zeros, 
544                  * and the guard bit may or may not be zero. */
545                         Dblext_leftshiftby1(resultp1,resultp2,resultp3,
546                                 resultp4);
547
548                         /* Need to check for a zero result.  The sign and
549                          * exponent fields have already been zeroed.  The more
550                          * efficient test of the full object can be used.
551                          */
552                          if(Dblext_iszero(resultp1,resultp2,resultp3,resultp4)){
553                                 /* Must have been "x-x" or "x+(-x)". */
554                                 if (Is_rounding_mode(ROUNDMINUS))
555                                         Dbl_setone_sign(resultp1);
556                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
557                                 return(NOEXCEPTION);
558                         }
559                         result_exponent--;
560
561                         /* Look to see if normalization is finished. */
562                         if (Dbl_isone_hidden(resultp1)) {
563                                 /* No further normalization is needed */
564                                 goto round;
565                         }
566
567                         /* Discover first one bit to determine shift amount.
568                          * Use a modified binary search.  We have already
569                          * shifted the result one position right and still
570                          * not found a one so the remainder of the extension
571                          * must be zero and simplifies rounding. */
572                         /* Scan bytes */
573                         while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
574                                 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
575                                 result_exponent -= 8;
576                         }
577                         /* Now narrow it down to the nibble */
578                         if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
579                                 /* The lower nibble contains the
580                                  * normalizing one */
581                                 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
582                                 result_exponent -= 4;
583                         }
584                         /* Select case where first bit is set (already
585                          * normalized) otherwise select the proper shift. */
586                         jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
587                         if (jumpsize <= 7) switch(jumpsize) {
588                         case 1:
589                                 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
590                                         resultp4);
591                                 result_exponent -= 3;
592                                 break;
593                         case 2:
594                         case 3:
595                                 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
596                                         resultp4);
597                                 result_exponent -= 2;
598                                 break;
599                         case 4:
600                         case 5:
601                         case 6:
602                         case 7:
603                                 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
604                                         resultp4);
605                                 result_exponent -= 1;
606                                 break;
607                         }
608                 } /* end if (hidden...)... */
609         /* Fall through and round */
610         } /* end if (save < 0)... */
611         else {
612                 /* Add magnitudes */
613                 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
614                         rightp1,rightp2,rightp3,rightp4,
615                         /*to*/resultp1,resultp2,resultp3,resultp4);
616                 sign_save = Dbl_signextendedsign(resultp1);
617                 if (Dbl_isone_hiddenoverflow(resultp1)) {
618                         /* Prenormalization required. */
619                         Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
620                                 resultp4);
621                         result_exponent++;
622                 } /* end if hiddenoverflow... */
623         } /* end else ...add magnitudes... */
624
625         /* Round the result.  If the extension and lower two words are
626          * all zeros, then the result is exact.  Otherwise round in the
627          * correct direction.  Underflow is possible. If a postnormalization
628          * is necessary, then the mantissa is all zeros so no shift is needed.
629          */
630   round:
631         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
632                 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
633                         result_exponent,is_tiny);
634         }
635         Dbl_set_sign(resultp1,/*using*/sign_save);
636         if (Dblext_isnotzero_mantissap3(resultp3) || 
637             Dblext_isnotzero_mantissap4(resultp4)) {
638                 inexact = TRUE;
639                 switch(Rounding_mode()) {
640                 case ROUNDNEAREST: /* The default. */
641                         if (Dblext_isone_highp3(resultp3)) {
642                                 /* at least 1/2 ulp */
643                                 if (Dblext_isnotzero_low31p3(resultp3) ||
644                                     Dblext_isnotzero_mantissap4(resultp4) ||
645                                     Dblext_isone_lowp2(resultp2)) {
646                                         /* either exactly half way and odd or
647                                          * more than 1/2ulp */
648                                         Dbl_increment(resultp1,resultp2);
649                                 }
650                         }
651                         break;
652
653                 case ROUNDPLUS:
654                         if (Dbl_iszero_sign(resultp1)) {
655                                 /* Round up positive results */
656                                 Dbl_increment(resultp1,resultp2);
657                         }
658                         break;
659             
660                 case ROUNDMINUS:
661                         if (Dbl_isone_sign(resultp1)) {
662                                 /* Round down negative results */
663                                 Dbl_increment(resultp1,resultp2);
664                         }
665             
666                 case ROUNDZERO:;
667                         /* truncate is simple */
668                 } /* end switch... */
669                 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
670         }
671         if (result_exponent >= DBL_INFINITY_EXPONENT) {
672                 /* trap if OVERFLOWTRAP enabled */
673                 if (Is_overflowtrap_enabled()) {
674                         /*
675                          * Adjust bias of result
676                          */
677                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
678                         Dbl_copytoptr(resultp1,resultp2,dstptr);
679                         if (inexact)
680                             if (Is_inexacttrap_enabled())
681                                 return (OPC_2E_OVERFLOWEXCEPTION |
682                                         OPC_2E_INEXACTEXCEPTION);
683                             else Set_inexactflag();
684                         return (OPC_2E_OVERFLOWEXCEPTION);
685                 }
686                 inexact = TRUE;
687                 Set_overflowflag();
688                 /* set result to infinity or largest number */
689                 Dbl_setoverflow(resultp1,resultp2);
690
691         } else if (result_exponent <= 0) {      /* underflow case */
692                 if (Is_underflowtrap_enabled()) {
693                         /*
694                          * Adjust bias of result
695                          */
696                         Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
697                         Dbl_copytoptr(resultp1,resultp2,dstptr);
698                         if (inexact)
699                             if (Is_inexacttrap_enabled())
700                                 return (OPC_2E_UNDERFLOWEXCEPTION |
701                                         OPC_2E_INEXACTEXCEPTION);
702                             else Set_inexactflag();
703                         return(OPC_2E_UNDERFLOWEXCEPTION);
704                 }
705                 else if (inexact && is_tiny) Set_underflowflag();
706         }
707         else Dbl_set_exponent(resultp1,result_exponent);
708         Dbl_copytoptr(resultp1,resultp2,dstptr);
709         if (inexact) 
710                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
711                 else Set_inexactflag();
712         return(NOEXCEPTION);
713 }
714
715 /*
716  *  Double Floating-point Multiply Negate Fused Add
717  */
718
719 dbl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
720
721 dbl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
722 unsigned int *status;
723 {
724         unsigned int opnd1p1, opnd1p2, opnd2p1, opnd2p2, opnd3p1, opnd3p2;
725         register unsigned int tmpresp1, tmpresp2, tmpresp3, tmpresp4;
726         unsigned int rightp1, rightp2, rightp3, rightp4;
727         unsigned int resultp1, resultp2 = 0, resultp3 = 0, resultp4 = 0;
728         register int mpy_exponent, add_exponent, count;
729         boolean inexact = FALSE, is_tiny = FALSE;
730
731         unsigned int signlessleft1, signlessright1, save;
732         register int result_exponent, diff_exponent;
733         int sign_save, jumpsize;
734         
735         Dbl_copyfromptr(src1ptr,opnd1p1,opnd1p2);
736         Dbl_copyfromptr(src2ptr,opnd2p1,opnd2p2);
737         Dbl_copyfromptr(src3ptr,opnd3p1,opnd3p2);
738
739         /* 
740          * set sign bit of result of multiply
741          */
742         if (Dbl_sign(opnd1p1) ^ Dbl_sign(opnd2p1)) 
743                 Dbl_setzerop1(resultp1);
744         else
745                 Dbl_setnegativezerop1(resultp1); 
746
747         /*
748          * Generate multiply exponent 
749          */
750         mpy_exponent = Dbl_exponent(opnd1p1) + Dbl_exponent(opnd2p1) - DBL_BIAS;
751
752         /*
753          * check first operand for NaN's or infinity
754          */
755         if (Dbl_isinfinity_exponent(opnd1p1)) {
756                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
757                         if (Dbl_isnotnan(opnd2p1,opnd2p2) &&
758                             Dbl_isnotnan(opnd3p1,opnd3p2)) {
759                                 if (Dbl_iszero_exponentmantissa(opnd2p1,opnd2p2)) {
760                                         /* 
761                                          * invalid since operands are infinity 
762                                          * and zero 
763                                          */
764                                         if (Is_invalidtrap_enabled())
765                                                 return(OPC_2E_INVALIDEXCEPTION);
766                                         Set_invalidflag();
767                                         Dbl_makequietnan(resultp1,resultp2);
768                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
769                                         return(NOEXCEPTION);
770                                 }
771                                 /*
772                                  * Check third operand for infinity with a
773                                  *  sign opposite of the multiply result
774                                  */
775                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
776                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
777                                         /* 
778                                          * invalid since attempting a magnitude
779                                          * subtraction of infinities
780                                          */
781                                         if (Is_invalidtrap_enabled())
782                                                 return(OPC_2E_INVALIDEXCEPTION);
783                                         Set_invalidflag();
784                                         Dbl_makequietnan(resultp1,resultp2);
785                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
786                                         return(NOEXCEPTION);
787                                 }
788
789                                 /*
790                                  * return infinity
791                                  */
792                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
793                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
794                                 return(NOEXCEPTION);
795                         }
796                 }
797                 else {
798                         /*
799                          * is NaN; signaling or quiet?
800                          */
801                         if (Dbl_isone_signaling(opnd1p1)) {
802                                 /* trap if INVALIDTRAP enabled */
803                                 if (Is_invalidtrap_enabled()) 
804                                         return(OPC_2E_INVALIDEXCEPTION);
805                                 /* make NaN quiet */
806                                 Set_invalidflag();
807                                 Dbl_set_quiet(opnd1p1);
808                         }
809                         /* 
810                          * is second operand a signaling NaN? 
811                          */
812                         else if (Dbl_is_signalingnan(opnd2p1)) {
813                                 /* trap if INVALIDTRAP enabled */
814                                 if (Is_invalidtrap_enabled())
815                                         return(OPC_2E_INVALIDEXCEPTION);
816                                 /* make NaN quiet */
817                                 Set_invalidflag();
818                                 Dbl_set_quiet(opnd2p1);
819                                 Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
820                                 return(NOEXCEPTION);
821                         }
822                         /* 
823                          * is third operand a signaling NaN? 
824                          */
825                         else if (Dbl_is_signalingnan(opnd3p1)) {
826                                 /* trap if INVALIDTRAP enabled */
827                                 if (Is_invalidtrap_enabled())
828                                         return(OPC_2E_INVALIDEXCEPTION);
829                                 /* make NaN quiet */
830                                 Set_invalidflag();
831                                 Dbl_set_quiet(opnd3p1);
832                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
833                                 return(NOEXCEPTION);
834                         }
835                         /*
836                          * return quiet NaN
837                          */
838                         Dbl_copytoptr(opnd1p1,opnd1p2,dstptr);
839                         return(NOEXCEPTION);
840                 }
841         }
842
843         /*
844          * check second operand for NaN's or infinity
845          */
846         if (Dbl_isinfinity_exponent(opnd2p1)) {
847                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
848                         if (Dbl_isnotnan(opnd3p1,opnd3p2)) {
849                                 if (Dbl_iszero_exponentmantissa(opnd1p1,opnd1p2)) {
850                                         /* 
851                                          * invalid since multiply operands are
852                                          * zero & infinity
853                                          */
854                                         if (Is_invalidtrap_enabled())
855                                                 return(OPC_2E_INVALIDEXCEPTION);
856                                         Set_invalidflag();
857                                         Dbl_makequietnan(opnd2p1,opnd2p2);
858                                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
859                                         return(NOEXCEPTION);
860                                 }
861
862                                 /*
863                                  * Check third operand for infinity with a
864                                  *  sign opposite of the multiply result
865                                  */
866                                 if (Dbl_isinfinity(opnd3p1,opnd3p2) &&
867                                     (Dbl_sign(resultp1) ^ Dbl_sign(opnd3p1))) {
868                                         /* 
869                                          * invalid since attempting a magnitude
870                                          * subtraction of infinities
871                                          */
872                                         if (Is_invalidtrap_enabled())
873                                                 return(OPC_2E_INVALIDEXCEPTION);
874                                         Set_invalidflag();
875                                         Dbl_makequietnan(resultp1,resultp2);
876                                         Dbl_copytoptr(resultp1,resultp2,dstptr);
877                                         return(NOEXCEPTION);
878                                 }
879
880                                 /*
881                                  * return infinity
882                                  */
883                                 Dbl_setinfinity_exponentmantissa(resultp1,resultp2);
884                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
885                                 return(NOEXCEPTION);
886                         }
887                 }
888                 else {
889                         /*
890                          * is NaN; signaling or quiet?
891                          */
892                         if (Dbl_isone_signaling(opnd2p1)) {
893                                 /* trap if INVALIDTRAP enabled */
894                                 if (Is_invalidtrap_enabled())
895                                         return(OPC_2E_INVALIDEXCEPTION);
896                                 /* make NaN quiet */
897                                 Set_invalidflag();
898                                 Dbl_set_quiet(opnd2p1);
899                         }
900                         /* 
901                          * is third operand a signaling NaN? 
902                          */
903                         else if (Dbl_is_signalingnan(opnd3p1)) {
904                                 /* trap if INVALIDTRAP enabled */
905                                 if (Is_invalidtrap_enabled())
906                                                 return(OPC_2E_INVALIDEXCEPTION);
907                                 /* make NaN quiet */
908                                 Set_invalidflag();
909                                 Dbl_set_quiet(opnd3p1);
910                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
911                                 return(NOEXCEPTION);
912                         }
913                         /*
914                          * return quiet NaN
915                          */
916                         Dbl_copytoptr(opnd2p1,opnd2p2,dstptr);
917                         return(NOEXCEPTION);
918                 }
919         }
920
921         /*
922          * check third operand for NaN's or infinity
923          */
924         if (Dbl_isinfinity_exponent(opnd3p1)) {
925                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
926                         /* return infinity */
927                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
928                         return(NOEXCEPTION);
929                 } else {
930                         /*
931                          * is NaN; signaling or quiet?
932                          */
933                         if (Dbl_isone_signaling(opnd3p1)) {
934                                 /* trap if INVALIDTRAP enabled */
935                                 if (Is_invalidtrap_enabled())
936                                         return(OPC_2E_INVALIDEXCEPTION);
937                                 /* make NaN quiet */
938                                 Set_invalidflag();
939                                 Dbl_set_quiet(opnd3p1);
940                         }
941                         /*
942                          * return quiet NaN
943                          */
944                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
945                         return(NOEXCEPTION);
946                 }
947         }
948
949         /*
950          * Generate multiply mantissa
951          */
952         if (Dbl_isnotzero_exponent(opnd1p1)) {
953                 /* set hidden bit */
954                 Dbl_clear_signexponent_set_hidden(opnd1p1);
955         }
956         else {
957                 /* check for zero */
958                 if (Dbl_iszero_mantissa(opnd1p1,opnd1p2)) {
959                         /*
960                          * Perform the add opnd3 with zero here.
961                          */
962                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
963                                 if (Is_rounding_mode(ROUNDMINUS)) {
964                                         Dbl_or_signs(opnd3p1,resultp1);
965                                 } else {
966                                         Dbl_and_signs(opnd3p1,resultp1);
967                                 }
968                         }
969                         /*
970                          * Now let's check for trapped underflow case.
971                          */
972                         else if (Dbl_iszero_exponent(opnd3p1) &&
973                                  Is_underflowtrap_enabled()) {
974                                 /* need to normalize results mantissa */
975                                 sign_save = Dbl_signextendedsign(opnd3p1);
976                                 result_exponent = 0;
977                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
978                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
979                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
980                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
981                                                         unfl);
982                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
983                                 /* inexact = FALSE */
984                                 return(OPC_2E_UNDERFLOWEXCEPTION);
985                         }
986                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
987                         return(NOEXCEPTION);
988                 }
989                 /* is denormalized, adjust exponent */
990                 Dbl_clear_signexponent(opnd1p1);
991                 Dbl_leftshiftby1(opnd1p1,opnd1p2);
992                 Dbl_normalize(opnd1p1,opnd1p2,mpy_exponent);
993         }
994         /* opnd2 needs to have hidden bit set with msb in hidden bit */
995         if (Dbl_isnotzero_exponent(opnd2p1)) {
996                 Dbl_clear_signexponent_set_hidden(opnd2p1);
997         }
998         else {
999                 /* check for zero */
1000                 if (Dbl_iszero_mantissa(opnd2p1,opnd2p2)) {
1001                         /*
1002                          * Perform the add opnd3 with zero here.
1003                          */
1004                         if (Dbl_iszero_exponentmantissa(opnd3p1,opnd3p2)) {
1005                                 if (Is_rounding_mode(ROUNDMINUS)) {
1006                                         Dbl_or_signs(opnd3p1,resultp1);
1007                                 } else {
1008                                         Dbl_and_signs(opnd3p1,resultp1);
1009                                 }
1010                         }
1011                         /*
1012                          * Now let's check for trapped underflow case.
1013                          */
1014                         else if (Dbl_iszero_exponent(opnd3p1) &&
1015                             Is_underflowtrap_enabled()) {
1016                                 /* need to normalize results mantissa */
1017                                 sign_save = Dbl_signextendedsign(opnd3p1);
1018                                 result_exponent = 0;
1019                                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1020                                 Dbl_normalize(opnd3p1,opnd3p2,result_exponent);
1021                                 Dbl_set_sign(opnd3p1,/*using*/sign_save);
1022                                 Dbl_setwrapped_exponent(opnd3p1,result_exponent,
1023                                                         unfl);
1024                                 Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1025                                 /* inexact = FALSE */
1026                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1027                         }
1028                         Dbl_copytoptr(opnd3p1,opnd3p2,dstptr);
1029                         return(NOEXCEPTION);
1030                 }
1031                 /* is denormalized; want to normalize */
1032                 Dbl_clear_signexponent(opnd2p1);
1033                 Dbl_leftshiftby1(opnd2p1,opnd2p2);
1034                 Dbl_normalize(opnd2p1,opnd2p2,mpy_exponent);
1035         }
1036
1037         /* Multiply the first two source mantissas together */
1038
1039         /* 
1040          * The intermediate result will be kept in tmpres,
1041          * which needs enough room for 106 bits of mantissa,
1042          * so lets call it a Double extended.
1043          */
1044         Dblext_setzero(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1045
1046         /* 
1047          * Four bits at a time are inspected in each loop, and a 
1048          * simple shift and add multiply algorithm is used. 
1049          */ 
1050         for (count = DBL_P-1; count >= 0; count -= 4) {
1051                 Dblext_rightshiftby4(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1052                 if (Dbit28p2(opnd1p2)) {
1053                         /* Fourword_add should be an ADD followed by 3 ADDC's */
1054                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4, 
1055                          opnd2p1<<3 | opnd2p2>>29, opnd2p2<<3, 0, 0);
1056                 }
1057                 if (Dbit29p2(opnd1p2)) {
1058                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1059                          opnd2p1<<2 | opnd2p2>>30, opnd2p2<<2, 0, 0);
1060                 }
1061                 if (Dbit30p2(opnd1p2)) {
1062                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1063                          opnd2p1<<1 | opnd2p2>>31, opnd2p2<<1, 0, 0);
1064                 }
1065                 if (Dbit31p2(opnd1p2)) {
1066                         Fourword_add(tmpresp1, tmpresp2, tmpresp3, tmpresp4,
1067                          opnd2p1, opnd2p2, 0, 0);
1068                 }
1069                 Dbl_rightshiftby4(opnd1p1,opnd1p2);
1070         }
1071         if (Is_dexthiddenoverflow(tmpresp1)) {
1072                 /* result mantissa >= 2 (mantissa overflow) */
1073                 mpy_exponent++;
1074                 Dblext_rightshiftby1(tmpresp1,tmpresp2,tmpresp3,tmpresp4);
1075         }
1076
1077         /*
1078          * Restore the sign of the mpy result which was saved in resultp1.
1079          * The exponent will continue to be kept in mpy_exponent.
1080          */
1081         Dblext_set_sign(tmpresp1,Dbl_sign(resultp1));
1082
1083         /* 
1084          * No rounding is required, since the result of the multiply
1085          * is exact in the extended format.
1086          */
1087
1088         /*
1089          * Now we are ready to perform the add portion of the operation.
1090          *
1091          * The exponents need to be kept as integers for now, since the
1092          * multiply result might not fit into the exponent field.  We
1093          * can't overflow or underflow because of this yet, since the
1094          * add could bring the final result back into range.
1095          */
1096         add_exponent = Dbl_exponent(opnd3p1);
1097
1098         /*
1099          * Check for denormalized or zero add operand.
1100          */
1101         if (add_exponent == 0) {
1102                 /* check for zero */
1103                 if (Dbl_iszero_mantissa(opnd3p1,opnd3p2)) {
1104                         /* right is zero */
1105                         /* Left can't be zero and must be result.
1106                          *
1107                          * The final result is now in tmpres and mpy_exponent,
1108                          * and needs to be rounded and squeezed back into
1109                          * double precision format from double extended.
1110                          */
1111                         result_exponent = mpy_exponent;
1112                         Dblext_copy(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1113                                 resultp1,resultp2,resultp3,resultp4);
1114                         sign_save = Dbl_signextendedsign(resultp1);/*save sign*/
1115                         goto round;
1116                 }
1117
1118                 /* 
1119                  * Neither are zeroes.  
1120                  * Adjust exponent and normalize add operand.
1121                  */
1122                 sign_save = Dbl_signextendedsign(opnd3p1);      /* save sign */
1123                 Dbl_clear_signexponent(opnd3p1);
1124                 Dbl_leftshiftby1(opnd3p1,opnd3p2);
1125                 Dbl_normalize(opnd3p1,opnd3p2,add_exponent);
1126                 Dbl_set_sign(opnd3p1,sign_save);        /* restore sign */
1127         } else {
1128                 Dbl_clear_exponent_set_hidden(opnd3p1);
1129         }
1130         /*
1131          * Copy opnd3 to the double extended variable called right.
1132          */
1133         Dbl_copyto_dblext(opnd3p1,opnd3p2,rightp1,rightp2,rightp3,rightp4);
1134
1135         /*
1136          * A zero "save" helps discover equal operands (for later),
1137          * and is used in swapping operands (if needed).
1138          */
1139         Dblext_xortointp1(tmpresp1,rightp1,/*to*/save);
1140
1141         /*
1142          * Compare magnitude of operands.
1143          */
1144         Dblext_copytoint_exponentmantissap1(tmpresp1,signlessleft1);
1145         Dblext_copytoint_exponentmantissap1(rightp1,signlessright1);
1146         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1147             Dblext_ismagnitudeless(tmpresp2,rightp2,signlessleft1,signlessright1)){
1148                 /*
1149                  * Set the left operand to the larger one by XOR swap.
1150                  * First finish the first word "save".
1151                  */
1152                 Dblext_xorfromintp1(save,rightp1,/*to*/rightp1);
1153                 Dblext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1154                 Dblext_swap_lower(tmpresp2,tmpresp3,tmpresp4,
1155                         rightp2,rightp3,rightp4);
1156                 /* also setup exponents used in rest of routine */
1157                 diff_exponent = add_exponent - mpy_exponent;
1158                 result_exponent = add_exponent;
1159         } else {
1160                 /* also setup exponents used in rest of routine */
1161                 diff_exponent = mpy_exponent - add_exponent;
1162                 result_exponent = mpy_exponent;
1163         }
1164         /* Invariant: left is not smaller than right. */
1165
1166         /*
1167          * Special case alignment of operands that would force alignment
1168          * beyond the extent of the extension.  A further optimization
1169          * could special case this but only reduces the path length for
1170          * this infrequent case.
1171          */
1172         if (diff_exponent > DBLEXT_THRESHOLD) {
1173                 diff_exponent = DBLEXT_THRESHOLD;
1174         }
1175
1176         /* Align right operand by shifting it to the right */
1177         Dblext_clear_sign(rightp1);
1178         Dblext_right_align(rightp1,rightp2,rightp3,rightp4,
1179                 /*shifted by*/diff_exponent);
1180         
1181         /* Treat sum and difference of the operands separately. */
1182         if ((int)save < 0) {
1183                 /*
1184                  * Difference of the two operands.  Overflow can occur if the
1185                  * multiply overflowed.  A borrow can occur out of the hidden
1186                  * bit and force a post normalization phase.
1187                  */
1188                 Dblext_subtract(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1189                         rightp1,rightp2,rightp3,rightp4,
1190                         resultp1,resultp2,resultp3,resultp4);
1191                 sign_save = Dbl_signextendedsign(resultp1);
1192                 if (Dbl_iszero_hidden(resultp1)) {
1193                         /* Handle normalization */
1194                 /* A straight foward algorithm would now shift the
1195                  * result and extension left until the hidden bit
1196                  * becomes one.  Not all of the extension bits need
1197                  * participate in the shift.  Only the two most 
1198                  * significant bits (round and guard) are needed.
1199                  * If only a single shift is needed then the guard
1200                  * bit becomes a significant low order bit and the
1201                  * extension must participate in the rounding.
1202                  * If more than a single shift is needed, then all
1203                  * bits to the right of the guard bit are zeros, 
1204                  * and the guard bit may or may not be zero. */
1205                         Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1206                                 resultp4);
1207
1208                         /* Need to check for a zero result.  The sign and
1209                          * exponent fields have already been zeroed.  The more
1210                          * efficient test of the full object can be used.
1211                          */
1212                          if (Dblext_iszero(resultp1,resultp2,resultp3,resultp4)) {
1213                                 /* Must have been "x-x" or "x+(-x)". */
1214                                 if (Is_rounding_mode(ROUNDMINUS))
1215                                         Dbl_setone_sign(resultp1);
1216                                 Dbl_copytoptr(resultp1,resultp2,dstptr);
1217                                 return(NOEXCEPTION);
1218                         }
1219                         result_exponent--;
1220
1221                         /* Look to see if normalization is finished. */
1222                         if (Dbl_isone_hidden(resultp1)) {
1223                                 /* No further normalization is needed */
1224                                 goto round;
1225                         }
1226
1227                         /* Discover first one bit to determine shift amount.
1228                          * Use a modified binary search.  We have already
1229                          * shifted the result one position right and still
1230                          * not found a one so the remainder of the extension
1231                          * must be zero and simplifies rounding. */
1232                         /* Scan bytes */
1233                         while (Dbl_iszero_hiddenhigh7mantissa(resultp1)) {
1234                                 Dblext_leftshiftby8(resultp1,resultp2,resultp3,resultp4);
1235                                 result_exponent -= 8;
1236                         }
1237                         /* Now narrow it down to the nibble */
1238                         if (Dbl_iszero_hiddenhigh3mantissa(resultp1)) {
1239                                 /* The lower nibble contains the
1240                                  * normalizing one */
1241                                 Dblext_leftshiftby4(resultp1,resultp2,resultp3,resultp4);
1242                                 result_exponent -= 4;
1243                         }
1244                         /* Select case where first bit is set (already
1245                          * normalized) otherwise select the proper shift. */
1246                         jumpsize = Dbl_hiddenhigh3mantissa(resultp1);
1247                         if (jumpsize <= 7) switch(jumpsize) {
1248                         case 1:
1249                                 Dblext_leftshiftby3(resultp1,resultp2,resultp3,
1250                                         resultp4);
1251                                 result_exponent -= 3;
1252                                 break;
1253                         case 2:
1254                         case 3:
1255                                 Dblext_leftshiftby2(resultp1,resultp2,resultp3,
1256                                         resultp4);
1257                                 result_exponent -= 2;
1258                                 break;
1259                         case 4:
1260                         case 5:
1261                         case 6:
1262                         case 7:
1263                                 Dblext_leftshiftby1(resultp1,resultp2,resultp3,
1264                                         resultp4);
1265                                 result_exponent -= 1;
1266                                 break;
1267                         }
1268                 } /* end if (hidden...)... */
1269         /* Fall through and round */
1270         } /* end if (save < 0)... */
1271         else {
1272                 /* Add magnitudes */
1273                 Dblext_addition(tmpresp1,tmpresp2,tmpresp3,tmpresp4,
1274                         rightp1,rightp2,rightp3,rightp4,
1275                         /*to*/resultp1,resultp2,resultp3,resultp4);
1276                 sign_save = Dbl_signextendedsign(resultp1);
1277                 if (Dbl_isone_hiddenoverflow(resultp1)) {
1278                         /* Prenormalization required. */
1279                         Dblext_arithrightshiftby1(resultp1,resultp2,resultp3,
1280                                 resultp4);
1281                         result_exponent++;
1282                 } /* end if hiddenoverflow... */
1283         } /* end else ...add magnitudes... */
1284
1285         /* Round the result.  If the extension and lower two words are
1286          * all zeros, then the result is exact.  Otherwise round in the
1287          * correct direction.  Underflow is possible. If a postnormalization
1288          * is necessary, then the mantissa is all zeros so no shift is needed.
1289          */
1290   round:
1291         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1292                 Dblext_denormalize(resultp1,resultp2,resultp3,resultp4,
1293                         result_exponent,is_tiny);
1294         }
1295         Dbl_set_sign(resultp1,/*using*/sign_save);
1296         if (Dblext_isnotzero_mantissap3(resultp3) || 
1297             Dblext_isnotzero_mantissap4(resultp4)) {
1298                 inexact = TRUE;
1299                 switch(Rounding_mode()) {
1300                 case ROUNDNEAREST: /* The default. */
1301                         if (Dblext_isone_highp3(resultp3)) {
1302                                 /* at least 1/2 ulp */
1303                                 if (Dblext_isnotzero_low31p3(resultp3) ||
1304                                     Dblext_isnotzero_mantissap4(resultp4) ||
1305                                     Dblext_isone_lowp2(resultp2)) {
1306                                         /* either exactly half way and odd or
1307                                          * more than 1/2ulp */
1308                                         Dbl_increment(resultp1,resultp2);
1309                                 }
1310                         }
1311                         break;
1312
1313                 case ROUNDPLUS:
1314                         if (Dbl_iszero_sign(resultp1)) {
1315                                 /* Round up positive results */
1316                                 Dbl_increment(resultp1,resultp2);
1317                         }
1318                         break;
1319             
1320                 case ROUNDMINUS:
1321                         if (Dbl_isone_sign(resultp1)) {
1322                                 /* Round down negative results */
1323                                 Dbl_increment(resultp1,resultp2);
1324                         }
1325             
1326                 case ROUNDZERO:;
1327                         /* truncate is simple */
1328                 } /* end switch... */
1329                 if (Dbl_isone_hiddenoverflow(resultp1)) result_exponent++;
1330         }
1331         if (result_exponent >= DBL_INFINITY_EXPONENT) {
1332                 /* Overflow */
1333                 if (Is_overflowtrap_enabled()) {
1334                         /*
1335                          * Adjust bias of result
1336                          */
1337                         Dbl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1338                         Dbl_copytoptr(resultp1,resultp2,dstptr);
1339                         if (inexact)
1340                             if (Is_inexacttrap_enabled())
1341                                 return (OPC_2E_OVERFLOWEXCEPTION |
1342                                         OPC_2E_INEXACTEXCEPTION);
1343                             else Set_inexactflag();
1344                         return (OPC_2E_OVERFLOWEXCEPTION);
1345                 }
1346                 inexact = TRUE;
1347                 Set_overflowflag();
1348                 Dbl_setoverflow(resultp1,resultp2);
1349         } else if (result_exponent <= 0) {      /* underflow case */
1350                 if (Is_underflowtrap_enabled()) {
1351                         /*
1352                          * Adjust bias of result
1353                          */
1354                         Dbl_setwrapped_exponent(resultp1,result_exponent,unfl);
1355                         Dbl_copytoptr(resultp1,resultp2,dstptr);
1356                         if (inexact)
1357                             if (Is_inexacttrap_enabled())
1358                                 return (OPC_2E_UNDERFLOWEXCEPTION |
1359                                         OPC_2E_INEXACTEXCEPTION);
1360                             else Set_inexactflag();
1361                         return(OPC_2E_UNDERFLOWEXCEPTION);
1362                 }
1363                 else if (inexact && is_tiny) Set_underflowflag();
1364         }
1365         else Dbl_set_exponent(resultp1,result_exponent);
1366         Dbl_copytoptr(resultp1,resultp2,dstptr);
1367         if (inexact) 
1368                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
1369                 else Set_inexactflag();
1370         return(NOEXCEPTION);
1371 }
1372
1373 /*
1374  *  Single Floating-point Multiply Fused Add
1375  */
1376
1377 sgl_fmpyfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
1378
1379 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
1380 unsigned int *status;
1381 {
1382         unsigned int opnd1, opnd2, opnd3;
1383         register unsigned int tmpresp1, tmpresp2;
1384         unsigned int rightp1, rightp2;
1385         unsigned int resultp1, resultp2 = 0;
1386         register int mpy_exponent, add_exponent, count;
1387         boolean inexact = FALSE, is_tiny = FALSE;
1388
1389         unsigned int signlessleft1, signlessright1, save;
1390         register int result_exponent, diff_exponent;
1391         int sign_save, jumpsize;
1392         
1393         Sgl_copyfromptr(src1ptr,opnd1);
1394         Sgl_copyfromptr(src2ptr,opnd2);
1395         Sgl_copyfromptr(src3ptr,opnd3);
1396
1397         /* 
1398          * set sign bit of result of multiply
1399          */
1400         if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
1401                 Sgl_setnegativezero(resultp1); 
1402         else Sgl_setzero(resultp1);
1403
1404         /*
1405          * Generate multiply exponent 
1406          */
1407         mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
1408
1409         /*
1410          * check first operand for NaN's or infinity
1411          */
1412         if (Sgl_isinfinity_exponent(opnd1)) {
1413                 if (Sgl_iszero_mantissa(opnd1)) {
1414                         if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
1415                                 if (Sgl_iszero_exponentmantissa(opnd2)) {
1416                                         /* 
1417                                          * invalid since operands are infinity 
1418                                          * and zero 
1419                                          */
1420                                         if (Is_invalidtrap_enabled())
1421                                                 return(OPC_2E_INVALIDEXCEPTION);
1422                                         Set_invalidflag();
1423                                         Sgl_makequietnan(resultp1);
1424                                         Sgl_copytoptr(resultp1,dstptr);
1425                                         return(NOEXCEPTION);
1426                                 }
1427                                 /*
1428                                  * Check third operand for infinity with a
1429                                  *  sign opposite of the multiply result
1430                                  */
1431                                 if (Sgl_isinfinity(opnd3) &&
1432                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1433                                         /* 
1434                                          * invalid since attempting a magnitude
1435                                          * subtraction of infinities
1436                                          */
1437                                         if (Is_invalidtrap_enabled())
1438                                                 return(OPC_2E_INVALIDEXCEPTION);
1439                                         Set_invalidflag();
1440                                         Sgl_makequietnan(resultp1);
1441                                         Sgl_copytoptr(resultp1,dstptr);
1442                                         return(NOEXCEPTION);
1443                                 }
1444
1445                                 /*
1446                                  * return infinity
1447                                  */
1448                                 Sgl_setinfinity_exponentmantissa(resultp1);
1449                                 Sgl_copytoptr(resultp1,dstptr);
1450                                 return(NOEXCEPTION);
1451                         }
1452                 }
1453                 else {
1454                         /*
1455                          * is NaN; signaling or quiet?
1456                          */
1457                         if (Sgl_isone_signaling(opnd1)) {
1458                                 /* trap if INVALIDTRAP enabled */
1459                                 if (Is_invalidtrap_enabled()) 
1460                                         return(OPC_2E_INVALIDEXCEPTION);
1461                                 /* make NaN quiet */
1462                                 Set_invalidflag();
1463                                 Sgl_set_quiet(opnd1);
1464                         }
1465                         /* 
1466                          * is second operand a signaling NaN? 
1467                          */
1468                         else if (Sgl_is_signalingnan(opnd2)) {
1469                                 /* trap if INVALIDTRAP enabled */
1470                                 if (Is_invalidtrap_enabled())
1471                                         return(OPC_2E_INVALIDEXCEPTION);
1472                                 /* make NaN quiet */
1473                                 Set_invalidflag();
1474                                 Sgl_set_quiet(opnd2);
1475                                 Sgl_copytoptr(opnd2,dstptr);
1476                                 return(NOEXCEPTION);
1477                         }
1478                         /* 
1479                          * is third operand a signaling NaN? 
1480                          */
1481                         else if (Sgl_is_signalingnan(opnd3)) {
1482                                 /* trap if INVALIDTRAP enabled */
1483                                 if (Is_invalidtrap_enabled())
1484                                         return(OPC_2E_INVALIDEXCEPTION);
1485                                 /* make NaN quiet */
1486                                 Set_invalidflag();
1487                                 Sgl_set_quiet(opnd3);
1488                                 Sgl_copytoptr(opnd3,dstptr);
1489                                 return(NOEXCEPTION);
1490                         }
1491                         /*
1492                          * return quiet NaN
1493                          */
1494                         Sgl_copytoptr(opnd1,dstptr);
1495                         return(NOEXCEPTION);
1496                 }
1497         }
1498
1499         /*
1500          * check second operand for NaN's or infinity
1501          */
1502         if (Sgl_isinfinity_exponent(opnd2)) {
1503                 if (Sgl_iszero_mantissa(opnd2)) {
1504                         if (Sgl_isnotnan(opnd3)) {
1505                                 if (Sgl_iszero_exponentmantissa(opnd1)) {
1506                                         /* 
1507                                          * invalid since multiply operands are
1508                                          * zero & infinity
1509                                          */
1510                                         if (Is_invalidtrap_enabled())
1511                                                 return(OPC_2E_INVALIDEXCEPTION);
1512                                         Set_invalidflag();
1513                                         Sgl_makequietnan(opnd2);
1514                                         Sgl_copytoptr(opnd2,dstptr);
1515                                         return(NOEXCEPTION);
1516                                 }
1517
1518                                 /*
1519                                  * Check third operand for infinity with a
1520                                  *  sign opposite of the multiply result
1521                                  */
1522                                 if (Sgl_isinfinity(opnd3) &&
1523                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
1524                                         /* 
1525                                          * invalid since attempting a magnitude
1526                                          * subtraction of infinities
1527                                          */
1528                                         if (Is_invalidtrap_enabled())
1529                                                 return(OPC_2E_INVALIDEXCEPTION);
1530                                         Set_invalidflag();
1531                                         Sgl_makequietnan(resultp1);
1532                                         Sgl_copytoptr(resultp1,dstptr);
1533                                         return(NOEXCEPTION);
1534                                 }
1535
1536                                 /*
1537                                  * return infinity
1538                                  */
1539                                 Sgl_setinfinity_exponentmantissa(resultp1);
1540                                 Sgl_copytoptr(resultp1,dstptr);
1541                                 return(NOEXCEPTION);
1542                         }
1543                 }
1544                 else {
1545                         /*
1546                          * is NaN; signaling or quiet?
1547                          */
1548                         if (Sgl_isone_signaling(opnd2)) {
1549                                 /* trap if INVALIDTRAP enabled */
1550                                 if (Is_invalidtrap_enabled())
1551                                         return(OPC_2E_INVALIDEXCEPTION);
1552                                 /* make NaN quiet */
1553                                 Set_invalidflag();
1554                                 Sgl_set_quiet(opnd2);
1555                         }
1556                         /* 
1557                          * is third operand a signaling NaN? 
1558                          */
1559                         else if (Sgl_is_signalingnan(opnd3)) {
1560                                 /* trap if INVALIDTRAP enabled */
1561                                 if (Is_invalidtrap_enabled())
1562                                                 return(OPC_2E_INVALIDEXCEPTION);
1563                                 /* make NaN quiet */
1564                                 Set_invalidflag();
1565                                 Sgl_set_quiet(opnd3);
1566                                 Sgl_copytoptr(opnd3,dstptr);
1567                                 return(NOEXCEPTION);
1568                         }
1569                         /*
1570                          * return quiet NaN
1571                          */
1572                         Sgl_copytoptr(opnd2,dstptr);
1573                         return(NOEXCEPTION);
1574                 }
1575         }
1576
1577         /*
1578          * check third operand for NaN's or infinity
1579          */
1580         if (Sgl_isinfinity_exponent(opnd3)) {
1581                 if (Sgl_iszero_mantissa(opnd3)) {
1582                         /* return infinity */
1583                         Sgl_copytoptr(opnd3,dstptr);
1584                         return(NOEXCEPTION);
1585                 } else {
1586                         /*
1587                          * is NaN; signaling or quiet?
1588                          */
1589                         if (Sgl_isone_signaling(opnd3)) {
1590                                 /* trap if INVALIDTRAP enabled */
1591                                 if (Is_invalidtrap_enabled())
1592                                         return(OPC_2E_INVALIDEXCEPTION);
1593                                 /* make NaN quiet */
1594                                 Set_invalidflag();
1595                                 Sgl_set_quiet(opnd3);
1596                         }
1597                         /*
1598                          * return quiet NaN
1599                          */
1600                         Sgl_copytoptr(opnd3,dstptr);
1601                         return(NOEXCEPTION);
1602                 }
1603         }
1604
1605         /*
1606          * Generate multiply mantissa
1607          */
1608         if (Sgl_isnotzero_exponent(opnd1)) {
1609                 /* set hidden bit */
1610                 Sgl_clear_signexponent_set_hidden(opnd1);
1611         }
1612         else {
1613                 /* check for zero */
1614                 if (Sgl_iszero_mantissa(opnd1)) {
1615                         /*
1616                          * Perform the add opnd3 with zero here.
1617                          */
1618                         if (Sgl_iszero_exponentmantissa(opnd3)) {
1619                                 if (Is_rounding_mode(ROUNDMINUS)) {
1620                                         Sgl_or_signs(opnd3,resultp1);
1621                                 } else {
1622                                         Sgl_and_signs(opnd3,resultp1);
1623                                 }
1624                         }
1625                         /*
1626                          * Now let's check for trapped underflow case.
1627                          */
1628                         else if (Sgl_iszero_exponent(opnd3) &&
1629                                  Is_underflowtrap_enabled()) {
1630                                 /* need to normalize results mantissa */
1631                                 sign_save = Sgl_signextendedsign(opnd3);
1632                                 result_exponent = 0;
1633                                 Sgl_leftshiftby1(opnd3);
1634                                 Sgl_normalize(opnd3,result_exponent);
1635                                 Sgl_set_sign(opnd3,/*using*/sign_save);
1636                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
1637                                                         unfl);
1638                                 Sgl_copytoptr(opnd3,dstptr);
1639                                 /* inexact = FALSE */
1640                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1641                         }
1642                         Sgl_copytoptr(opnd3,dstptr);
1643                         return(NOEXCEPTION);
1644                 }
1645                 /* is denormalized, adjust exponent */
1646                 Sgl_clear_signexponent(opnd1);
1647                 Sgl_leftshiftby1(opnd1);
1648                 Sgl_normalize(opnd1,mpy_exponent);
1649         }
1650         /* opnd2 needs to have hidden bit set with msb in hidden bit */
1651         if (Sgl_isnotzero_exponent(opnd2)) {
1652                 Sgl_clear_signexponent_set_hidden(opnd2);
1653         }
1654         else {
1655                 /* check for zero */
1656                 if (Sgl_iszero_mantissa(opnd2)) {
1657                         /*
1658                          * Perform the add opnd3 with zero here.
1659                          */
1660                         if (Sgl_iszero_exponentmantissa(opnd3)) {
1661                                 if (Is_rounding_mode(ROUNDMINUS)) {
1662                                         Sgl_or_signs(opnd3,resultp1);
1663                                 } else {
1664                                         Sgl_and_signs(opnd3,resultp1);
1665                                 }
1666                         }
1667                         /*
1668                          * Now let's check for trapped underflow case.
1669                          */
1670                         else if (Sgl_iszero_exponent(opnd3) &&
1671                             Is_underflowtrap_enabled()) {
1672                                 /* need to normalize results mantissa */
1673                                 sign_save = Sgl_signextendedsign(opnd3);
1674                                 result_exponent = 0;
1675                                 Sgl_leftshiftby1(opnd3);
1676                                 Sgl_normalize(opnd3,result_exponent);
1677                                 Sgl_set_sign(opnd3,/*using*/sign_save);
1678                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
1679                                                         unfl);
1680                                 Sgl_copytoptr(opnd3,dstptr);
1681                                 /* inexact = FALSE */
1682                                 return(OPC_2E_UNDERFLOWEXCEPTION);
1683                         }
1684                         Sgl_copytoptr(opnd3,dstptr);
1685                         return(NOEXCEPTION);
1686                 }
1687                 /* is denormalized; want to normalize */
1688                 Sgl_clear_signexponent(opnd2);
1689                 Sgl_leftshiftby1(opnd2);
1690                 Sgl_normalize(opnd2,mpy_exponent);
1691         }
1692
1693         /* Multiply the first two source mantissas together */
1694
1695         /* 
1696          * The intermediate result will be kept in tmpres,
1697          * which needs enough room for 106 bits of mantissa,
1698          * so lets call it a Double extended.
1699          */
1700         Sglext_setzero(tmpresp1,tmpresp2);
1701
1702         /* 
1703          * Four bits at a time are inspected in each loop, and a 
1704          * simple shift and add multiply algorithm is used. 
1705          */ 
1706         for (count = SGL_P-1; count >= 0; count -= 4) {
1707                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1708                 if (Sbit28(opnd1)) {
1709                         /* Twoword_add should be an ADD followed by 2 ADDC's */
1710                         Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
1711                 }
1712                 if (Sbit29(opnd1)) {
1713                         Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
1714                 }
1715                 if (Sbit30(opnd1)) {
1716                         Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
1717                 }
1718                 if (Sbit31(opnd1)) {
1719                         Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
1720                 }
1721                 Sgl_rightshiftby4(opnd1);
1722         }
1723         if (Is_sexthiddenoverflow(tmpresp1)) {
1724                 /* result mantissa >= 2 (mantissa overflow) */
1725                 mpy_exponent++;
1726                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
1727         } else {
1728                 Sglext_rightshiftby3(tmpresp1,tmpresp2);
1729         }
1730
1731         /*
1732          * Restore the sign of the mpy result which was saved in resultp1.
1733          * The exponent will continue to be kept in mpy_exponent.
1734          */
1735         Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
1736
1737         /* 
1738          * No rounding is required, since the result of the multiply
1739          * is exact in the extended format.
1740          */
1741
1742         /*
1743          * Now we are ready to perform the add portion of the operation.
1744          *
1745          * The exponents need to be kept as integers for now, since the
1746          * multiply result might not fit into the exponent field.  We
1747          * can't overflow or underflow because of this yet, since the
1748          * add could bring the final result back into range.
1749          */
1750         add_exponent = Sgl_exponent(opnd3);
1751
1752         /*
1753          * Check for denormalized or zero add operand.
1754          */
1755         if (add_exponent == 0) {
1756                 /* check for zero */
1757                 if (Sgl_iszero_mantissa(opnd3)) {
1758                         /* right is zero */
1759                         /* Left can't be zero and must be result.
1760                          *
1761                          * The final result is now in tmpres and mpy_exponent,
1762                          * and needs to be rounded and squeezed back into
1763                          * double precision format from double extended.
1764                          */
1765                         result_exponent = mpy_exponent;
1766                         Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
1767                         sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
1768                         goto round;
1769                 }
1770
1771                 /* 
1772                  * Neither are zeroes.  
1773                  * Adjust exponent and normalize add operand.
1774                  */
1775                 sign_save = Sgl_signextendedsign(opnd3);        /* save sign */
1776                 Sgl_clear_signexponent(opnd3);
1777                 Sgl_leftshiftby1(opnd3);
1778                 Sgl_normalize(opnd3,add_exponent);
1779                 Sgl_set_sign(opnd3,sign_save);          /* restore sign */
1780         } else {
1781                 Sgl_clear_exponent_set_hidden(opnd3);
1782         }
1783         /*
1784          * Copy opnd3 to the double extended variable called right.
1785          */
1786         Sgl_copyto_sglext(opnd3,rightp1,rightp2);
1787
1788         /*
1789          * A zero "save" helps discover equal operands (for later),
1790          * and is used in swapping operands (if needed).
1791          */
1792         Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
1793
1794         /*
1795          * Compare magnitude of operands.
1796          */
1797         Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
1798         Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
1799         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
1800             Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
1801                 /*
1802                  * Set the left operand to the larger one by XOR swap.
1803                  * First finish the first word "save".
1804                  */
1805                 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
1806                 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
1807                 Sglext_swap_lower(tmpresp2,rightp2);
1808                 /* also setup exponents used in rest of routine */
1809                 diff_exponent = add_exponent - mpy_exponent;
1810                 result_exponent = add_exponent;
1811         } else {
1812                 /* also setup exponents used in rest of routine */
1813                 diff_exponent = mpy_exponent - add_exponent;
1814                 result_exponent = mpy_exponent;
1815         }
1816         /* Invariant: left is not smaller than right. */
1817
1818         /*
1819          * Special case alignment of operands that would force alignment
1820          * beyond the extent of the extension.  A further optimization
1821          * could special case this but only reduces the path length for
1822          * this infrequent case.
1823          */
1824         if (diff_exponent > SGLEXT_THRESHOLD) {
1825                 diff_exponent = SGLEXT_THRESHOLD;
1826         }
1827
1828         /* Align right operand by shifting it to the right */
1829         Sglext_clear_sign(rightp1);
1830         Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
1831         
1832         /* Treat sum and difference of the operands separately. */
1833         if ((int)save < 0) {
1834                 /*
1835                  * Difference of the two operands.  Overflow can occur if the
1836                  * multiply overflowed.  A borrow can occur out of the hidden
1837                  * bit and force a post normalization phase.
1838                  */
1839                 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
1840                         resultp1,resultp2);
1841                 sign_save = Sgl_signextendedsign(resultp1);
1842                 if (Sgl_iszero_hidden(resultp1)) {
1843                         /* Handle normalization */
1844                 /* A straight foward algorithm would now shift the
1845                  * result and extension left until the hidden bit
1846                  * becomes one.  Not all of the extension bits need
1847                  * participate in the shift.  Only the two most 
1848                  * significant bits (round and guard) are needed.
1849                  * If only a single shift is needed then the guard
1850                  * bit becomes a significant low order bit and the
1851                  * extension must participate in the rounding.
1852                  * If more than a single shift is needed, then all
1853                  * bits to the right of the guard bit are zeros, 
1854                  * and the guard bit may or may not be zero. */
1855                         Sglext_leftshiftby1(resultp1,resultp2);
1856
1857                         /* Need to check for a zero result.  The sign and
1858                          * exponent fields have already been zeroed.  The more
1859                          * efficient test of the full object can be used.
1860                          */
1861                          if (Sglext_iszero(resultp1,resultp2)) {
1862                                 /* Must have been "x-x" or "x+(-x)". */
1863                                 if (Is_rounding_mode(ROUNDMINUS))
1864                                         Sgl_setone_sign(resultp1);
1865                                 Sgl_copytoptr(resultp1,dstptr);
1866                                 return(NOEXCEPTION);
1867                         }
1868                         result_exponent--;
1869
1870                         /* Look to see if normalization is finished. */
1871                         if (Sgl_isone_hidden(resultp1)) {
1872                                 /* No further normalization is needed */
1873                                 goto round;
1874                         }
1875
1876                         /* Discover first one bit to determine shift amount.
1877                          * Use a modified binary search.  We have already
1878                          * shifted the result one position right and still
1879                          * not found a one so the remainder of the extension
1880                          * must be zero and simplifies rounding. */
1881                         /* Scan bytes */
1882                         while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
1883                                 Sglext_leftshiftby8(resultp1,resultp2);
1884                                 result_exponent -= 8;
1885                         }
1886                         /* Now narrow it down to the nibble */
1887                         if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
1888                                 /* The lower nibble contains the
1889                                  * normalizing one */
1890                                 Sglext_leftshiftby4(resultp1,resultp2);
1891                                 result_exponent -= 4;
1892                         }
1893                         /* Select case where first bit is set (already
1894                          * normalized) otherwise select the proper shift. */
1895                         jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
1896                         if (jumpsize <= 7) switch(jumpsize) {
1897                         case 1:
1898                                 Sglext_leftshiftby3(resultp1,resultp2);
1899                                 result_exponent -= 3;
1900                                 break;
1901                         case 2:
1902                         case 3:
1903                                 Sglext_leftshiftby2(resultp1,resultp2);
1904                                 result_exponent -= 2;
1905                                 break;
1906                         case 4:
1907                         case 5:
1908                         case 6:
1909                         case 7:
1910                                 Sglext_leftshiftby1(resultp1,resultp2);
1911                                 result_exponent -= 1;
1912                                 break;
1913                         }
1914                 } /* end if (hidden...)... */
1915         /* Fall through and round */
1916         } /* end if (save < 0)... */
1917         else {
1918                 /* Add magnitudes */
1919                 Sglext_addition(tmpresp1,tmpresp2,
1920                         rightp1,rightp2, /*to*/resultp1,resultp2);
1921                 sign_save = Sgl_signextendedsign(resultp1);
1922                 if (Sgl_isone_hiddenoverflow(resultp1)) {
1923                         /* Prenormalization required. */
1924                         Sglext_arithrightshiftby1(resultp1,resultp2);
1925                         result_exponent++;
1926                 } /* end if hiddenoverflow... */
1927         } /* end else ...add magnitudes... */
1928
1929         /* Round the result.  If the extension and lower two words are
1930          * all zeros, then the result is exact.  Otherwise round in the
1931          * correct direction.  Underflow is possible. If a postnormalization
1932          * is necessary, then the mantissa is all zeros so no shift is needed.
1933          */
1934   round:
1935         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
1936                 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
1937         }
1938         Sgl_set_sign(resultp1,/*using*/sign_save);
1939         if (Sglext_isnotzero_mantissap2(resultp2)) {
1940                 inexact = TRUE;
1941                 switch(Rounding_mode()) {
1942                 case ROUNDNEAREST: /* The default. */
1943                         if (Sglext_isone_highp2(resultp2)) {
1944                                 /* at least 1/2 ulp */
1945                                 if (Sglext_isnotzero_low31p2(resultp2) ||
1946                                     Sglext_isone_lowp1(resultp1)) {
1947                                         /* either exactly half way and odd or
1948                                          * more than 1/2ulp */
1949                                         Sgl_increment(resultp1);
1950                                 }
1951                         }
1952                         break;
1953
1954                 case ROUNDPLUS:
1955                         if (Sgl_iszero_sign(resultp1)) {
1956                                 /* Round up positive results */
1957                                 Sgl_increment(resultp1);
1958                         }
1959                         break;
1960             
1961                 case ROUNDMINUS:
1962                         if (Sgl_isone_sign(resultp1)) {
1963                                 /* Round down negative results */
1964                                 Sgl_increment(resultp1);
1965                         }
1966             
1967                 case ROUNDZERO:;
1968                         /* truncate is simple */
1969                 } /* end switch... */
1970                 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
1971         }
1972         if (result_exponent >= SGL_INFINITY_EXPONENT) {
1973                 /* Overflow */
1974                 if (Is_overflowtrap_enabled()) {
1975                         /*
1976                          * Adjust bias of result
1977                          */
1978                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
1979                         Sgl_copytoptr(resultp1,dstptr);
1980                         if (inexact)
1981                             if (Is_inexacttrap_enabled())
1982                                 return (OPC_2E_OVERFLOWEXCEPTION |
1983                                         OPC_2E_INEXACTEXCEPTION);
1984                             else Set_inexactflag();
1985                         return (OPC_2E_OVERFLOWEXCEPTION);
1986                 }
1987                 inexact = TRUE;
1988                 Set_overflowflag();
1989                 Sgl_setoverflow(resultp1);
1990         } else if (result_exponent <= 0) {      /* underflow case */
1991                 if (Is_underflowtrap_enabled()) {
1992                         /*
1993                          * Adjust bias of result
1994                          */
1995                         Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
1996                         Sgl_copytoptr(resultp1,dstptr);
1997                         if (inexact)
1998                             if (Is_inexacttrap_enabled())
1999                                 return (OPC_2E_UNDERFLOWEXCEPTION |
2000                                         OPC_2E_INEXACTEXCEPTION);
2001                             else Set_inexactflag();
2002                         return(OPC_2E_UNDERFLOWEXCEPTION);
2003                 }
2004                 else if (inexact && is_tiny) Set_underflowflag();
2005         }
2006         else Sgl_set_exponent(resultp1,result_exponent);
2007         Sgl_copytoptr(resultp1,dstptr);
2008         if (inexact) 
2009                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2010                 else Set_inexactflag();
2011         return(NOEXCEPTION);
2012 }
2013
2014 /*
2015  *  Single Floating-point Multiply Negate Fused Add
2016  */
2017
2018 sgl_fmpynfadd(src1ptr,src2ptr,src3ptr,status,dstptr)
2019
2020 sgl_floating_point *src1ptr, *src2ptr, *src3ptr, *dstptr;
2021 unsigned int *status;
2022 {
2023         unsigned int opnd1, opnd2, opnd3;
2024         register unsigned int tmpresp1, tmpresp2;
2025         unsigned int rightp1, rightp2;
2026         unsigned int resultp1, resultp2 = 0;
2027         register int mpy_exponent, add_exponent, count;
2028         boolean inexact = FALSE, is_tiny = FALSE;
2029
2030         unsigned int signlessleft1, signlessright1, save;
2031         register int result_exponent, diff_exponent;
2032         int sign_save, jumpsize;
2033         
2034         Sgl_copyfromptr(src1ptr,opnd1);
2035         Sgl_copyfromptr(src2ptr,opnd2);
2036         Sgl_copyfromptr(src3ptr,opnd3);
2037
2038         /* 
2039          * set sign bit of result of multiply
2040          */
2041         if (Sgl_sign(opnd1) ^ Sgl_sign(opnd2)) 
2042                 Sgl_setzero(resultp1);
2043         else 
2044                 Sgl_setnegativezero(resultp1); 
2045
2046         /*
2047          * Generate multiply exponent 
2048          */
2049         mpy_exponent = Sgl_exponent(opnd1) + Sgl_exponent(opnd2) - SGL_BIAS;
2050
2051         /*
2052          * check first operand for NaN's or infinity
2053          */
2054         if (Sgl_isinfinity_exponent(opnd1)) {
2055                 if (Sgl_iszero_mantissa(opnd1)) {
2056                         if (Sgl_isnotnan(opnd2) && Sgl_isnotnan(opnd3)) {
2057                                 if (Sgl_iszero_exponentmantissa(opnd2)) {
2058                                         /* 
2059                                          * invalid since operands are infinity 
2060                                          * and zero 
2061                                          */
2062                                         if (Is_invalidtrap_enabled())
2063                                                 return(OPC_2E_INVALIDEXCEPTION);
2064                                         Set_invalidflag();
2065                                         Sgl_makequietnan(resultp1);
2066                                         Sgl_copytoptr(resultp1,dstptr);
2067                                         return(NOEXCEPTION);
2068                                 }
2069                                 /*
2070                                  * Check third operand for infinity with a
2071                                  *  sign opposite of the multiply result
2072                                  */
2073                                 if (Sgl_isinfinity(opnd3) &&
2074                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2075                                         /* 
2076                                          * invalid since attempting a magnitude
2077                                          * subtraction of infinities
2078                                          */
2079                                         if (Is_invalidtrap_enabled())
2080                                                 return(OPC_2E_INVALIDEXCEPTION);
2081                                         Set_invalidflag();
2082                                         Sgl_makequietnan(resultp1);
2083                                         Sgl_copytoptr(resultp1,dstptr);
2084                                         return(NOEXCEPTION);
2085                                 }
2086
2087                                 /*
2088                                  * return infinity
2089                                  */
2090                                 Sgl_setinfinity_exponentmantissa(resultp1);
2091                                 Sgl_copytoptr(resultp1,dstptr);
2092                                 return(NOEXCEPTION);
2093                         }
2094                 }
2095                 else {
2096                         /*
2097                          * is NaN; signaling or quiet?
2098                          */
2099                         if (Sgl_isone_signaling(opnd1)) {
2100                                 /* trap if INVALIDTRAP enabled */
2101                                 if (Is_invalidtrap_enabled()) 
2102                                         return(OPC_2E_INVALIDEXCEPTION);
2103                                 /* make NaN quiet */
2104                                 Set_invalidflag();
2105                                 Sgl_set_quiet(opnd1);
2106                         }
2107                         /* 
2108                          * is second operand a signaling NaN? 
2109                          */
2110                         else if (Sgl_is_signalingnan(opnd2)) {
2111                                 /* trap if INVALIDTRAP enabled */
2112                                 if (Is_invalidtrap_enabled())
2113                                         return(OPC_2E_INVALIDEXCEPTION);
2114                                 /* make NaN quiet */
2115                                 Set_invalidflag();
2116                                 Sgl_set_quiet(opnd2);
2117                                 Sgl_copytoptr(opnd2,dstptr);
2118                                 return(NOEXCEPTION);
2119                         }
2120                         /* 
2121                          * is third operand a signaling NaN? 
2122                          */
2123                         else if (Sgl_is_signalingnan(opnd3)) {
2124                                 /* trap if INVALIDTRAP enabled */
2125                                 if (Is_invalidtrap_enabled())
2126                                         return(OPC_2E_INVALIDEXCEPTION);
2127                                 /* make NaN quiet */
2128                                 Set_invalidflag();
2129                                 Sgl_set_quiet(opnd3);
2130                                 Sgl_copytoptr(opnd3,dstptr);
2131                                 return(NOEXCEPTION);
2132                         }
2133                         /*
2134                          * return quiet NaN
2135                          */
2136                         Sgl_copytoptr(opnd1,dstptr);
2137                         return(NOEXCEPTION);
2138                 }
2139         }
2140
2141         /*
2142          * check second operand for NaN's or infinity
2143          */
2144         if (Sgl_isinfinity_exponent(opnd2)) {
2145                 if (Sgl_iszero_mantissa(opnd2)) {
2146                         if (Sgl_isnotnan(opnd3)) {
2147                                 if (Sgl_iszero_exponentmantissa(opnd1)) {
2148                                         /* 
2149                                          * invalid since multiply operands are
2150                                          * zero & infinity
2151                                          */
2152                                         if (Is_invalidtrap_enabled())
2153                                                 return(OPC_2E_INVALIDEXCEPTION);
2154                                         Set_invalidflag();
2155                                         Sgl_makequietnan(opnd2);
2156                                         Sgl_copytoptr(opnd2,dstptr);
2157                                         return(NOEXCEPTION);
2158                                 }
2159
2160                                 /*
2161                                  * Check third operand for infinity with a
2162                                  *  sign opposite of the multiply result
2163                                  */
2164                                 if (Sgl_isinfinity(opnd3) &&
2165                                     (Sgl_sign(resultp1) ^ Sgl_sign(opnd3))) {
2166                                         /* 
2167                                          * invalid since attempting a magnitude
2168                                          * subtraction of infinities
2169                                          */
2170                                         if (Is_invalidtrap_enabled())
2171                                                 return(OPC_2E_INVALIDEXCEPTION);
2172                                         Set_invalidflag();
2173                                         Sgl_makequietnan(resultp1);
2174                                         Sgl_copytoptr(resultp1,dstptr);
2175                                         return(NOEXCEPTION);
2176                                 }
2177
2178                                 /*
2179                                  * return infinity
2180                                  */
2181                                 Sgl_setinfinity_exponentmantissa(resultp1);
2182                                 Sgl_copytoptr(resultp1,dstptr);
2183                                 return(NOEXCEPTION);
2184                         }
2185                 }
2186                 else {
2187                         /*
2188                          * is NaN; signaling or quiet?
2189                          */
2190                         if (Sgl_isone_signaling(opnd2)) {
2191                                 /* trap if INVALIDTRAP enabled */
2192                                 if (Is_invalidtrap_enabled())
2193                                         return(OPC_2E_INVALIDEXCEPTION);
2194                                 /* make NaN quiet */
2195                                 Set_invalidflag();
2196                                 Sgl_set_quiet(opnd2);
2197                         }
2198                         /* 
2199                          * is third operand a signaling NaN? 
2200                          */
2201                         else if (Sgl_is_signalingnan(opnd3)) {
2202                                 /* trap if INVALIDTRAP enabled */
2203                                 if (Is_invalidtrap_enabled())
2204                                                 return(OPC_2E_INVALIDEXCEPTION);
2205                                 /* make NaN quiet */
2206                                 Set_invalidflag();
2207                                 Sgl_set_quiet(opnd3);
2208                                 Sgl_copytoptr(opnd3,dstptr);
2209                                 return(NOEXCEPTION);
2210                         }
2211                         /*
2212                          * return quiet NaN
2213                          */
2214                         Sgl_copytoptr(opnd2,dstptr);
2215                         return(NOEXCEPTION);
2216                 }
2217         }
2218
2219         /*
2220          * check third operand for NaN's or infinity
2221          */
2222         if (Sgl_isinfinity_exponent(opnd3)) {
2223                 if (Sgl_iszero_mantissa(opnd3)) {
2224                         /* return infinity */
2225                         Sgl_copytoptr(opnd3,dstptr);
2226                         return(NOEXCEPTION);
2227                 } else {
2228                         /*
2229                          * is NaN; signaling or quiet?
2230                          */
2231                         if (Sgl_isone_signaling(opnd3)) {
2232                                 /* trap if INVALIDTRAP enabled */
2233                                 if (Is_invalidtrap_enabled())
2234                                         return(OPC_2E_INVALIDEXCEPTION);
2235                                 /* make NaN quiet */
2236                                 Set_invalidflag();
2237                                 Sgl_set_quiet(opnd3);
2238                         }
2239                         /*
2240                          * return quiet NaN
2241                          */
2242                         Sgl_copytoptr(opnd3,dstptr);
2243                         return(NOEXCEPTION);
2244                 }
2245         }
2246
2247         /*
2248          * Generate multiply mantissa
2249          */
2250         if (Sgl_isnotzero_exponent(opnd1)) {
2251                 /* set hidden bit */
2252                 Sgl_clear_signexponent_set_hidden(opnd1);
2253         }
2254         else {
2255                 /* check for zero */
2256                 if (Sgl_iszero_mantissa(opnd1)) {
2257                         /*
2258                          * Perform the add opnd3 with zero here.
2259                          */
2260                         if (Sgl_iszero_exponentmantissa(opnd3)) {
2261                                 if (Is_rounding_mode(ROUNDMINUS)) {
2262                                         Sgl_or_signs(opnd3,resultp1);
2263                                 } else {
2264                                         Sgl_and_signs(opnd3,resultp1);
2265                                 }
2266                         }
2267                         /*
2268                          * Now let's check for trapped underflow case.
2269                          */
2270                         else if (Sgl_iszero_exponent(opnd3) &&
2271                                  Is_underflowtrap_enabled()) {
2272                                 /* need to normalize results mantissa */
2273                                 sign_save = Sgl_signextendedsign(opnd3);
2274                                 result_exponent = 0;
2275                                 Sgl_leftshiftby1(opnd3);
2276                                 Sgl_normalize(opnd3,result_exponent);
2277                                 Sgl_set_sign(opnd3,/*using*/sign_save);
2278                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
2279                                                         unfl);
2280                                 Sgl_copytoptr(opnd3,dstptr);
2281                                 /* inexact = FALSE */
2282                                 return(OPC_2E_UNDERFLOWEXCEPTION);
2283                         }
2284                         Sgl_copytoptr(opnd3,dstptr);
2285                         return(NOEXCEPTION);
2286                 }
2287                 /* is denormalized, adjust exponent */
2288                 Sgl_clear_signexponent(opnd1);
2289                 Sgl_leftshiftby1(opnd1);
2290                 Sgl_normalize(opnd1,mpy_exponent);
2291         }
2292         /* opnd2 needs to have hidden bit set with msb in hidden bit */
2293         if (Sgl_isnotzero_exponent(opnd2)) {
2294                 Sgl_clear_signexponent_set_hidden(opnd2);
2295         }
2296         else {
2297                 /* check for zero */
2298                 if (Sgl_iszero_mantissa(opnd2)) {
2299                         /*
2300                          * Perform the add opnd3 with zero here.
2301                          */
2302                         if (Sgl_iszero_exponentmantissa(opnd3)) {
2303                                 if (Is_rounding_mode(ROUNDMINUS)) {
2304                                         Sgl_or_signs(opnd3,resultp1);
2305                                 } else {
2306                                         Sgl_and_signs(opnd3,resultp1);
2307                                 }
2308                         }
2309                         /*
2310                          * Now let's check for trapped underflow case.
2311                          */
2312                         else if (Sgl_iszero_exponent(opnd3) &&
2313                             Is_underflowtrap_enabled()) {
2314                                 /* need to normalize results mantissa */
2315                                 sign_save = Sgl_signextendedsign(opnd3);
2316                                 result_exponent = 0;
2317                                 Sgl_leftshiftby1(opnd3);
2318                                 Sgl_normalize(opnd3,result_exponent);
2319                                 Sgl_set_sign(opnd3,/*using*/sign_save);
2320                                 Sgl_setwrapped_exponent(opnd3,result_exponent,
2321                                                         unfl);
2322                                 Sgl_copytoptr(opnd3,dstptr);
2323                                 /* inexact = FALSE */
2324                                 return(OPC_2E_UNDERFLOWEXCEPTION);
2325                         }
2326                         Sgl_copytoptr(opnd3,dstptr);
2327                         return(NOEXCEPTION);
2328                 }
2329                 /* is denormalized; want to normalize */
2330                 Sgl_clear_signexponent(opnd2);
2331                 Sgl_leftshiftby1(opnd2);
2332                 Sgl_normalize(opnd2,mpy_exponent);
2333         }
2334
2335         /* Multiply the first two source mantissas together */
2336
2337         /* 
2338          * The intermediate result will be kept in tmpres,
2339          * which needs enough room for 106 bits of mantissa,
2340          * so lets call it a Double extended.
2341          */
2342         Sglext_setzero(tmpresp1,tmpresp2);
2343
2344         /* 
2345          * Four bits at a time are inspected in each loop, and a 
2346          * simple shift and add multiply algorithm is used. 
2347          */ 
2348         for (count = SGL_P-1; count >= 0; count -= 4) {
2349                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2350                 if (Sbit28(opnd1)) {
2351                         /* Twoword_add should be an ADD followed by 2 ADDC's */
2352                         Twoword_add(tmpresp1, tmpresp2, opnd2<<3, 0);
2353                 }
2354                 if (Sbit29(opnd1)) {
2355                         Twoword_add(tmpresp1, tmpresp2, opnd2<<2, 0);
2356                 }
2357                 if (Sbit30(opnd1)) {
2358                         Twoword_add(tmpresp1, tmpresp2, opnd2<<1, 0);
2359                 }
2360                 if (Sbit31(opnd1)) {
2361                         Twoword_add(tmpresp1, tmpresp2, opnd2, 0);
2362                 }
2363                 Sgl_rightshiftby4(opnd1);
2364         }
2365         if (Is_sexthiddenoverflow(tmpresp1)) {
2366                 /* result mantissa >= 2 (mantissa overflow) */
2367                 mpy_exponent++;
2368                 Sglext_rightshiftby4(tmpresp1,tmpresp2);
2369         } else {
2370                 Sglext_rightshiftby3(tmpresp1,tmpresp2);
2371         }
2372
2373         /*
2374          * Restore the sign of the mpy result which was saved in resultp1.
2375          * The exponent will continue to be kept in mpy_exponent.
2376          */
2377         Sglext_set_sign(tmpresp1,Sgl_sign(resultp1));
2378
2379         /* 
2380          * No rounding is required, since the result of the multiply
2381          * is exact in the extended format.
2382          */
2383
2384         /*
2385          * Now we are ready to perform the add portion of the operation.
2386          *
2387          * The exponents need to be kept as integers for now, since the
2388          * multiply result might not fit into the exponent field.  We
2389          * can't overflow or underflow because of this yet, since the
2390          * add could bring the final result back into range.
2391          */
2392         add_exponent = Sgl_exponent(opnd3);
2393
2394         /*
2395          * Check for denormalized or zero add operand.
2396          */
2397         if (add_exponent == 0) {
2398                 /* check for zero */
2399                 if (Sgl_iszero_mantissa(opnd3)) {
2400                         /* right is zero */
2401                         /* Left can't be zero and must be result.
2402                          *
2403                          * The final result is now in tmpres and mpy_exponent,
2404                          * and needs to be rounded and squeezed back into
2405                          * double precision format from double extended.
2406                          */
2407                         result_exponent = mpy_exponent;
2408                         Sglext_copy(tmpresp1,tmpresp2,resultp1,resultp2);
2409                         sign_save = Sgl_signextendedsign(resultp1);/*save sign*/
2410                         goto round;
2411                 }
2412
2413                 /* 
2414                  * Neither are zeroes.  
2415                  * Adjust exponent and normalize add operand.
2416                  */
2417                 sign_save = Sgl_signextendedsign(opnd3);        /* save sign */
2418                 Sgl_clear_signexponent(opnd3);
2419                 Sgl_leftshiftby1(opnd3);
2420                 Sgl_normalize(opnd3,add_exponent);
2421                 Sgl_set_sign(opnd3,sign_save);          /* restore sign */
2422         } else {
2423                 Sgl_clear_exponent_set_hidden(opnd3);
2424         }
2425         /*
2426          * Copy opnd3 to the double extended variable called right.
2427          */
2428         Sgl_copyto_sglext(opnd3,rightp1,rightp2);
2429
2430         /*
2431          * A zero "save" helps discover equal operands (for later),
2432          * and is used in swapping operands (if needed).
2433          */
2434         Sglext_xortointp1(tmpresp1,rightp1,/*to*/save);
2435
2436         /*
2437          * Compare magnitude of operands.
2438          */
2439         Sglext_copytoint_exponentmantissa(tmpresp1,signlessleft1);
2440         Sglext_copytoint_exponentmantissa(rightp1,signlessright1);
2441         if (mpy_exponent < add_exponent || mpy_exponent == add_exponent &&
2442             Sglext_ismagnitudeless(signlessleft1,signlessright1)) {
2443                 /*
2444                  * Set the left operand to the larger one by XOR swap.
2445                  * First finish the first word "save".
2446                  */
2447                 Sglext_xorfromintp1(save,rightp1,/*to*/rightp1);
2448                 Sglext_xorfromintp1(save,tmpresp1,/*to*/tmpresp1);
2449                 Sglext_swap_lower(tmpresp2,rightp2);
2450                 /* also setup exponents used in rest of routine */
2451                 diff_exponent = add_exponent - mpy_exponent;
2452                 result_exponent = add_exponent;
2453         } else {
2454                 /* also setup exponents used in rest of routine */
2455                 diff_exponent = mpy_exponent - add_exponent;
2456                 result_exponent = mpy_exponent;
2457         }
2458         /* Invariant: left is not smaller than right. */
2459
2460         /*
2461          * Special case alignment of operands that would force alignment
2462          * beyond the extent of the extension.  A further optimization
2463          * could special case this but only reduces the path length for
2464          * this infrequent case.
2465          */
2466         if (diff_exponent > SGLEXT_THRESHOLD) {
2467                 diff_exponent = SGLEXT_THRESHOLD;
2468         }
2469
2470         /* Align right operand by shifting it to the right */
2471         Sglext_clear_sign(rightp1);
2472         Sglext_right_align(rightp1,rightp2,/*shifted by*/diff_exponent);
2473         
2474         /* Treat sum and difference of the operands separately. */
2475         if ((int)save < 0) {
2476                 /*
2477                  * Difference of the two operands.  Overflow can occur if the
2478                  * multiply overflowed.  A borrow can occur out of the hidden
2479                  * bit and force a post normalization phase.
2480                  */
2481                 Sglext_subtract(tmpresp1,tmpresp2, rightp1,rightp2,
2482                         resultp1,resultp2);
2483                 sign_save = Sgl_signextendedsign(resultp1);
2484                 if (Sgl_iszero_hidden(resultp1)) {
2485                         /* Handle normalization */
2486                 /* A straight foward algorithm would now shift the
2487                  * result and extension left until the hidden bit
2488                  * becomes one.  Not all of the extension bits need
2489                  * participate in the shift.  Only the two most 
2490                  * significant bits (round and guard) are needed.
2491                  * If only a single shift is needed then the guard
2492                  * bit becomes a significant low order bit and the
2493                  * extension must participate in the rounding.
2494                  * If more than a single shift is needed, then all
2495                  * bits to the right of the guard bit are zeros, 
2496                  * and the guard bit may or may not be zero. */
2497                         Sglext_leftshiftby1(resultp1,resultp2);
2498
2499                         /* Need to check for a zero result.  The sign and
2500                          * exponent fields have already been zeroed.  The more
2501                          * efficient test of the full object can be used.
2502                          */
2503                          if (Sglext_iszero(resultp1,resultp2)) {
2504                                 /* Must have been "x-x" or "x+(-x)". */
2505                                 if (Is_rounding_mode(ROUNDMINUS))
2506                                         Sgl_setone_sign(resultp1);
2507                                 Sgl_copytoptr(resultp1,dstptr);
2508                                 return(NOEXCEPTION);
2509                         }
2510                         result_exponent--;
2511
2512                         /* Look to see if normalization is finished. */
2513                         if (Sgl_isone_hidden(resultp1)) {
2514                                 /* No further normalization is needed */
2515                                 goto round;
2516                         }
2517
2518                         /* Discover first one bit to determine shift amount.
2519                          * Use a modified binary search.  We have already
2520                          * shifted the result one position right and still
2521                          * not found a one so the remainder of the extension
2522                          * must be zero and simplifies rounding. */
2523                         /* Scan bytes */
2524                         while (Sgl_iszero_hiddenhigh7mantissa(resultp1)) {
2525                                 Sglext_leftshiftby8(resultp1,resultp2);
2526                                 result_exponent -= 8;
2527                         }
2528                         /* Now narrow it down to the nibble */
2529                         if (Sgl_iszero_hiddenhigh3mantissa(resultp1)) {
2530                                 /* The lower nibble contains the
2531                                  * normalizing one */
2532                                 Sglext_leftshiftby4(resultp1,resultp2);
2533                                 result_exponent -= 4;
2534                         }
2535                         /* Select case where first bit is set (already
2536                          * normalized) otherwise select the proper shift. */
2537                         jumpsize = Sgl_hiddenhigh3mantissa(resultp1);
2538                         if (jumpsize <= 7) switch(jumpsize) {
2539                         case 1:
2540                                 Sglext_leftshiftby3(resultp1,resultp2);
2541                                 result_exponent -= 3;
2542                                 break;
2543                         case 2:
2544                         case 3:
2545                                 Sglext_leftshiftby2(resultp1,resultp2);
2546                                 result_exponent -= 2;
2547                                 break;
2548                         case 4:
2549                         case 5:
2550                         case 6:
2551                         case 7:
2552                                 Sglext_leftshiftby1(resultp1,resultp2);
2553                                 result_exponent -= 1;
2554                                 break;
2555                         }
2556                 } /* end if (hidden...)... */
2557         /* Fall through and round */
2558         } /* end if (save < 0)... */
2559         else {
2560                 /* Add magnitudes */
2561                 Sglext_addition(tmpresp1,tmpresp2,
2562                         rightp1,rightp2, /*to*/resultp1,resultp2);
2563                 sign_save = Sgl_signextendedsign(resultp1);
2564                 if (Sgl_isone_hiddenoverflow(resultp1)) {
2565                         /* Prenormalization required. */
2566                         Sglext_arithrightshiftby1(resultp1,resultp2);
2567                         result_exponent++;
2568                 } /* end if hiddenoverflow... */
2569         } /* end else ...add magnitudes... */
2570
2571         /* Round the result.  If the extension and lower two words are
2572          * all zeros, then the result is exact.  Otherwise round in the
2573          * correct direction.  Underflow is possible. If a postnormalization
2574          * is necessary, then the mantissa is all zeros so no shift is needed.
2575          */
2576   round:
2577         if (result_exponent <= 0 && !Is_underflowtrap_enabled()) {
2578                 Sglext_denormalize(resultp1,resultp2,result_exponent,is_tiny);
2579         }
2580         Sgl_set_sign(resultp1,/*using*/sign_save);
2581         if (Sglext_isnotzero_mantissap2(resultp2)) {
2582                 inexact = TRUE;
2583                 switch(Rounding_mode()) {
2584                 case ROUNDNEAREST: /* The default. */
2585                         if (Sglext_isone_highp2(resultp2)) {
2586                                 /* at least 1/2 ulp */
2587                                 if (Sglext_isnotzero_low31p2(resultp2) ||
2588                                     Sglext_isone_lowp1(resultp1)) {
2589                                         /* either exactly half way and odd or
2590                                          * more than 1/2ulp */
2591                                         Sgl_increment(resultp1);
2592                                 }
2593                         }
2594                         break;
2595
2596                 case ROUNDPLUS:
2597                         if (Sgl_iszero_sign(resultp1)) {
2598                                 /* Round up positive results */
2599                                 Sgl_increment(resultp1);
2600                         }
2601                         break;
2602             
2603                 case ROUNDMINUS:
2604                         if (Sgl_isone_sign(resultp1)) {
2605                                 /* Round down negative results */
2606                                 Sgl_increment(resultp1);
2607                         }
2608             
2609                 case ROUNDZERO:;
2610                         /* truncate is simple */
2611                 } /* end switch... */
2612                 if (Sgl_isone_hiddenoverflow(resultp1)) result_exponent++;
2613         }
2614         if (result_exponent >= SGL_INFINITY_EXPONENT) {
2615                 /* Overflow */
2616                 if (Is_overflowtrap_enabled()) {
2617                         /*
2618                          * Adjust bias of result
2619                          */
2620                         Sgl_setwrapped_exponent(resultp1,result_exponent,ovfl);
2621                         Sgl_copytoptr(resultp1,dstptr);
2622                         if (inexact)
2623                             if (Is_inexacttrap_enabled())
2624                                 return (OPC_2E_OVERFLOWEXCEPTION |
2625                                         OPC_2E_INEXACTEXCEPTION);
2626                             else Set_inexactflag();
2627                         return (OPC_2E_OVERFLOWEXCEPTION);
2628                 }
2629                 inexact = TRUE;
2630                 Set_overflowflag();
2631                 Sgl_setoverflow(resultp1);
2632         } else if (result_exponent <= 0) {      /* underflow case */
2633                 if (Is_underflowtrap_enabled()) {
2634                         /*
2635                          * Adjust bias of result
2636                          */
2637                         Sgl_setwrapped_exponent(resultp1,result_exponent,unfl);
2638                         Sgl_copytoptr(resultp1,dstptr);
2639                         if (inexact)
2640                             if (Is_inexacttrap_enabled())
2641                                 return (OPC_2E_UNDERFLOWEXCEPTION |
2642                                         OPC_2E_INEXACTEXCEPTION);
2643                             else Set_inexactflag();
2644                         return(OPC_2E_UNDERFLOWEXCEPTION);
2645                 }
2646                 else if (inexact && is_tiny) Set_underflowflag();
2647         }
2648         else Sgl_set_exponent(resultp1,result_exponent);
2649         Sgl_copytoptr(resultp1,dstptr);
2650         if (inexact) 
2651                 if (Is_inexacttrap_enabled()) return(OPC_2E_INEXACTEXCEPTION);
2652                 else Set_inexactflag();
2653         return(NOEXCEPTION);
2654 }
2655