git.oblomov.eu Git - linux-2.6/blob - arch/x86/math-emu/div_Xsig.S

   1         .file   "div_Xsig.S"
   2 /*---------------------------------------------------------------------------+
   3  |  div_Xsig.S                                                               |
   4  |                                                                           |
   5  | Division subroutine for 96 bit quantities                                 |
   6  |                                                                           |
   7  | Copyright (C) 1994,1995                                                   |
   8  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   9  |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
  10  |                                                                           |
  11  |                                                                           |
  12  +---------------------------------------------------------------------------*/
  13
  14 /*---------------------------------------------------------------------------+
  15  | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
  16  | put the 96 bit result at the location d.                                  |
  17  |                                                                           |
  18  | The result may not be accurate to 96 bits. It is intended for use where   |
  19  | a result better than 64 bits is required. The result should usually be    |
  20  | good to at least 94 bits.                                                 |
  21  | The returned result is actually divided by one half. This is done to      |
  22  | prevent overflow.                                                         |
  23  |                                                                           |
  24  |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
  25  |                                                                           |
  26  |  void div_Xsig(Xsig *a, Xsig *b, Xsig *dest)                              |
  27  |                                                                           |
  28  +---------------------------------------------------------------------------*/
  29
  30 #include "exception.h"
  31 #include "fpu_emu.h"
  32
  33
  34 #define XsigLL(x)       (x)
  35 #define XsigL(x)        4(x)
  36 #define XsigH(x)        8(x)
  37
  38
  39 #ifndef NON_REENTRANT_FPU
  40 /*
  41         Local storage on the stack:
  42         Accumulator:    FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  43  */
  44 #define FPU_accum_3     -4(%ebp)
  45 #define FPU_accum_2     -8(%ebp)
  46 #define FPU_accum_1     -12(%ebp)
  47 #define FPU_accum_0     -16(%ebp)
  48 #define FPU_result_3    -20(%ebp)
  49 #define FPU_result_2    -24(%ebp)
  50 #define FPU_result_1    -28(%ebp)
  51
  52 #else
  53 .data
  54 /*
  55         Local storage in a static area:
  56         Accumulator:    FPU_accum_3:FPU_accum_2:FPU_accum_1:FPU_accum_0
  57  */
  58         .align 4,0
  59 FPU_accum_3:
  60         .long   0
  61 FPU_accum_2:
  62         .long   0
  63 FPU_accum_1:
  64         .long   0
  65 FPU_accum_0:
  66         .long   0
  67 FPU_result_3:
  68         .long   0
  69 FPU_result_2:
  70         .long   0
  71 FPU_result_1:
  72         .long   0
  73 #endif /* NON_REENTRANT_FPU */
  74
  75
  76 .text
  77 ENTRY(div_Xsig)
  78         pushl   %ebp
  79         movl    %esp,%ebp
  80 #ifndef NON_REENTRANT_FPU
  81         subl    $28,%esp
  82 #endif /* NON_REENTRANT_FPU */
  83
  84         pushl   %esi
  85         pushl   %edi
  86         pushl   %ebx
  87
  88         movl    PARAM1,%esi     /* pointer to num */
  89         movl    PARAM2,%ebx     /* pointer to denom */
  90
  91 #ifdef PARANOID
  92         testl   $0x80000000, XsigH(%ebx)        /* Divisor */
  93         je      L_bugged
  94 #endif /* PARANOID */
  95
  96
  97 /*---------------------------------------------------------------------------+
  98  |  Divide:   Return  arg1/arg2 to arg3.                                     |
  99  |                                                                           |
 100  |  The maximum returned value is (ignoring exponents)                       |
 101  |               .ffffffff ffffffff                                          |
 102  |               ------------------  =  1.ffffffff fffffffe                  |
 103  |               .80000000 00000000                                          |
 104  | and the minimum is                                                        |
 105  |               .80000000 00000000                                          |
 106  |               ------------------  =  .80000000 00000001   (rounded)       |
 107  |               .ffffffff ffffffff                                          |
 108  |                                                                           |
 109  +---------------------------------------------------------------------------*/
 110
 111         /* Save extended dividend in local register */
 112
 113         /* Divide by 2 to prevent overflow */
 114         clc
 115         movl    XsigH(%esi),%eax
 116         rcrl    %eax
 117         movl    %eax,FPU_accum_3
 118         movl    XsigL(%esi),%eax
 119         rcrl    %eax
 120         movl    %eax,FPU_accum_2
 121         movl    XsigLL(%esi),%eax
 122         rcrl    %eax
 123         movl    %eax,FPU_accum_1
 124         movl    $0,%eax
 125         rcrl    %eax
 126         movl    %eax,FPU_accum_0
 127
 128         movl    FPU_accum_2,%eax        /* Get the current num */
 129         movl    FPU_accum_3,%edx
 130
 131 /*----------------------------------------------------------------------*/
 132 /* Initialization done.
 133    Do the first 32 bits. */
 134
 135         /* We will divide by a number which is too large */
 136         movl    XsigH(%ebx),%ecx
 137         addl    $1,%ecx
 138         jnc     LFirst_div_not_1
 139
 140         /* here we need to divide by 100000000h,
 141            i.e., no division at all.. */
 142         mov     %edx,%eax
 143         jmp     LFirst_div_done
 144
 145 LFirst_div_not_1:
 146         divl    %ecx            /* Divide the numerator by the augmented
 147                                    denom ms dw */
 148
 149 LFirst_div_done:
 150         movl    %eax,FPU_result_3       /* Put the result in the answer */
 151
 152         mull    XsigH(%ebx)     /* mul by the ms dw of the denom */
 153
 154         subl    %eax,FPU_accum_2        /* Subtract from the num local reg */
 155         sbbl    %edx,FPU_accum_3
 156
 157         movl    FPU_result_3,%eax       /* Get the result back */
 158         mull    XsigL(%ebx)     /* now mul the ls dw of the denom */
 159
 160         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 161         sbbl    %edx,FPU_accum_2
 162         sbbl    $0,FPU_accum_3
 163         je      LDo_2nd_32_bits         /* Must check for non-zero result here */
 164
 165 #ifdef PARANOID
 166         jb      L_bugged_1
 167 #endif /* PARANOID */
 168
 169         /* need to subtract another once of the denom */
 170         incl    FPU_result_3    /* Correct the answer */
 171
 172         movl    XsigL(%ebx),%eax
 173         movl    XsigH(%ebx),%edx
 174         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 175         sbbl    %edx,FPU_accum_2
 176
 177 #ifdef PARANOID
 178         sbbl    $0,FPU_accum_3
 179         jne     L_bugged_1      /* Must check for non-zero result here */
 180 #endif /* PARANOID */
 181
 182 /*----------------------------------------------------------------------*/
 183 /* Half of the main problem is done, there is just a reduced numerator
 184    to handle now.
 185    Work with the second 32 bits, FPU_accum_0 not used from now on */
 186 LDo_2nd_32_bits:
 187         movl    FPU_accum_2,%edx        /* get the reduced num */
 188         movl    FPU_accum_1,%eax
 189
 190         /* need to check for possible subsequent overflow */
 191         cmpl    XsigH(%ebx),%edx
 192         jb      LDo_2nd_div
 193         ja      LPrevent_2nd_overflow
 194
 195         cmpl    XsigL(%ebx),%eax
 196         jb      LDo_2nd_div
 197
 198 LPrevent_2nd_overflow:
 199 /* The numerator is greater or equal, would cause overflow */
 200         /* prevent overflow */
 201         subl    XsigL(%ebx),%eax
 202         sbbl    XsigH(%ebx),%edx
 203         movl    %edx,FPU_accum_2
 204         movl    %eax,FPU_accum_1
 205
 206         incl    FPU_result_3    /* Reflect the subtraction in the answer */
 207
 208 #ifdef PARANOID
 209         je      L_bugged_2      /* Can't bump the result to 1.0 */
 210 #endif /* PARANOID */
 211
 212 LDo_2nd_div:
 213         cmpl    $0,%ecx         /* augmented denom msw */
 214         jnz     LSecond_div_not_1
 215
 216         /* %ecx == 0, we are dividing by 1.0 */
 217         mov     %edx,%eax
 218         jmp     LSecond_div_done
 219
 220 LSecond_div_not_1:
 221         divl    %ecx            /* Divide the numerator by the denom ms dw */
 222
 223 LSecond_div_done:
 224         movl    %eax,FPU_result_2       /* Put the result in the answer */
 225
 226         mull    XsigH(%ebx)     /* mul by the ms dw of the denom */
 227
 228         subl    %eax,FPU_accum_1        /* Subtract from the num local reg */
 229         sbbl    %edx,FPU_accum_2
 230
 231 #ifdef PARANOID
 232         jc      L_bugged_2
 233 #endif /* PARANOID */
 234
 235         movl    FPU_result_2,%eax       /* Get the result back */
 236         mull    XsigL(%ebx)     /* now mul the ls dw of the denom */
 237
 238         subl    %eax,FPU_accum_0        /* Subtract from the num local reg */
 239         sbbl    %edx,FPU_accum_1        /* Subtract from the num local reg */
 240         sbbl    $0,FPU_accum_2
 241
 242 #ifdef PARANOID
 243         jc      L_bugged_2
 244 #endif /* PARANOID */
 245
 246         jz      LDo_3rd_32_bits
 247
 248 #ifdef PARANOID
 249         cmpl    $1,FPU_accum_2
 250         jne     L_bugged_2
 251 #endif /* PARANOID */
 252
 253         /* need to subtract another once of the denom */
 254         movl    XsigL(%ebx),%eax
 255         movl    XsigH(%ebx),%edx
 256         subl    %eax,FPU_accum_0        /* Subtract from the num local reg */
 257         sbbl    %edx,FPU_accum_1
 258         sbbl    $0,FPU_accum_2
 259
 260 #ifdef PARANOID
 261         jc      L_bugged_2
 262         jne     L_bugged_2
 263 #endif /* PARANOID */
 264
 265         addl    $1,FPU_result_2 /* Correct the answer */
 266         adcl    $0,FPU_result_3
 267
 268 #ifdef PARANOID
 269         jc      L_bugged_2      /* Must check for non-zero result here */
 270 #endif /* PARANOID */
 271
 272 /*----------------------------------------------------------------------*/
 273 /* The division is essentially finished here, we just need to perform
 274    tidying operations.
 275    Deal with the 3rd 32 bits */
 276 LDo_3rd_32_bits:
 277         /* We use an approximation for the third 32 bits.
 278         To take account of the 3rd 32 bits of the divisor
 279         (call them del), we subtract  del * (a/b) */
 280
 281         movl    FPU_result_3,%eax       /* a/b */
 282         mull    XsigLL(%ebx)            /* del */
 283
 284         subl    %edx,FPU_accum_1
 285
 286         /* A borrow indicates that the result is negative */
 287         jnb     LTest_over
 288
 289         movl    XsigH(%ebx),%edx
 290         addl    %edx,FPU_accum_1
 291
 292         subl    $1,FPU_result_2         /* Adjust the answer */
 293         sbbl    $0,FPU_result_3
 294
 295         /* The above addition might not have been enough, check again. */
 296         movl    FPU_accum_1,%edx        /* get the reduced num */
 297         cmpl    XsigH(%ebx),%edx        /* denom */
 298         jb      LDo_3rd_div
 299
 300         movl    XsigH(%ebx),%edx
 301         addl    %edx,FPU_accum_1
 302
 303         subl    $1,FPU_result_2         /* Adjust the answer */
 304         sbbl    $0,FPU_result_3
 305         jmp     LDo_3rd_div
 306
 307 LTest_over:
 308         movl    FPU_accum_1,%edx        /* get the reduced num */
 309
 310         /* need to check for possible subsequent overflow */
 311         cmpl    XsigH(%ebx),%edx        /* denom */
 312         jb      LDo_3rd_div
 313
 314         /* prevent overflow */
 315         subl    XsigH(%ebx),%edx
 316         movl    %edx,FPU_accum_1
 317
 318         addl    $1,FPU_result_2 /* Reflect the subtraction in the answer */
 319         adcl    $0,FPU_result_3
 320
 321 LDo_3rd_div:
 322         movl    FPU_accum_0,%eax
 323         movl    FPU_accum_1,%edx
 324         divl    XsigH(%ebx)
 325
 326         movl    %eax,FPU_result_1       /* Rough estimate of third word */
 327
 328         movl    PARAM3,%esi             /* pointer to answer */
 329
 330         movl    FPU_result_1,%eax
 331         movl    %eax,XsigLL(%esi)
 332         movl    FPU_result_2,%eax
 333         movl    %eax,XsigL(%esi)
 334         movl    FPU_result_3,%eax
 335         movl    %eax,XsigH(%esi)
 336
 337 L_exit:
 338         popl    %ebx
 339         popl    %edi
 340         popl    %esi
 341
 342         leave
 343         ret
 344
 345
 346 #ifdef PARANOID
 347 /* The logic is wrong if we got here */
 348 L_bugged:
 349         pushl   EX_INTERNAL|0x240
 350         call    EXCEPTION
 351         pop     %ebx
 352         jmp     L_exit
 353
 354 L_bugged_1:
 355         pushl   EX_INTERNAL|0x241
 356         call    EXCEPTION
 357         pop     %ebx
 358         jmp     L_exit
 359
 360 L_bugged_2:
 361         pushl   EX_INTERNAL|0x242
 362         call    EXCEPTION
 363         pop     %ebx
 364         jmp     L_exit
 365 #endif /* PARANOID */