Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
[linux-2.6] / arch / m68k / fpsp040 / stwotox.S
1 |
2 |       stwotox.sa 3.1 12/10/90
3 |
4 |       stwotox  --- 2**X
5 |       stwotoxd --- 2**X for denormalized X
6 |       stentox  --- 10**X
7 |       stentoxd --- 10**X for denormalized X
8 |
9 |       Input: Double-extended number X in location pointed to
10 |               by address register a0.
11 |
12 |       Output: The function values are returned in Fp0.
13 |
14 |       Accuracy and Monotonicity: The returned result is within 2 ulps in
15 |               64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
16 |               result is subsequently rounded to double precision. The
17 |               result is provably monotonic in double precision.
18 |
19 |       Speed: The program stwotox takes approximately 190 cycles and the
20 |               program stentox takes approximately 200 cycles.
21 |
22 |       Algorithm:
23 |
24 |       twotox
25 |       1. If |X| > 16480, go to ExpBig.
26 |
27 |       2. If |X| < 2**(-70), go to ExpSm.
28 |
29 |       3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
30 |               decompose N as
31 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
32 |
33 |       4. Overwrite r := r * log2. Then
34 |               2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
35 |               Go to expr to compute that expression.
36 |
37 |       tentox
38 |       1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
39 |
40 |       2. If |X| < 2**(-70), go to ExpSm.
41 |
42 |       3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
43 |               N := round-to-int(y). Decompose N as
44 |                N = 64(M + M') + j,  j = 0,1,2,...,63.
45 |
46 |       4. Define r as
47 |               r := ((X - N*L1)-N*L2) * L10
48 |               where L1, L2 are the leading and trailing parts of log_10(2)/64
49 |               and L10 is the natural log of 10. Then
50 |               10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
51 |               Go to expr to compute that expression.
52 |
53 |       expr
54 |       1. Fetch 2**(j/64) from table as Fact1 and Fact2.
55 |
56 |       2. Overwrite Fact1 and Fact2 by
57 |               Fact1 := 2**(M) * Fact1
58 |               Fact2 := 2**(M) * Fact2
59 |               Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
60 |
61 |       3. Calculate P where 1 + P approximates exp(r):
62 |               P = r + r*r*(A1+r*(A2+...+r*A5)).
63 |
64 |       4. Let AdjFact := 2**(M'). Return
65 |               AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
66 |               Exit.
67 |
68 |       ExpBig
69 |       1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
70 |               underflow by Tiny * Tiny.
71 |
72 |       ExpSm
73 |       1. Return 1 + X.
74 |
75
76 |               Copyright (C) Motorola, Inc. 1990
77 |                       All Rights Reserved
78 |
79 |       For details on the license for this file, please see the
80 |       file, README, in this same directory.
81
82 |STWOTOX        idnt    2,1 | Motorola 040 Floating Point Software Package
83
84         |section        8
85
86 #include "fpsp.h"
87
88 BOUNDS1:        .long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
89 BOUNDS2:        .long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
90
91 L2TEN64:        .long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
92 L10TWO1:        .long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
93
94 L10TWO2:        .long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
95
96 LOG10:  .long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
97
98 LOG2:   .long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
99
100 EXPA5:  .long 0x3F56C16D,0x6F7BD0B2
101 EXPA4:  .long 0x3F811112,0x302C712C
102 EXPA3:  .long 0x3FA55555,0x55554CC1
103 EXPA2:  .long 0x3FC55555,0x55554A54
104 EXPA1:  .long 0x3FE00000,0x00000000,0x00000000,0x00000000
105
106 HUGE:   .long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
107 TINY:   .long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
108
109 EXPTBL:
110         .long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
111         .long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
112         .long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
113         .long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
114         .long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
115         .long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
116         .long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
117         .long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
118         .long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
119         .long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
120         .long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
121         .long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
122         .long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
123         .long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
124         .long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
125         .long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
126         .long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
127         .long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
128         .long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
129         .long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
130         .long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
131         .long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
132         .long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
133         .long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
134         .long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
135         .long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
136         .long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
137         .long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
138         .long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
139         .long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
140         .long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
141         .long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
142         .long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
143         .long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
144         .long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
145         .long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
146         .long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
147         .long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
148         .long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
149         .long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
150         .long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
151         .long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
152         .long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
153         .long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
154         .long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
155         .long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
156         .long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
157         .long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
158         .long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
159         .long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
160         .long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
161         .long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
162         .long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
163         .long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
164         .long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
165         .long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
166         .long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
167         .long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
168         .long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
169         .long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
170         .long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
171         .long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
172         .long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
173         .long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
174
175         .set    N,L_SCR1
176
177         .set    X,FP_SCR1
178         .set    XDCARE,X+2
179         .set    XFRAC,X+4
180
181         .set    ADJFACT,FP_SCR2
182
183         .set    FACT1,FP_SCR3
184         .set    FACT1HI,FACT1+4
185         .set    FACT1LOW,FACT1+8
186
187         .set    FACT2,FP_SCR4
188         .set    FACT2HI,FACT2+4
189         .set    FACT2LOW,FACT2+8
190
191         | xref  t_unfl
192         |xref   t_ovfl
193         |xref   t_frcinx
194
195         .global stwotoxd
196 stwotoxd:
197 |--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
198
199         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
200         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
201         movel           (%a0),%d0
202         orl             #0x00800001,%d0
203         fadds           %d0,%fp0
204         bra             t_frcinx
205
206         .global stwotox
207 stwotox:
208 |--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
209         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
210
211         movel           (%a0),%d0
212         movew           4(%a0),%d0
213         fmovex          %fp0,X(%a6)
214         andil           #0x7FFFFFFF,%d0
215
216         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
217         bges            TWOOK1
218         bra             EXPBORS
219
220 TWOOK1:
221         cmpil           #0x400D80C0,%d0         | ...|X| > 16480?
222         bles            TWOMAIN
223         bra             EXPBORS
224
225
226 TWOMAIN:
227 |--USUAL CASE, 2^(-70) <= |X| <= 16480
228
229         fmovex          %fp0,%fp1
230         fmuls           #0x42800000,%fp1  | ...64 * X
231
232         fmovel          %fp1,N(%a6)             | ...N = ROUND-TO-INT(64 X)
233         movel           %d2,-(%sp)
234         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
235         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
236         movel           N(%a6),%d0
237         movel           %d0,%d2
238         andil           #0x3F,%d0               | ...D0 IS J
239         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
240         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
241         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
242         movel           %d2,%d0
243         asrl            #1,%d0          | ...D0 IS M
244         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
245         addil           #0x3FFF,%d2
246         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
247         movel           (%sp)+,%d2
248 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
249 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
250 |--ADJFACT = 2^(M').
251 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
252
253         fmuls           #0x3C800000,%fp1  | ...(1/64)*N
254         movel           (%a1)+,FACT1(%a6)
255         movel           (%a1)+,FACT1HI(%a6)
256         movel           (%a1)+,FACT1LOW(%a6)
257         movew           (%a1)+,FACT2(%a6)
258         clrw            FACT2+2(%a6)
259
260         fsubx           %fp1,%fp0               | ...X - (1/64)*INT(64 X)
261
262         movew           (%a1)+,FACT2HI(%a6)
263         clrw            FACT2HI+2(%a6)
264         clrl            FACT2LOW(%a6)
265         addw            %d0,FACT1(%a6)
266
267         fmulx           LOG2,%fp0       | ...FP0 IS R
268         addw            %d0,FACT2(%a6)
269
270         bra             expr
271
272 EXPBORS:
273 |--FPCR, D0 SAVED
274         cmpil           #0x3FFF8000,%d0
275         bgts            EXPBIG
276
277 EXPSM:
278 |--|X| IS SMALL, RETURN 1 + X
279
280         fmovel          %d1,%FPCR               |restore users exceptions
281         fadds           #0x3F800000,%fp0  | ...RETURN 1 + X
282
283         bra             t_frcinx
284
285 EXPBIG:
286 |--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
287 |--REGISTERS SAVE SO FAR ARE FPCR AND  D0
288         movel           X(%a6),%d0
289         cmpil           #0,%d0
290         blts            EXPNEG
291
292         bclrb           #7,(%a0)                |t_ovfl expects positive value
293         bra             t_ovfl
294
295 EXPNEG:
296         bclrb           #7,(%a0)                |t_unfl expects positive value
297         bra             t_unfl
298
299         .global stentoxd
300 stentoxd:
301 |--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
302
303         fmovel          %d1,%fpcr               | ...set user's rounding mode/precision
304         fmoves          #0x3F800000,%fp0  | ...RETURN 1 + X
305         movel           (%a0),%d0
306         orl             #0x00800001,%d0
307         fadds           %d0,%fp0
308         bra             t_frcinx
309
310         .global stentox
311 stentox:
312 |--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
313         fmovemx (%a0),%fp0-%fp0 | ...LOAD INPUT, do not set cc's
314
315         movel           (%a0),%d0
316         movew           4(%a0),%d0
317         fmovex          %fp0,X(%a6)
318         andil           #0x7FFFFFFF,%d0
319
320         cmpil           #0x3FB98000,%d0         | ...|X| >= 2**(-70)?
321         bges            TENOK1
322         bra             EXPBORS
323
324 TENOK1:
325         cmpil           #0x400B9B07,%d0         | ...|X| <= 16480*log2/log10 ?
326         bles            TENMAIN
327         bra             EXPBORS
328
329 TENMAIN:
330 |--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
331
332         fmovex          %fp0,%fp1
333         fmuld           L2TEN64,%fp1    | ...X*64*LOG10/LOG2
334
335         fmovel          %fp1,N(%a6)             | ...N=INT(X*64*LOG10/LOG2)
336         movel           %d2,-(%sp)
337         lea             EXPTBL,%a1      | ...LOAD ADDRESS OF TABLE OF 2^(J/64)
338         fmovel          N(%a6),%fp1             | ...N --> FLOATING FMT
339         movel           N(%a6),%d0
340         movel           %d0,%d2
341         andil           #0x3F,%d0               | ...D0 IS J
342         asll            #4,%d0          | ...DISPLACEMENT FOR 2^(J/64)
343         addal           %d0,%a1         | ...ADDRESS FOR 2^(J/64)
344         asrl            #6,%d2          | ...d2 IS L, N = 64L + J
345         movel           %d2,%d0
346         asrl            #1,%d0          | ...D0 IS M
347         subl            %d0,%d2         | ...d2 IS M', N = 64(M+M') + J
348         addil           #0x3FFF,%d2
349         movew           %d2,ADJFACT(%a6)        | ...ADJFACT IS 2^(M')
350         movel           (%sp)+,%d2
351
352 |--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
353 |--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
354 |--ADJFACT = 2^(M').
355 |--REGISTERS SAVED SO FAR ARE (IN ORDER) FPCR, D0, FP1, a1, AND FP2.
356
357         fmovex          %fp1,%fp2
358
359         fmuld           L10TWO1,%fp1    | ...N*(LOG2/64LOG10)_LEAD
360         movel           (%a1)+,FACT1(%a6)
361
362         fmulx           L10TWO2,%fp2    | ...N*(LOG2/64LOG10)_TRAIL
363
364         movel           (%a1)+,FACT1HI(%a6)
365         movel           (%a1)+,FACT1LOW(%a6)
366         fsubx           %fp1,%fp0               | ...X - N L_LEAD
367         movew           (%a1)+,FACT2(%a6)
368
369         fsubx           %fp2,%fp0               | ...X - N L_TRAIL
370
371         clrw            FACT2+2(%a6)
372         movew           (%a1)+,FACT2HI(%a6)
373         clrw            FACT2HI+2(%a6)
374         clrl            FACT2LOW(%a6)
375
376         fmulx           LOG10,%fp0      | ...FP0 IS R
377
378         addw            %d0,FACT1(%a6)
379         addw            %d0,FACT2(%a6)
380
381 expr:
382 |--FPCR, FP2, FP3 ARE SAVED IN ORDER AS SHOWN.
383 |--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
384 |--FP0 IS R. THE FOLLOWING CODE COMPUTES
385 |--     2**(M'+M) * 2**(J/64) * EXP(R)
386
387         fmovex          %fp0,%fp1
388         fmulx           %fp1,%fp1               | ...FP1 IS S = R*R
389
390         fmoved          EXPA5,%fp2      | ...FP2 IS A5
391         fmoved          EXPA4,%fp3      | ...FP3 IS A4
392
393         fmulx           %fp1,%fp2               | ...FP2 IS S*A5
394         fmulx           %fp1,%fp3               | ...FP3 IS S*A4
395
396         faddd           EXPA3,%fp2      | ...FP2 IS A3+S*A5
397         faddd           EXPA2,%fp3      | ...FP3 IS A2+S*A4
398
399         fmulx           %fp1,%fp2               | ...FP2 IS S*(A3+S*A5)
400         fmulx           %fp1,%fp3               | ...FP3 IS S*(A2+S*A4)
401
402         faddd           EXPA1,%fp2      | ...FP2 IS A1+S*(A3+S*A5)
403         fmulx           %fp0,%fp3               | ...FP3 IS R*S*(A2+S*A4)
404
405         fmulx           %fp1,%fp2               | ...FP2 IS S*(A1+S*(A3+S*A5))
406         faddx           %fp3,%fp0               | ...FP0 IS R+R*S*(A2+S*A4)
407
408         faddx           %fp2,%fp0               | ...FP0 IS EXP(R) - 1
409
410
411 |--FINAL RECONSTRUCTION PROCESS
412 |--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
413
414         fmulx           FACT1(%a6),%fp0
415         faddx           FACT2(%a6),%fp0
416         faddx           FACT1(%a6),%fp0
417
418         fmovel          %d1,%FPCR               |restore users exceptions
419         clrw            ADJFACT+2(%a6)
420         movel           #0x80000000,ADJFACT+4(%a6)
421         clrl            ADJFACT+8(%a6)
422         fmulx           ADJFACT(%a6),%fp0       | ...FINAL ADJUSTMENT
423
424         bra             t_frcinx
425
426         |end