git.oblomov.eu Git - linux-2.6/blob - arch/arm/lib/lib1funcs.S

   1 /*
   2  * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
   3  *
   4  * Author: Nicolas Pitre <nico@cam.org>
   5  *   - contributed to gcc-3.4 on Sep 30, 2003
   6  *   - adapted for the Linux kernel on Oct 2, 2003
   7  */
   8
   9 /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
  10
  11 This file is free software; you can redistribute it and/or modify it
  12 under the terms of the GNU General Public License as published by the
  13 Free Software Foundation; either version 2, or (at your option) any
  14 later version.
  15
  16 In addition to the permissions in the GNU General Public License, the
  17 Free Software Foundation gives you unlimited permission to link the
  18 compiled version of this file into combinations with other programs,
  19 and to distribute those combinations without any restriction coming
  20 from the use of this file.  (The General Public License restrictions
  21 do apply in other respects; for example, they cover modification of
  22 the file, and distribution when not linked into a combine
  23 executable.)
  24
  25 This file is distributed in the hope that it will be useful, but
  26 WITHOUT ANY WARRANTY; without even the implied warranty of
  27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  28 General Public License for more details.
  29
  30 You should have received a copy of the GNU General Public License
  31 along with this program; see the file COPYING.  If not, write to
  32 the Free Software Foundation, 59 Temple Place - Suite 330,
  33 Boston, MA 02111-1307, USA.  */
  34
  35
  36 #include <linux/linkage.h>
  37 #include <asm/assembler.h>
  38
  39
  40 .macro ARM_DIV_BODY dividend, divisor, result, curbit
     @ Unsigned shift-and-subtract division core.
     @   dividend: in = value to divide; modified while the loop runs
     @   divisor:  in = value to divide by; shifted left first, then right
     @   result:   out = quotient, built up by OR-ing in one bit at a time
     @   curbit:   scratch = quotient bit matching the current divisor shift
     @ Both users in this file only expand the macro after checking that
     @ the divisor is non-zero, that dividend > divisor (unsigned) and that
     @ the divisor is not a power of two.
     @ All four registers and the condition flags are clobbered.
  41
  42 #if __LINUX_ARM_ARCH__ >= 5
  43
             @ Line the divisor up with the dividend in a single step:
             @ shift = clz(divisor) - clz(dividend), curbit = 1 << shift.
  44         clz     \curbit, \divisor
  45         clz     \result, \dividend
  46         sub     \result, \curbit, \result
  47         mov     \curbit, #1
  48         mov     \divisor, \divisor, lsl \result
  49         mov     \curbit, \curbit, lsl \result
  50         mov     \result, #0
  51
  52 #else
  53
  54         @ Initially shift the divisor left 3 bits if possible,
  55         @ set curbit accordingly.  This allows for curbit to be located
  56         @ at the left end of each 4-bit nibble in the division loop
  57         @ to save one loop in most cases.
  58         tst     \divisor, #0xe0000000
  59         moveq   \divisor, \divisor, lsl #3
  60         moveq   \curbit, #8
  61         movne   \curbit, #1
  62
  63         @ Unless the divisor is very big, shift it up in multiples of
  64         @ four bits, since this is the amount of unwinding in the main
  65         @ division loop.  Continue shifting until the divisor is
  66         @ larger than the dividend.
  67 1:      cmp     \divisor, #0x10000000
  68         cmplo   \divisor, \dividend
  69         movlo   \divisor, \divisor, lsl #4
  70         movlo   \curbit, \curbit, lsl #4
  71         blo     1b
  72
  73         @ For very big divisors, we must shift it a bit at a time, or
  74         @ we will be in danger of overflowing.
  75 1:      cmp     \divisor, #0x80000000
  76         cmplo   \divisor, \dividend
  77         movlo   \divisor, \divisor, lsl #1
  78         movlo   \curbit, \curbit, lsl #1
  79         blo     1b
  80
  81         mov     \result, #0
  82
  83 #endif
  84
  85         @ Division loop
             @ Each pass is unrolled four times: it tries the divisor shifted
             @ right by 0, 1, 2 and 3 bits and, whenever the subtraction fits,
             @ ORs the equally shifted curbit into the quotient.
  86 1:      cmp     \dividend, \divisor
  87         subhs   \dividend, \dividend, \divisor
  88         orrhs   \result,   \result,   \curbit
  89         cmp     \dividend, \divisor,  lsr #1
  90         subhs   \dividend, \dividend, \divisor, lsr #1
  91         orrhs   \result,   \result,   \curbit,  lsr #1
  92         cmp     \dividend, \divisor,  lsr #2
  93         subhs   \dividend, \dividend, \divisor, lsr #2
  94         orrhs   \result,   \result,   \curbit,  lsr #2
  95         cmp     \dividend, \divisor,  lsr #3
  96         subhs   \dividend, \dividend, \divisor, lsr #3
  97         orrhs   \result,   \result,   \curbit,  lsr #3
             @ Stop when nothing is left of the dividend, or when curbit has
             @ been shifted out (movnes then leaves Z set, so bne falls through).
  98         cmp     \dividend, #0                   @ Early termination?
  99         movnes  \curbit,   \curbit,  lsr #4     @ No, any more bits to do?
 100         movne   \divisor,  \divisor, lsr #4
 101         bne     1b
 102
 103 .endm
 104
 105
 106 .macro ARM_DIV2_ORDER divisor, order
     @ Compute the shift count that replaces a division by a power of two.
     @   divisor: in = the divisor; the users in this file only get here
     @            after testing that it is a power of two
     @   order:   out = bit index of the set bit, i.e. log2(divisor)
     @ The pre-ARMv5 variant shifts divisor right while searching, so the
     @ divisor register is clobbered there.  Flags are clobbered on that
     @ path as well.
 107
 108 #if __LINUX_ARM_ARCH__ >= 5
 109
 110         clz     \order, \divisor
 111         rsb     \order, \order, #31             @ order = 31 - clz(divisor)
 112
 113 #else
 114
             @ Binary search for the set bit: 16, then 8, then 4 bit steps.
 115         cmp     \divisor, #(1 << 16)
 116         movhs   \divisor, \divisor, lsr #16
 117         movhs   \order, #16
 118         movlo   \order, #0
 119
 120         cmp     \divisor, #(1 << 8)
 121         movhs   \divisor, \divisor, lsr #8
 122         addhs   \order, \order, #8
 123
 124         cmp     \divisor, #(1 << 4)
 125         movhs   \divisor, \divisor, lsr #4
 126         addhs   \order, \order, #4
 127
             @ divisor is now below 16.  Above 4 it is taken to be 8 (add 3);
             @ otherwise divisor >> 1 gives 0, 1 or 2 for 1, 2 or 4.  This
             @ last step is only exact for powers of two.
 128         cmp     \divisor, #(1 << 2)
 129         addhi   \order, \order, #3
 130         addls   \order, \order, \divisor, lsr #1
 131
 132 #endif
 133
 134 .endm
 135
 136
 137 .macro ARM_MOD_BODY dividend, divisor, order, spare
     @ Unsigned shift-and-subtract remainder core.
     @   dividend: in = value to divide; out = remainder
     @   divisor:  in = value to divide by; shifted left first, then right
     @   order:    scratch = how many bits the divisor was shifted left
     @   spare:    scratch, only written by the ARMv5+ variant
     @ Both users in this file only expand the macro when the divisor is
     @ non-zero, not a power of two, and dividend > divisor (unsigned).
     @ Condition flags are clobbered.
 138
 139 #if __LINUX_ARM_ARCH__ >= 5
 140
             @ order = clz(divisor) - clz(dividend); align divisor with dividend.
 141         clz     \order, \divisor
 142         clz     \spare, \dividend
 143         sub     \order, \order, \spare
 144         mov     \divisor, \divisor, lsl \order
 145
 146 #else
 147
 148         mov     \order, #0
 149
 150         @ Unless the divisor is very big, shift it up in multiples of
 151         @ four bits, since this is the amount of unwinding in the main
 152         @ division loop.  Continue shifting until the divisor is
 153         @ larger than the dividend.
 154 1:      cmp     \divisor, #0x10000000
 155         cmplo   \divisor, \dividend
 156         movlo   \divisor, \divisor, lsl #4
 157         addlo   \order, \order, #4
 158         blo     1b
 159
 160         @ For very big divisors, we must shift it a bit at a time, or
 161         @ we will be in danger of overflowing.
 162 1:      cmp     \divisor, #0x80000000
 163         cmplo   \divisor, \dividend
 164         movlo   \divisor, \divisor, lsl #1
 165         addlo   \order, \order, #1
 166         blo     1b
 167
 168 #endif
 169
 170         @ Perform all needed subtractions to keep only the remainder.
 171         @ Do comparisons in batch of 4 first.
             @ A shift of order bits needs order + 1 compare/subtract steps
             @ (one per shift from order down to 0), so a full batch of 4 is
             @ only possible while order >= 3.
 172         subs    \order, \order, #3              @ yes, 3 is intended here
 173         blt     2f
 174
 175 1:      cmp     \dividend, \divisor
 176         subhs   \dividend, \dividend, \divisor
 177         cmp     \dividend, \divisor,  lsr #1
 178         subhs   \dividend, \dividend, \divisor, lsr #1
 179         cmp     \dividend, \divisor,  lsr #2
 180         subhs   \dividend, \dividend, \divisor, lsr #2
 181         cmp     \dividend, \divisor,  lsr #3
 182         subhs   \dividend, \dividend, \divisor, lsr #3
             @ Go round again only if something is left of the dividend and
             @ order is still >= 0 after taking off the 4 steps just done.
             @ The divisor shift itself does not touch the flags.
 183         cmp     \dividend, #1
 184         mov     \divisor, \divisor, lsr #4
 185         subges  \order, \order, #4
 186         bge     1b
 187
             @ Finished if no partial batch remains (low two bits of order are
             @ clear) or if the dividend has already dropped to zero.
 188         tst     \order, #3
 189         teqne   \dividend, #0
 190         beq     5f
 191
 192         @ Either 1, 2 or 3 comparison/subtractions are left.
             @ order is -3, -2 or -1 here: 1 step from 4f, 2 from 3f, or all 3.
 193 2:      cmn     \order, #2
 194         blt     4f
 195         beq     3f
 196         cmp     \dividend, \divisor
 197         subhs   \dividend, \dividend, \divisor
 198         mov     \divisor,  \divisor,  lsr #1
 199 3:      cmp     \dividend, \divisor
 200         subhs   \dividend, \dividend, \divisor
 201         mov     \divisor,  \divisor,  lsr #1
 202 4:      cmp     \dividend, \divisor
 203         subhs   \dividend, \dividend, \divisor
 204 5:
 205 .endm
 206
 207
 208 ENTRY(__udivsi3)
     @ Unsigned division.
     @   In:  r0 = dividend, r1 = divisor
     @   Out: r0 = quotient
     @ r1, r2, r3 and the condition flags may be clobbered.
     @ A zero divisor is routed to Ldiv0.
 209
 210         subs    r2, r1, #1                      @ r2 = divisor - 1
 211         moveq   pc, lr                          @ divisor == 1: r0 is the answer
 212         bcc     Ldiv0                           @ borrow: divisor == 0
 213         cmp     r0, r1
 214         bls     11f                             @ dividend <= divisor
 215         tst     r1, r2                          @ divisor is power of 2 ?
 216         beq     12f
 217
 218         ARM_DIV_BODY r0, r1, r2, r3
 219
 220         mov     r0, r2                          @ quotient was built in r2
 221         mov     pc, lr
 222
             @ Flags still come from cmp r0, r1: quotient is 1 when the
             @ operands are equal and 0 when the dividend is smaller.
 223 11:     moveq   r0, #1
 224         movne   r0, #0
 225         mov     pc, lr
 226
             @ Power-of-two divisor: the division is a plain right shift.
 227 12:     ARM_DIV2_ORDER r1, r2
 228
 229         mov     r0, r0, lsr r2
 230         mov     pc, lr
 231
 232
 233 ENTRY(__umodsi3)
     @ Unsigned remainder.
     @   In:  r0 = dividend, r1 = divisor
     @   Out: r0 = remainder
     @ r1, r2, r3 and the condition flags may be clobbered.
     @ A zero divisor is routed to Ldiv0.
     @
     @ The conditional chain below settles every easy case without
     @ branching; each instruction relies on the flags left by the ones
     @ before it:
     @   divisor == 1 or dividend == divisor  -> remainder 0
     @   dividend < divisor                   -> remainder is the dividend
     @   divisor is a power of two            -> dividend & (divisor - 1)
     @ Only the remaining case falls through to ARM_MOD_BODY.
 234
 235         subs    r2, r1, #1                      @ compare divisor with 1
 236         bcc     Ldiv0
 237         cmpne   r0, r1                          @ compare dividend with divisor
 238         moveq   r0, #0
 239         tsthi   r1, r2                          @ see if divisor is power of 2
 240         andeq   r0, r0, r2
 241         movls   pc, lr
 242
 243         ARM_MOD_BODY r0, r1, r2, r3
 244
 245         mov     pc, lr
 246
 247
 248 ENTRY(__divsi3)
     @ Signed division.
     @   In:  r0 = dividend, r1 = divisor
     @   Out: r0 = quotient
     @ ip, r1, r2, r3 and the condition flags may be clobbered.
     @ A zero divisor is routed to Ldiv0.
     @ The work is done on absolute values; ip = r0 ^ r1 keeps the sign of
     @ the result in its top bit so it can be applied at the end.
 249
 250         cmp     r1, #0
 251         eor     ip, r0, r1                      @ save the sign of the result.
 252         beq     Ldiv0
 253         rsbmi   r1, r1, #0                      @ loops below use unsigned.
 254         subs    r2, r1, #1                      @ division by 1 or -1 ?
 255         beq     10f
 256         movs    r3, r0
 257         rsbmi   r3, r0, #0                      @ positive dividend value
 258         cmp     r3, r1
 259         bls     11f                             @ |dividend| <= |divisor|
 260         tst     r1, r2                          @ divisor is power of 2 ?
 261         beq     12f
 262
 263         ARM_DIV_BODY r3, r1, r0, r2
 264
 265         cmp     ip, #0                          @ operand signs differed ?
 266         rsbmi   r0, r0, #0                      @ yes, negate the quotient
 267         mov     pc, lr
 268
             @ Divisor was 1 or -1.  r0 is still the untouched dividend, so
             @ ip ^ r0 is the original divisor: N is set when it was -1, and
             @ the dividend is negated in that case.
 269 10:     teq     ip, r0                          @ same sign ?
 270         rsbmi   r0, r0, #0
 271         mov     pc, lr
 272
             @ Flags still come from cmp r3, r1.  A smaller dividend gives 0;
             @ equal magnitudes give +1 or -1: ip asr 31 is 0 or -1, and
             @ setting bit 0 turns that into 1 or -1.
 273 11:     movlo   r0, #0
 274         moveq   r0, ip, asr #31
 275         orreq   r0, r0, #1
 276         mov     pc, lr
 277
             @ Power-of-two divisor: shift |dividend| right, then fix the sign.
             @ The shift in between does not change the flags set by cmp.
 278 12:     ARM_DIV2_ORDER r1, r2
 279
 280         cmp     ip, #0
 281         mov     r0, r3, lsr r2
 282         rsbmi   r0, r0, #0
 283         mov     pc, lr
 284
 285
 286 ENTRY(__modsi3)
     @ Signed remainder.
     @   In:  r0 = dividend, r1 = divisor
     @   Out: r0 = remainder, carrying the sign of the dividend
     @ ip, r1, r2, r3 and the condition flags may be clobbered.
     @ A zero divisor is routed to Ldiv0.
     @ The work is done on absolute values; ip keeps the original dividend
     @ so that its sign can be applied to the result at label 10.
 287
 288         cmp     r1, #0
 289         beq     Ldiv0
 290         rsbmi   r1, r1, #0                      @ loops below use unsigned.
 291         movs    ip, r0                          @ preserve sign of dividend
 292         rsbmi   r0, r0, #0                      @ if negative make positive
             @ Same branch-free filter as in __umodsi3: remainder 0 when the
             @ divisor is 1 or equals the dividend, dividend unchanged when it
             @ is smaller, and a simple mask for a power-of-two divisor.
 293         subs    r2, r1, #1                      @ compare divisor with 1
 294         cmpne   r0, r1                          @ compare dividend with divisor
 295         moveq   r0, #0
 296         tsthi   r1, r2                          @ see if divisor is power of 2
 297         andeq   r0, r0, r2
 298         bls     10f
 299
 300         ARM_MOD_BODY r0, r1, r2, r3
 301
 302 10:     cmp     ip, #0                          @ was the dividend negative ?
 303         rsbmi   r0, r0, #0                      @ yes, negate the remainder
 304         mov     pc, lr
 305
 306
 307 Ldiv0:
     @ Common divide-by-zero exit for the four routines above.
     @ Calls __div0 (defined outside this file; presumably it reports the
     @ error) and then returns 0 in r0 to whoever called the division
     @ routine, whose return address is still in lr on entry here.
     @
     @ The frame is 8 bytes although only lr is saved: that keeps sp
     @ 8-byte aligned across the call, as the ARM EABI requires at public
     @ interfaces.  The second word is padding and is never read.
 308
 309         str     lr, [sp, #-8]!                  @ push lr, keep sp 8-byte aligned
 310         bl      __div0
 311         mov     r0, #0                  @ About as wrong as it could be.
 312         ldr     pc, [sp], #8                    @ pop the frame and return
 313
 314