Merge /scratch/Ksrc/linux-git/
[linux-2.6] / arch / arm / lib / memcpy.S
1 /*
2  *  linux/arch/arm/lib/memcpy.S
3  *
4  *  Copyright (C) 1995-1999 Russell King
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  *  ASM optimised string functions
11  */
12 #include <linux/linkage.h>
13 #include <asm/assembler.h>
14
15                 .text
16
/*
 * Stack frame helpers used by the copy routine in this file.
 *
 * ENTER copies the incoming sp to ip, pushes r0, r4-r9, fp, ip, lr and pc
 * below it (stmfd with write-back, 11 words) and sets fp = ip - 4, i.e.
 * the address of the highest word just stored (the pc slot).
 */
17 #define ENTER   \
18                 mov     ip,sp   ;\
19                 stmfd   sp!,{r0,r4-r9,fp,ip,lr,pc}      ;\
20                 sub     fp,ip,#4
21
/*
 * EXIT unwinds that frame through fp.  The register list has 10 entries
 * against the 11 stored by ENTER: the saved ip is named as sp and the
 * saved lr is named as pc, while the stored pc slot is not reloaded.
 * r0 is reloaded too, so whatever r0 held on entry is what the caller
 * sees on return.
 * NOTE(review): LOADREGS() is defined outside this file; it presumably
 * expands to an ldm with the given suffix ("ea") on the given base --
 * confirm in <asm/assembler.h>.
 */
22 #define EXIT    \
23                 LOADREGS(ea, fp, {r0, r4 - r9, fp, sp, pc})
24
/*
 * EXITEQ is EXIT with an "eq" condition prefixed to the suffix, so it is
 * meant to return only when the Z flag is set and fall through otherwise.
 */
25 #define EXITEQ  \
26                 LOADREGS(eqea, fp, {r0, r4 - r9, fp, sp, pc})
27
28 /*
29  * Prototype: void *memcpy(void *to, const void *from, unsigned long n);
30  */
/*
 * memcpy and memmove are two entry labels on the same body.
 *
 * Registers:
 *   r0  = destination ("to").  It is advanced while copying, but ENTER
 *         saved the incoming value and EXIT/EXITEQ reload it, so the
 *         caller gets the original destination pointer back in r0.
 *   r1  = source ("from").
 *   r2  = byte count.  It is kept offset by a constant (stated at each
 *         label below) and adjusted with flag-setting adds/subs, so the
 *         same instruction updates the count and decides the branch.
 *   r3-r9, ip = data and scratch; r4-r9 are reloaded by EXIT/EXITEQ.
 *
 * This block holds the entry tests and the ascending (low to high
 * address) copy; label 23 is the descending copy.
 *
 * NOTE(review): PLD(), pull and push are not defined in this file.
 * PLD() presumably emits its argument only when the CPU has the pld
 * preload instruction, and pull/push presumably expand to lsr/lsl
 * (swapped for big-endian) -- confirm in <asm/assembler.h>.
 *
 * NOTE(review): the length tests are signed (blt/bge), so a count with
 * bit 31 set takes the "fewer than 4 bytes" branch.
 */
31 ENTRY(memcpy)
32 ENTRY(memmove)
33                 ENTER
/* from < to (unsigned compare): copy downwards from the end, at 23 */
34                 cmp     r1, r0
35                 bcc     23f
/* r2 = n - 4; with fewer than 4 bytes go straight to the byte tail at 6 */
36                 subs    r2, r2, #4
37                 blt     6f
38         PLD(    pld     [r1, #0]                )
/* ip = to & 3: destination not word aligned -> 7 */
39                 ands    ip, r0, #3
40                 bne     7f
/* ip = from & 3: destination aligned but source is not -> 8 */
41                 ands    ip, r1, #3
42                 bne     8f
43
/*
 * 1: both pointers are word aligned; on entry r2 = (bytes left) - 4.
 *    Fewer than 12 bytes left -> 5, fewer than 32 bytes left -> 4.
 *    The PLD lines take a further 64 off r2 and skip to 3 if that goes
 *    negative.
 */
44 1:              subs    r2, r2, #8
45                 blt     5f
46                 subs    r2, r2, #20
47                 blt     4f
48         PLD(    pld     [r1, #28]               )
49         PLD(    subs    r2, r2, #64             )
50         PLD(    blt     3f                      )
/*
 * 2: main loop.  Each ldmia/stmia pair moves 32 bytes through r3-r9 and
 *    ip; a pass moves one group, or two while the count stays >= 0.
 *    The first load of a pass is always stored by the final stmia.
 */
51 2:      PLD(    pld     [r1, #60]               )
52         PLD(    pld     [r1, #92]               )
53                 ldmia   r1!, {r3 - r9, ip}
54                 subs    r2, r2, #32
55                 stmgeia r0!, {r3 - r9, ip}
56                 ldmgeia r1!, {r3 - r9, ip}
57                 subges  r2, r2, #32
58                 stmia   r0!, {r3 - r9, ip}
59                 bge     2b
/*
 * 3: (PLD lines only) one or two more 32-byte groups with no further
 *    preloads; leaves r2 = (bytes left) - 32, as label 4 expects.
 */
60 3:      PLD(    ldmia   r1!, {r3 - r9, ip}      )
61         PLD(    adds    r2, r2, #32             )
62         PLD(    stmgeia r0!, {r3 - r9, ip}      )
63         PLD(    ldmgeia r1!, {r3 - r9, ip}      )
64         PLD(    subges  r2, r2, #32             )
65         PLD(    stmia   r0!, {r3 - r9, ip}      )
/*
 * 4: r2 = (bytes left) - 32 with fewer than 32 left.  Move 16 bytes if
 *    at least 16 remain, then 12 bytes if at least 12 remain; r2 ends
 *    up as (bytes left) - 12.
 */
66 4:              cmn     r2, #16
67                 ldmgeia r1!, {r3 - r6}
68                 subge   r2, r2, #16
69                 stmgeia r0!, {r3 - r6}
70                 adds    r2, r2, #20
71                 ldmgeia r1!, {r3 - r5}
72                 subge   r2, r2, #12
73                 stmgeia r0!, {r3 - r5}
/*
 * 5: r2 = (bytes left) - 12 with fewer than 12 left.  Fewer than 4 -> 6;
 *    otherwise move one word (lt: 4..7 left) or two words (ge: 8..11
 *    left).  Either way r2 ends up as (bytes left) - 4.
 */
74 5:              adds    r2, r2, #8
75                 blt     6f
76                 subs    r2, r2, #4
77                 ldrlt   r3, [r1], #4
78                 ldmgeia r1!, {r4, r5}
79                 subge   r2, r2, #4
80                 strlt   r3, [r0], #4
81                 stmgeia r0!, {r4, r5}
82
/*
 * 6: byte tail, r2 = (bytes left) - 4 with 0..3 bytes left.  The adds
 *    restores the true count; zero returns through EXITEQ, otherwise
 *    1, 2 (ge) or 3 (gt) bytes are copied before EXIT.
 */
83 6:              adds    r2, r2, #4
84                 EXITEQ
85                 cmp     r2, #2
86                 ldrb    r3, [r1], #1
87                 ldrgeb  r4, [r1], #1
88                 ldrgtb  r5, [r1], #1
89                 strb    r3, [r0], #1
90                 strgeb  r4, [r0], #1
91                 strgtb  r5, [r0], #1
92                 EXIT
93
/*
 * 7: destination misaligned (at least 4 bytes are left here).  ip
 *    becomes 4 - (to & 3), the 1..3 bytes needed to reach a word
 *    boundary; they are copied and taken off r2.  If that leaves fewer
 *    than 4 bytes finish at 6; if the source is now word aligned rejoin
 *    1; otherwise fall into 8 with ip = from & 3.
 */
94 7:              rsb     ip, ip, #4
95                 cmp     ip, #2
96                 ldrb    r3, [r1], #1
97                 ldrgeb  r4, [r1], #1
98                 ldrgtb  r5, [r1], #1
99                 strb    r3, [r0], #1
100                 strgeb  r4, [r0], #1
101                 strgtb  r5, [r0], #1
102                 subs    r2, r2, ip
103                 blt     6b
104                 ands    ip, r1, #3
105                 beq     1b
106
/*
 * 8: destination aligned, source misaligned by ip (1..3) bytes;
 *    r2 = (bytes left) - 4.  r1 is rounded down to a word boundary and
 *    that word is loaded into r7; every stored word is then assembled
 *    from two neighbouring source words with a pull/push shift pair.
 *    ip == 3 -> 18, ip == 2 -> 13, ip == 1 falls through (pull 8,
 *    push 24).
 */
107 8:              bic     r1, r1, #3
108                 ldr     r7, [r1], #4
109                 cmp     ip, #2
110                 bgt     18f
111                 beq     13f
/* ip == 1.  Fewer than 16 bytes left -> one-word loop at 11 */
112                 cmp     r2, #12
113                 blt     11f
114         PLD(    pld     [r1, #12]               )
115                 sub     r2, r2, #12
116         PLD(    subs    r2, r2, #32             )
117         PLD(    blt     10f                     )
118         PLD(    pld     [r1, #28]               )
/*
 * 9/10: 16 bytes per pass.  r7 enters holding the partly used word and
 *       leaves holding the last word loaded, for the next pass.
 */
119 9:      PLD(    pld     [r1, #44]               )
120 10:             mov     r3, r7, pull #8
121                 ldmia   r1!, {r4 - r7}
122                 subs    r2, r2, #16
123                 orr     r3, r3, r4, push #24
124                 mov     r4, r4, pull #8
125                 orr     r4, r4, r5, push #24
126                 mov     r5, r5, pull #8
127                 orr     r5, r5, r6, push #24
128                 mov     r6, r6, pull #8
129                 orr     r6, r6, r7, push #24
130                 stmia   r0!, {r3 - r6}
131                 bge     9b
/* (PLD lines only) keep going at 10 without preloads, then drop the extra 32 */
132         PLD(    cmn     r2, #32                 )
133         PLD(    bge     10b                     )
134         PLD(    add     r2, r2, #32             )
/* back to r2 = (bytes left) - 4; fewer than 4 left -> 12 */
135                 adds    r2, r2, #12
136                 blt     12f
/* 11: one word per pass while at least 4 bytes remain */
137 11:             mov     r3, r7, pull #8
138                 ldr     r7, [r1], #4
139                 subs    r2, r2, #4
140                 orr     r3, r3, r7, push #24
141                 str     r3, [r0], #4
142                 bge     11b
/*
 * 12: the word in r7 lies at r1 - 4 and its last 3 bytes are unread, so
 *     the next source byte is at r1 - 3.  Finish in the byte tail.
 */
143 12:             sub     r1, r1, #3
144                 b       6b
145
/*
 * 13: same scheme for ip == 2 (pull 16, push 16); 17 backs r1 up by 2.
 */
146 13:             cmp     r2, #12
147                 blt     16f
148         PLD(    pld     [r1, #12]               )
149                 sub     r2, r2, #12
150         PLD(    subs    r2, r2, #32             )
151         PLD(    blt     15f                     )
152         PLD(    pld     [r1, #28]               )
153 14:     PLD(    pld     [r1, #44]               )
154 15:             mov     r3, r7, pull #16
155                 ldmia   r1!, {r4 - r7}
156                 subs    r2, r2, #16
157                 orr     r3, r3, r4, push #16
158                 mov     r4, r4, pull #16
159                 orr     r4, r4, r5, push #16
160                 mov     r5, r5, pull #16
161                 orr     r5, r5, r6, push #16
162                 mov     r6, r6, pull #16
163                 orr     r6, r6, r7, push #16
164                 stmia   r0!, {r3 - r6}
165                 bge     14b
166         PLD(    cmn     r2, #32                 )
167         PLD(    bge     15b                     )
168         PLD(    add     r2, r2, #32             )
169                 adds    r2, r2, #12
170                 blt     17f
171 16:             mov     r3, r7, pull #16
172                 ldr     r7, [r1], #4
173                 subs    r2, r2, #4
174                 orr     r3, r3, r7, push #16
175                 str     r3, [r0], #4
176                 bge     16b
177 17:             sub     r1, r1, #2
178                 b       6b
179
/*
 * 18: same scheme for ip == 3 (pull 24, push 8); 22 backs r1 up by 1.
 */
180 18:             cmp     r2, #12
181                 blt     21f
182         PLD(    pld     [r1, #12]               )
183                 sub     r2, r2, #12
184         PLD(    subs    r2, r2, #32             )
185         PLD(    blt     20f                     )
186         PLD(    pld     [r1, #28]               )
187 19:     PLD(    pld     [r1, #44]               )
188 20:             mov     r3, r7, pull #24
189                 ldmia   r1!, {r4 - r7}
190                 subs    r2, r2, #16
191                 orr     r3, r3, r4, push #8
192                 mov     r4, r4, pull #24
193                 orr     r4, r4, r5, push #8
194                 mov     r5, r5, pull #24
195                 orr     r5, r5, r6, push #8
196                 mov     r6, r6, pull #24
197                 orr     r6, r6, r7, push #8
198                 stmia   r0!, {r3 - r6}
199                 bge     19b
200         PLD(    cmn     r2, #32                 )
201         PLD(    bge     20b                     )
202         PLD(    add     r2, r2, #32             )
203                 adds    r2, r2, #12
204                 blt     22f
205 21:             mov     r3, r7, pull #24
206                 ldr     r7, [r1], #4
207                 subs    r2, r2, #4
208                 orr     r3, r3, r7, push #8
209                 str     r3, [r0], #4
210                 bge     21b
211 22:             sub     r1, r1, #1
212                 b       6b
213
214
/*
 * 23: descending copy, reached from the "bcc 23f" entry test when
 *     from < to (unsigned).  r2 still holds the full byte count here.
 *     r1 and r0 are first moved to one past the end of each buffer, and
 *     every access below steps downwards (ldmdb/stmdb, [rX, #-n]!).
 *     r2 is then kept offset by a constant, as stated at each label.
 *
 *     With fewer than 4 bytes go to the byte tail at 29; ip = (end of
 *     destination) & 3 non-zero -> 30; ip = (end of source) & 3
 *     non-zero -> 31.
 *
 * NOTE(review): PLD(), pull and push are not defined in this file.
 * PLD() presumably emits its argument only when the CPU has the pld
 * preload instruction, and pull/push presumably expand to lsr/lsl
 * (swapped for big-endian) -- confirm in <asm/assembler.h>.
 */
215 23:             add     r1, r1, r2
216                 add     r0, r0, r2
217                 subs    r2, r2, #4
218                 blt     29f
219         PLD(    pld     [r1, #-4]               )
220                 ands    ip, r0, #3
221                 bne     30f
222                 ands    ip, r1, #3
223                 bne     31f
224
/*
 * 24: both end pointers are word aligned; on entry r2 = (bytes left) - 4.
 *     Fewer than 12 bytes left -> 28, fewer than 32 bytes left -> 27.
 *     The PLD lines take a further 64 off r2 and skip to 26 if that
 *     goes negative.
 */
225 24:             subs    r2, r2, #8
226                 blt     28f
227                 subs    r2, r2, #20
228                 blt     27f
229         PLD(    pld     [r1, #-32]              )
230         PLD(    subs    r2, r2, #64             )
231         PLD(    blt     26f                     )
/*
 * 25: main loop.  Each ldmdb/stmdb pair moves 32 bytes through r3-r9
 *     and ip; a pass moves one group, or two while the count stays >= 0.
 */
232 25:     PLD(    pld     [r1, #-64]              )
233         PLD(    pld     [r1, #-96]              )
234                 ldmdb   r1!, {r3 - r9, ip}
235                 subs    r2, r2, #32
236                 stmgedb r0!, {r3 - r9, ip}
237                 ldmgedb r1!, {r3 - r9, ip}
238                 subges  r2, r2, #32
239                 stmdb   r0!, {r3 - r9, ip}
240                 bge     25b
/*
 * 26: (PLD lines only) one or two more 32-byte groups with no further
 *     preloads; leaves r2 = (bytes left) - 32, as label 27 expects.
 */
241 26:     PLD(    ldmdb   r1!, {r3 - r9, ip}      )
242         PLD(    adds    r2, r2, #32             )
243         PLD(    stmgedb r0!, {r3 - r9, ip}      )
244         PLD(    ldmgedb r1!, {r3 - r9, ip}      )
245         PLD(    subges  r2, r2, #32             )
246         PLD(    stmdb   r0!, {r3 - r9, ip}      )
/*
 * 27: r2 = (bytes left) - 32 with fewer than 32 left.  Move 16 bytes if
 *     at least 16 remain, then 12 bytes if at least 12 remain; r2 ends
 *     up as (bytes left) - 12.
 */
247 27:             cmn     r2, #16
248                 ldmgedb r1!, {r3 - r6}
249                 subge   r2, r2, #16
250                 stmgedb r0!, {r3 - r6}
251                 adds    r2, r2, #20
252                 ldmgedb r1!, {r3 - r5}
253                 subge   r2, r2, #12
254                 stmgedb r0!, {r3 - r5}
/*
 * 28: r2 = (bytes left) - 12 with fewer than 12 left.  Fewer than
 *     4 -> 29; otherwise move one word (lt) or two words (ge).  Either
 *     way r2 ends up as (bytes left) - 4.
 */
255 28:             adds    r2, r2, #8
256                 blt     29f
257                 subs    r2, r2, #4
258                 ldrlt   r3, [r1, #-4]!
259                 ldmgedb r1!, {r4, r5}
260                 subge   r2, r2, #4
261                 strlt   r3, [r0, #-4]!
262                 stmgedb r0!, {r4, r5}
263
/*
 * 29: byte tail, r2 = (bytes left) - 4 with 0..3 bytes left.  The adds
 *     restores the true count; zero returns through EXITEQ, otherwise
 *     1, 2 (ge) or 3 (gt) bytes are copied before EXIT.
 */
264 29:             adds    r2, r2, #4
265                 EXITEQ
266                 cmp     r2, #2
267                 ldrb    r3, [r1, #-1]!
268                 ldrgeb  r4, [r1, #-1]!
269                 ldrgtb  r5, [r1, #-1]!
270                 strb    r3, [r0, #-1]!
271                 strgeb  r4, [r0, #-1]!
272                 strgtb  r5, [r0, #-1]!
273                 EXIT
274
/*
 * 30: end of destination misaligned (at least 4 bytes are left here).
 *     ip = r0 & 3 is itself the 1..3 bytes that bring r0 down to a word
 *     boundary; they are copied and taken off r2.  If that leaves fewer
 *     than 4 bytes finish at 29; if the source end is now word aligned
 *     rejoin 24; otherwise fall into 31 with ip = r1 & 3.
 */
275 30:             cmp     ip, #2
276                 ldrb    r3, [r1, #-1]!
277                 ldrgeb  r4, [r1, #-1]!
278                 ldrgtb  r5, [r1, #-1]!
279                 strb    r3, [r0, #-1]!
280                 strgeb  r4, [r0, #-1]!
281                 strgtb  r5, [r0, #-1]!
282                 subs    r2, r2, ip
283                 blt     29b
284                 ands    ip, r1, #3
285                 beq     24b
286
/*
 * 31: destination end aligned, source end misaligned by ip (1..3)
 *     bytes; r2 = (bytes left) - 4.  r1 is rounded down to a word
 *     boundary and that word is loaded into r3 (post-index by 0, so r1
 *     does not move); every stored word is then assembled from two
 *     neighbouring source words with a push/pull shift pair.
 *     ip == 1 -> 41, ip == 2 -> 36, ip == 3 falls through (push 8,
 *     pull 24).
 */
287 31:             bic     r1, r1, #3
288                 ldr     r3, [r1], #0
289                 cmp     ip, #2
290                 blt     41f
291                 beq     36f
/* ip == 3.  Fewer than 16 bytes left -> one-word loop at 34 */
292                 cmp     r2, #12
293                 blt     34f
294         PLD(    pld     [r1, #-16]              )
295                 sub     r2, r2, #12
296         PLD(    subs    r2, r2, #32             )
297         PLD(    blt     33f                     )
298         PLD(    pld     [r1, #-32]              )
/*
 * 32/33: 16 bytes per pass.  r3 enters holding the partly used word
 *        and leaves holding the lowest-addressed word of the four just
 *        loaded, which r1 then points at.
 */
299 32:     PLD(    pld     [r1, #-48]              )
300 33:             mov     r7, r3, push #8
301                 ldmdb   r1!, {r3, r4, r5, r6}
302                 subs    r2, r2, #16
303                 orr     r7, r7, r6, pull #24
304                 mov     r6, r6, push #8
305                 orr     r6, r6, r5, pull #24
306                 mov     r5, r5, push #8
307                 orr     r5, r5, r4, pull #24
308                 mov     r4, r4, push #8
309                 orr     r4, r4, r3, pull #24
310                 stmdb   r0!, {r4, r5, r6, r7}
311                 bge     32b
/* (PLD lines only) keep going at 33 without preloads, then drop the extra 32 */
312         PLD(    cmn     r2, #32                 )
313         PLD(    bge     33b                     )
314         PLD(    add     r2, r2, #32             )
/* back to r2 = (bytes left) - 4; fewer than 4 left -> 35 */
315                 adds    r2, r2, #12
316                 blt     35f
/* 34: one word per pass while at least 4 bytes remain */
317 34:             mov     ip, r3, push #8
318                 ldr     r3, [r1, #-4]!
319                 subs    r2, r2, #4
320                 orr     ip, ip, r3, pull #24
321                 str     ip, [r0, #-4]!
322                 bge     34b
/*
 * 35: r1 addresses the word held in r3, whose first 3 bytes are still
 *     unread, so the unread data ends at r1 + 3.  Finish in the byte
 *     tail.
 */
323 35:             add     r1, r1, #3
324                 b       29b
325
/*
 * 36: same scheme for ip == 2 (push 16, pull 16); 40 moves r1 up by 2.
 */
326 36:             cmp     r2, #12
327                 blt     39f
328         PLD(    pld     [r1, #-16]              )
329                 sub     r2, r2, #12
330         PLD(    subs    r2, r2, #32             )
331         PLD(    blt     38f                     )
332         PLD(    pld     [r1, #-32]              )
333 37:     PLD(    pld     [r1, #-48]              )
334 38:             mov     r7, r3, push #16
335                 ldmdb   r1!, {r3, r4, r5, r6}
336                 subs    r2, r2, #16
337                 orr     r7, r7, r6, pull #16
338                 mov     r6, r6, push #16
339                 orr     r6, r6, r5, pull #16
340                 mov     r5, r5, push #16
341                 orr     r5, r5, r4, pull #16
342                 mov     r4, r4, push #16
343                 orr     r4, r4, r3, pull #16
344                 stmdb   r0!, {r4, r5, r6, r7}
345                 bge     37b
346         PLD(    cmn     r2, #32                 )
347         PLD(    bge     38b                     )
348         PLD(    add     r2, r2, #32             )
349                 adds    r2, r2, #12
350                 blt     40f
351 39:             mov     ip, r3, push #16
352                 ldr     r3, [r1, #-4]!
353                 subs    r2, r2, #4
354                 orr     ip, ip, r3, pull #16
355                 str     ip, [r0, #-4]!
356                 bge     39b
357 40:             add     r1, r1, #2
358                 b       29b
359
/*
 * 41: same scheme for ip == 1 (push 24, pull 8); 45 moves r1 up by 1.
 */
360 41:             cmp     r2, #12
361                 blt     44f
362         PLD(    pld     [r1, #-16]              )
363                 sub     r2, r2, #12
364         PLD(    subs    r2, r2, #32             )
365         PLD(    blt     43f                     )
366         PLD(    pld     [r1, #-32]              )
367 42:     PLD(    pld     [r1, #-48]              )
368 43:             mov     r7, r3, push #24
369                 ldmdb   r1!, {r3, r4, r5, r6}
370                 subs    r2, r2, #16
371                 orr     r7, r7, r6, pull #8
372                 mov     r6, r6, push #24
373                 orr     r6, r6, r5, pull #8
374                 mov     r5, r5, push #24
375                 orr     r5, r5, r4, pull #8
376                 mov     r4, r4, push #24
377                 orr     r4, r4, r3, pull #8
378                 stmdb   r0!, {r4, r5, r6, r7}
379                 bge     42b
380         PLD(    cmn     r2, #32                 )
381         PLD(    bge     43b                     )
382         PLD(    add     r2, r2, #32             )
383                 adds    r2, r2, #12
384                 blt     45f
385 44:             mov     ip, r3, push #24
386                 ldr     r3, [r1, #-4]!
387                 subs    r2, r2, #4
388                 orr     ip, ip, r3, pull #8
389                 str     ip, [r0, #-4]!
390                 bge     44b
391 45:             add     r1, r1, #1
392                 b       29b
393