/*
 *  linux/arch/arm26/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * JMA 01/06/03 Commented out some shl0s; probably irrelevant to arm26
 */
/*
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
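/*
 * Illustrative sketch (not assembled): the checksum is built by
 * chaining the carry from one adcs into the next, with a final adc to
 * fold the last carry back in.  Because tst/teq leave C untouched,
 * flow-control tests can sit inside the chain, e.g.:
 *
 *	adds	sum, r4, r5		@ sum = r4 + r5, C = carry out
 *	teq	len, #0			@ C survives this test
 *	adcs	sum, sum, r6		@ keep accumulating, folding C in
 *	adc	sum, sum, #0		@ fold the final carry
 */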
		stmfd	sp!, {r1, r4 - r8, fp, ip, lr, pc}
/*
 * Align an unaligned destination pointer.  We know that
 * we have >= 8 bytes here, so we don't need to check
 * the length.  Note that the source pointer hasn't been
 * aligned yet.
 */
.dst_unaligned:	tst	dst, #1
		adcs	sum, sum, ip, lsl #byte(1)	@ update checksum
		moveq	pc, lr			@ dst is now 32bit aligned
.dst_16bit:	load2b	r8, ip
		adcs	sum, sum, r8, lsl #byte(0)
		adcs	sum, sum, ip, lsl #byte(1)
		mov	pc, lr			@ dst is now 32bit aligned
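/*
 * Illustrative note: byte(n) is the usual little-endian lane macro
 * (byte(n) == n * 8), so the shifts above place each stored byte in
 * the checksum lane matching its dst offset.  E.g. for a leading odd
 * byte of 0xAB:
 *
 *	adcs	sum, sum, ip, lsl #byte(1)	@ adds 0x0000AB00
 */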
/*
 * Handle 0 to 7 bytes, with any alignment of source and
 * destination pointers.  Note that when we get here, C = 0.
 */
.less8:		teq	len, #0			@ check for zero count

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		adcs	sum, sum, ip, lsl #byte(1)	@ update checksum

		adcs	sum, sum, r8, lsl #byte(0)
		adcs	sum, sum, ip, lsl #byte(1)

.less8_aligned:	tst	len, #6
		adcs	sum, sum, r8, lsl #byte(0)	@ update checksum
		cmp	len, #8			@ Ensure that we have at least
		blo	.less8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.dst_unaligned		@ align destination, return here
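/*
 * Illustrative note: "adds sum, sum, #0" can never carry out, so it is
 * an idiomatic way of clearing C before the adcs chain starts; the tst
 * and blne that follow do not disturb the carry flag.
 */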
/*
 * Ok, the dst pointer is now 32bit aligned, and we know
 * that we must have more than 4 bytes to copy.  Note
 * that C contains the carry from the dst alignment above.
 */
		tst	src, #3			@ Test source alignment

/* Routine for src & dst aligned */

1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
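/*
 * Illustrative note on this fast path: each iteration moves 16 bytes
 * with one multi-word load and one multi-word store; in the full file
 * each of r4-r7 is then folded into sum with adcs before the loop
 * counter is tested (with teq, which preserves C).
 */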
/*		mov	r5, r4, lsr #byte(0)
FIXME? 0 Shift anyhow!
*/
		adcs	sum, sum, r4, push #16
		mov	r5, r4, lsr #byte(1)
		mov	r5, r4, lsr #byte(2)
		adcnes	sum, sum, r5, lsl #byte(0)
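/*
 * Illustrative note on the FIXME above: with byte(0) == 0 the
 * commented-out instruction is a shift by zero, i.e. a plain
 * "mov r5, r4".  This matches the header remark about shl0s being
 * probably irrelevant on arm26.
 */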
/*
 * If the dst pointer was not 16-bit aligned, we
 * need to rotate the checksum here to get around
 * the inefficient byte manipulations in the
 * architecture independent code.
 */
.done:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		movne	sum, r0, lsl #8
		orrne	r0, sum, r0, lsr #24
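/*
 * Illustrative note: the movne/orrne pair computes
 * r0 = (r0 << 8) | (r0 >> 24), a rotate left by 8.  When dst was odd,
 * every byte entered the checksum one lane too high; the rotate undoes
 * that, e.g. r0 = 0x11223344 becomes 0x22334411.
 */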
		adc	sum, sum, #0		@ include C from dst alignment
		mov	r4, r5, pull #8		@ C = 0
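/*
 * Illustrative note: pull/push are the usual shift-macro pair from
 * asm/assembler.h (on little-endian arm26, pull is lsr and push is
 * lsl).  They let a misaligned source be read with aligned loads: the
 * low part of each output word is pulled down from one aligned word
 * and the high part pushed up from the next.
 */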
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #24
		orr	r5, r5, r6, push #24
		orr	r6, r6, r7, push #24
		orr	r7, r7, r8, push #24
		stmia	dst!, {r4, r5, r6, r7}
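/*
 * Worked example for this 1-byte-misaligned loop (illustrative): say
 * the aligned words are 0x44332211 and 0x88776655, with src pointing
 * at the byte 0x22; then
 *
 *	mov	r4, r5, pull #8		@ r5 = 0x44332211 -> r4 = 0x00443322
 *	orr	r4, r4, r5, push #24	@ next r5 = 0x88776655 -> r4 = 0x55443322
 *
 * and r4 now holds exactly the four bytes 22 33 44 55 as seen from src.
 */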
		orr	r4, r4, r5, push #24
		orr	r5, r5, r6, push #24

		orr	r4, r4, r5, push #24
/*		mov	r5, r4, lsr #byte(0)
FIXME? 0 Shift anyhow
*/
		adcs	sum, sum, r4, push #16
		mov	r5, r4, lsr #byte(1)
		mov	r5, r4, lsr #byte(2)
.src2_aligned:	mov	r4, r5, pull #16

1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #16
		orr	r5, r5, r6, push #16
		orr	r6, r6, r7, push #16
		orr	r7, r7, r8, push #16
		stmia	dst!, {r4, r5, r6, r7}
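/*
 * Illustrative note: same splicing technique as the loop above, but
 * for a half-word-misaligned source: pull #16 moves the upper half of
 * the current aligned word down, and push #16 moves the lower half of
 * the next word up.
 */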
		orr	r4, r4, r5, push #16
		orr	r5, r5, r6, push #16

		orr	r4, r4, r5, push #16
/*		mov	r5, r4, lsr #byte(0)
FIXME? 0 Shift anyhow
*/
		mov	r5, r4, lsr #byte(1)
.src3_aligned:	mov	r4, r5, pull #24

1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, push #8
		orr	r5, r5, r6, push #8
		orr	r6, r6, r7, push #8
		orr	r7, r7, r8, push #8
		stmia	dst!, {r4, r5, r6, r7}
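/*
 * Illustrative note: the 3-byte-misaligned case mirrors the other two:
 * pull #24 keeps only the top byte of the current aligned word, and
 * push #8 supplies the remaining three bytes from the next one.
 */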
		orr	r4, r4, r5, push #8
		orr	r5, r5, r6, push #8

		orr	r4, r4, r5, push #8
/*		mov	r5, r4, lsr #byte(0)
FIXME? 0 Shift anyhow
*/
/*		mov	r5, r4, lsr #byte(0)
FIXME? 0 Shift anyhow
*/
		adcs	sum, sum, r4, push #24
		mov	r5, r4, lsr #byte(1)