2 * linux/arch/arm/lib/csumpartialcopygeneric.S
4 * Copyright (C) 1995-2001 Russell King
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
13 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
14 * r0 = src, r1 = dst, r2 = len, r3 = sum
15 * Returns : r0 = checksum
17 * Note that 'tst' and 'teq' preserve the carry flag.
29 * Align an unaligned destination pointer. We know that
30 * we have >= 8 bytes here, so we don't need to check
31 * the length. Note that the source pointer hasn't been
@ NOTE(review): put_byte_N and load2b are helper macros defined elsewhere
@ in the kernel headers (not visible in this excerpt); presumably they
@ position/load bytes in an endian-independent way — confirm against
@ asm/assembler.h before relying on byte order here.
40 adcs sum, sum, ip, put_byte_1 @ update checksum
@ The caller is returned to via lr when alignment is already reached.
43 moveq pc, lr @ dst is now 32bit aligned
@ dst was only 16-bit aligned: fold two more source bytes into the
@ checksum (adcs keeps accumulating the carry) before returning.
45 .Ldst_16bit: load2b r8, ip
47 adcs sum, sum, r8, put_byte_0 @ fold first byte, with carry
49 adcs sum, sum, ip, put_byte_1 @ fold second byte, with carry
51 mov pc, lr @ dst is now 32bit aligned
54 * Handle 0 to 7 bytes, with any alignment of source and
55 * destination pointers. Note that when we get here, C = 0
@ teq sets Z without touching C (see header note), so the carry
@ accumulated into the checksum so far is preserved across this test.
57 .Lless8: teq len, #0 @ check for zero count
60 /* we must have at least one byte. */
@ tst likewise preserves C while testing the low dst address bit.
61 tst dst, #1 @ dst 16-bit aligned
67 adcs sum, sum, ip, put_byte_1 @ update checksum
@ Remaining tail bytes are folded in pairwise; intervening load/store
@ lines are elided from this excerpt.
74 adcs sum, sum, r8, put_byte_0
76 adcs sum, sum, ip, put_byte_1
85 adcs sum, sum, r8, put_byte_0 @ update checksum
@ Entry dispatch: short copies (< 8 bytes) go to the byte-at-a-time
@ tail handler; blo is the unsigned comparison, so len is treated
@ as unsigned here.
92 cmp len, #8 @ Ensure that we have at least
93 blo .Lless8 @ 8 bytes to copy.
@ adds #0 clears the carry flag so the first adcs below starts with
@ a clean carry chain.
95 adds sum, sum, #0 @ C = 0
96 tst dst, #3 @ Test destination alignment
97 blne .Ldst_unaligned @ align destination, return here
100 * Ok, the dst pointer is now 32bit aligned, and we know
101 * that we must have more than 4 bytes to copy. Note
102 * that C contains the carry from the dst alignment above.
@ tst preserves C, so the carry from the alignment code survives
@ this source-alignment test.
105 tst src, #3 @ Test source alignment
106 bne .Lsrc_not_aligned
108 /* Routine for src & dst aligned */
@ Main aligned copy loop: load four words from the source (load4l is a
@ kernel macro defined elsewhere) and store them with post-increment.
@ The checksum accumulation for these words is in lines elided from
@ this excerpt.
113 1: load4l r4, r5, r6, r7
114 stmia dst!, {r4, r5, r6, r7}
@ Tail handling: get_byte_N extracts individual bytes of the last
@ partial word; push is presumably an endianness-dependent shift
@ macro (see asm/assembler.h) — confirm direction before editing.
142 mov r5, r4, get_byte_0
144 adcs sum, sum, r4, push #16
146 mov r5, r4, get_byte_1
148 mov r5, r4, get_byte_2
@ adcnes = pre-UAL "adcs + ne condition": only folds r5 into the
@ checksum when the preceding test was non-zero.
152 adcnes sum, sum, r5, put_byte_0
155 * If the dst pointer was not 16-bit aligned, we
156 * need to rotate the checksum here to get around
157 * the inefficient byte manipulations in the
158 * architecture independent code.
@ Fold the final carry into the running sum; r0 is the return value
@ (r0 = checksum, per the header comment).
160 .Ldone: adc r0, sum, #0
@ Reload the original dst pointer saved on the stack — presumably to
@ test its initial alignment for the rotation described above; the
@ test itself is in lines elided from this excerpt.
161 ldr sum, [sp, #0] @ dst
167 adc sum, sum, #0 @ include C from dst alignment
@ Source one byte past word alignment: each destination word is
@ spliced from two consecutive source words. pull/push are
@ endianness-dependent shift macros defined elsewhere (pull #8 /
@ push #24 split each word at a one-byte boundary) — confirm against
@ asm/assembler.h.
174 mov r4, r5, pull #8 @ C = 0
@ Unrolled splice loop: load four words, shift each pair together so
@ the stores below write whole realigned words.
177 1: load4l r5, r6, r7, r8
178 orr r4, r4, r5, push #24
180 orr r5, r5, r6, push #24
182 orr r6, r6, r7, push #24
184 orr r7, r7, r8, push #24
185 stmia dst!, {r4, r5, r6, r7}
@ Shorter tails use the same splice pattern with fewer words;
@ the loads between these lines are elided from this excerpt.
199 orr r4, r4, r5, push #24
201 orr r5, r5, r6, push #24
209 orr r4, r4, r5, push #24
@ Final 0-3 bytes: extract individually and fold into the checksum.
215 mov r5, r4, get_byte_0
218 adcs sum, sum, r4, push #16
220 mov r5, r4, get_byte_1
222 mov r5, r4, get_byte_2
@ Source two bytes past word alignment: same splice technique as the
@ one-byte case, but each word pair is joined at a halfword boundary
@ (pull #16 / push #16).
225 .Lsrc2_aligned: mov r4, r5, pull #16
229 1: load4l r5, r6, r7, r8
230 orr r4, r4, r5, push #16
232 orr r5, r5, r6, push #16
234 orr r6, r6, r7, push #16
236 orr r7, r7, r8, push #16
237 stmia dst!, {r4, r5, r6, r7}
@ Shorter tails repeat the splice with fewer words; intervening
@ loads/stores are elided from this excerpt.
251 orr r4, r4, r5, push #16
253 orr r5, r5, r6, push #16
261 orr r4, r4, r5, push #16
@ Final bytes extracted individually for the checksum tail.
267 mov r5, r4, get_byte_0
272 mov r5, r4, get_byte_1
@ Source three bytes past word alignment: splice each word pair at a
@ three-byte boundary (pull #24 / push #8), mirroring the one- and
@ two-byte cases above.
279 .Lsrc3_aligned: mov r4, r5, pull #24
283 1: load4l r5, r6, r7, r8
284 orr r4, r4, r5, push #8
286 orr r5, r5, r6, push #8
288 orr r6, r6, r7, push #8
290 orr r7, r7, r8, push #8
291 stmia dst!, {r4, r5, r6, r7}
@ Shorter tails repeat the splice with fewer words; intervening
@ loads/stores are elided from this excerpt.
305 orr r4, r4, r5, push #8
307 orr r5, r5, r6, push #8
315 orr r4, r4, r5, push #8
@ Final bytes: extract and fold into the checksum with carry.
321 mov r5, r4, get_byte_0
327 mov r5, r4, get_byte_0
329 adcs sum, sum, r4, push #24
330 mov r5, r4, get_byte_1