2  *  linux/arch/arm/lib/csumpartialcopygeneric.S
 
   4  *  Copyright (C) 1995-2001 Russell King
 
   6  * This program is free software; you can redistribute it and/or modify
 
   7  * it under the terms of the GNU General Public License version 2 as
 
   8  * published by the Free Software Foundation.
 
  13  * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ...)
 
  14  *  r0 = src, r1 = dst, r2 = len, r3 = sum
 
  15  *  Returns : r0 = checksum
 
  17  * Note that 'tst' and 'teq' preserve the carry flag.
 
  29                  * Align an unaligned destination pointer.  We know that
 
  30                  * we have >= 8 bytes here, so we don't need to check
 
  31                  * the length.  Note that the source pointer hasn't been
 
  /*
   * Fragment of the destination-alignment helper.  The listing skips
   * lines here, so the loads and stores that accompany these checksum
   * updates are not shown.  load2b, put_byte_0 and put_byte_1 are
   * macros defined outside this excerpt (presumably a two-byte load
   * and byte-lane shift operands -- confirm against their definitions).
   * Each adcs consumes the carry left by the previous add and produces
   * a new one, so the carry chain runs through the whole sequence.
   */
  40                 adcs    sum, sum, ip, put_byte_1        @ update checksum
 
  /* Return through lr only when Z is set; the instruction that sets Z is not visible here. */
  43                 moveq   pc, lr                  @ dst is now 32bit aligned
 
  /* Presumably reached when dst is 16-bit but not 32-bit aligned -- confirm. */
  45 .Ldst_16bit:    load2b  r8, ip
 
  /* Fold r8 and then ip into sum, each with carry. */
  47                 adcs    sum, sum, r8, put_byte_0
 
  49                 adcs    sum, sum, ip, put_byte_1
 
  /* Unconditional return to the caller through lr. */
  51                 mov     pc, lr                  @ dst is now 32bit aligned
 
  54                  * Handle 0 to 7 bytes, with any alignment of source and
 
  55                  * destination pointers.  Note that when we get here, C = 0
 
  /*
   * Short-copy path: the main entry branches here when len is below 8
   * (see the cmp/blo pair at listing lines 92-93).  Several lines are
   * missing from this excerpt, including the consumers of the flags set
   * by teq/tst and the loads/stores paired with each checksum update.
   * teq and tst leave C untouched (see the note in the file header), so
   * the carry chain used by adcs is not disturbed by these tests.
   */
  57 .Lless8:        teq     len, #0                 @ check for zero count
 
  60                 /* we must have at least one byte. */
 
  /* Z is set when bit 0 of dst is clear. */
  61                 tst     dst, #1                 @ dst 16-bit aligned
 
  /*
   * put_byte_0/put_byte_1 are macros defined outside this excerpt
   * (presumably byte-lane shift operands -- confirm).  Each adcs adds one
   * byte register into sum together with the incoming carry.
   */
  67                 adcs    sum, sum, ip, put_byte_1        @ update checksum
 
  74                 adcs    sum, sum, r8, put_byte_0
 
  76                 adcs    sum, sum, ip, put_byte_1
 
  85                 adcs    sum, sum, r8, put_byte_0        @ update checksum
 
  /*
   * Main entry sequence (the entry label and any register save are not
   * part of this excerpt).  Lengths below 8 go to .Lless8; otherwise the
   * destination is aligned first and the source alignment then selects
   * the copy loop.  blo is the unsigned "lower" condition.
   */
  92                 cmp     len, #8                 @ Ensure that we have at least
 
  93                 blo     .Lless8                 @ 8 bytes to copy.
 
  /* Adding zero leaves sum unchanged and gives no carry-out, so the adcs chain starts with C clear. */
  95                 adds    sum, sum, #0            @ C = 0
 
  96                 tst     dst, #3                 @ Test destination alignment
 
  /* Branch-with-link, taken only when dst & 3 is non-zero; lr lets the helper return to the next instruction. */
  97                 blne    .Ldst_unaligned         @ align destination, return here
 
 100                  * Ok, the dst pointer is now 32bit aligned, and we know
 
 101                  * that we must have more than 4 bytes to copy.  Note
 
 102                  * that C contains the carry from the dst alignment above.
 
  /* tst does not alter C (see the file header note), so the carry from the alignment step survives this test. */
 105                 tst     src, #3                 @ Test source alignment
 
 106                 bne     .Lsrc_not_aligned
 
 108                 /* Routine for src & dst aligned */
 
  /*
   * load4l is a macro defined outside this excerpt (presumably loads four
   * words from src into r4-r7 -- confirm).  The checksum additions between
   * the load and the store are not shown in this listing.
   */
 113 1:              load4l  r4, r5, r6, r7
 
  /* Store the four words and write back dst advanced by 16 bytes. */
 114                 stmia   dst!, {r4, r5, r6, r7}
 
  /*
   * Tail of the aligned path: trailing bytes are taken out of r4 one lane
   * at a time.  get_byte_N, put_byte_N and push are macros defined outside
   * this excerpt (presumably endian-dependent shift operands -- confirm);
   * the stores that use r5 are not shown.
   */
 142                 mov     r5, r4, get_byte_0
 
 144                 adcs    sum, sum, r4, push #16
 
 146                 mov     r5, r4, get_byte_1
 
 148                 mov     r5, r4, get_byte_2
 
  /* Executed only when the NE condition holds; also updates the flags. */
 152                 adcnes  sum, sum, r5, put_byte_0
 
 155                  * If the dst pointer was not 16-bit aligned, we
 
 156                  * need to rotate the checksum here to get around
 
 157                  * the inefficient byte manipulations in the
 
 158                  * architecture independent code.
 
 /*
  * Common exit: fold the outstanding carry into the checksum and place the
  * result in r0, the return register named in the file header.
  */
 160 .Ldone:         adc     r0, sum, #0
 
 /*
  * The sum register is free once its value is in r0, so it is reused to
  * reload the word at the top of the stack (the dst pointer, per the
  * existing comment).  The alignment test and rotate described in the
  * comment above .Ldone are not visible in this excerpt.
  */
 161                 ldr     sum, [sp, #0]           @ dst
 
 /*
  * Unaligned-source handling; the label for this code is not part of the
  * excerpt (presumably .Lsrc_not_aligned, the target of the bne after the
  * source alignment test -- confirm).  Many lines are missing, including
  * the checksum additions inside the loop and the loop control.
  * pull, push and get_byte_N are macros defined outside this excerpt
  * (presumably endian-dependent shift operands -- confirm).
  */
 167                 adc     sum, sum, #0            @ include C from dst alignment
 
 /*
  * First merge variant: the pull amount (8) and push amount (24) add up to
  * 32, so each stored word is the OR of the remainder of one source word
  * and the leading part of the next.
  */
 174                 mov     r4, r5, pull #8         @ C = 0
 
 /* load4l is a macro defined elsewhere; presumably loads the next four source words into r5-r8 -- confirm. */
 177 1:              load4l  r5, r6, r7, r8
 
 178                 orr     r4, r4, r5, push #24
 
 180                 orr     r5, r5, r6, push #24
 
 182                 orr     r6, r6, r7, push #24
 
 184                 orr     r7, r7, r8, push #24
 
 /* Store the four merged words and write back dst advanced by 16 bytes. */
 185                 stmia   dst!, {r4, r5, r6, r7}
 
 /* Same merge applied to shorter remainders (two words, then one); surrounding lines are not shown. */
 199                 orr     r4, r4, r5, push #24
 
 201                 orr     r5, r5, r6, push #24
 
 209                 orr     r4, r4, r5, push #24
 
 /* Trailing bytes: one lane of r4 at a time goes into r5; the stores that use r5 are not shown. */
 215                 mov     r5, r4, get_byte_0
 
 218                 adcs    sum, sum, r4, push #16
 
 220                 mov     r5, r4, get_byte_1
 
 222                 mov     r5, r4, get_byte_2
 
 /*
  * Second merge variant: pull and push amounts are both 16 (sum 32), so
  * each stored word is the OR of half of one source word and half of the
  * next.  pull, push, load4l and get_byte_N are macros defined outside
  * this excerpt (presumably endian-dependent shift operands and a
  * four-word load -- confirm).  The checksum additions and loop control
  * are not shown in this listing.
  */
 225 .Lsrc2_aligned: mov     r4, r5, pull #16
 
 229 1:              load4l  r5, r6, r7, r8
 
 230                 orr     r4, r4, r5, push #16
 
 232                 orr     r5, r5, r6, push #16
 
 234                 orr     r6, r6, r7, push #16
 
 236                 orr     r7, r7, r8, push #16
 
 /* Store the four merged words and write back dst advanced by 16 bytes. */
 237                 stmia   dst!, {r4, r5, r6, r7}
 
 /* Same merge applied to shorter remainders (two words, then one); surrounding lines are not shown. */
 251                 orr     r4, r4, r5, push #16
 
 253                 orr     r5, r5, r6, push #16
 
 261                 orr     r4, r4, r5, push #16
 
 /* Trailing bytes: one lane of r4 at a time goes into r5; the stores that use r5 are not shown. */
 267                 mov     r5, r4, get_byte_0
 
 272                 mov     r5, r4, get_byte_1
 
 /*
  * Third merge variant: the pull amount (24) and push amount (8) add up to
  * 32, so each stored word is the OR of the remainder of one source word
  * and the leading part of the next.  pull, push, load4l and get_byte_N
  * are macros defined outside this excerpt (presumably endian-dependent
  * shift operands and a four-word load -- confirm).  The checksum
  * additions and loop control are not shown in this listing.
  */
 279 .Lsrc3_aligned: mov     r4, r5, pull #24
 
 283 1:              load4l  r5, r6, r7, r8
 
 284                 orr     r4, r4, r5, push #8
 
 286                 orr     r5, r5, r6, push #8
 
 288                 orr     r6, r6, r7, push #8
 
 290                 orr     r7, r7, r8, push #8
 
 /* Store the four merged words and write back dst advanced by 16 bytes. */
 291                 stmia   dst!, {r4, r5, r6, r7}
 
 /* Same merge applied to shorter remainders (two words, then one); surrounding lines are not shown. */
 305                 orr     r4, r4, r5, push #8
 
 307                 orr     r5, r5, r6, push #8
 
 315                 orr     r4, r4, r5, push #8
 
 /*
  * Trailing bytes.  get_byte_0 appears at listing lines 321 and 327; the
  * lines between them are missing, so r4 may have been reloaded in
  * between -- confirm against the full file.
  */
 321                 mov     r5, r4, get_byte_0
 
 327                 mov     r5, r4, get_byte_0
 
 329                 adcs    sum, sum, r4, push #24
 
 330                 mov     r5, r4, get_byte_1