git.oblomov.eu Git - linux-2.6/blob - arch/x86_64/lib/memcpy.S

   1 /* Copyright 2002 Andi Kleen */
   2
   3 #include <linux/linkage.h>
   4 #include <asm/dwarf2.h>
   5 #include <asm/cpufeature.h>
   6
   7 /*
   8  * memcpy - Copy a memory block.
   9  *
  10  * Input:
  11  * rdi destination
  12  * rsi source
  13  * rdx count
  14  *
  15  * Output:
  16  * rax original destination
  17  */
  18
  19         ALIGN
  20 memcpy_c:
  21         CFI_STARTPROC
  22         movq %rdi,%rax
  23         movl %edx,%ecx
  24         shrl $3,%ecx
  25         andl $7,%edx
  26         rep movsq
  27         movl %edx,%ecx
  28         rep movsb
  29         ret
  30         CFI_ENDPROC
  31 ENDPROC(memcpy_c)
  32
  33 ENTRY(__memcpy)
  34 ENTRY(memcpy)
  35         CFI_STARTPROC
  36         pushq %rbx
  37         CFI_ADJUST_CFA_OFFSET 8
  38         CFI_REL_OFFSET rbx, 0
  39         movq %rdi,%rax
  40
  41         movl %edx,%ecx
  42         shrl $6,%ecx
  43         jz .Lhandle_tail
  44
  45         .p2align 4
  46 .Lloop_64:
  47         decl %ecx
  48
  49         movq (%rsi),%r11
  50         movq 8(%rsi),%r8
  51
  52         movq %r11,(%rdi)
  53         movq %r8,1*8(%rdi)
  54
  55         movq 2*8(%rsi),%r9
  56         movq 3*8(%rsi),%r10
  57
  58         movq %r9,2*8(%rdi)
  59         movq %r10,3*8(%rdi)
  60
  61         movq 4*8(%rsi),%r11
  62         movq 5*8(%rsi),%r8
  63
  64         movq %r11,4*8(%rdi)
  65         movq %r8,5*8(%rdi)
  66
  67         movq 6*8(%rsi),%r9
  68         movq 7*8(%rsi),%r10
  69
  70         movq %r9,6*8(%rdi)
  71         movq %r10,7*8(%rdi)
  72
  73         leaq 64(%rsi),%rsi
  74         leaq 64(%rdi),%rdi
  75         jnz  .Lloop_64
  76
  77 .Lhandle_tail:
  78         movl %edx,%ecx
  79         andl $63,%ecx
  80         shrl $3,%ecx
  81         jz   .Lhandle_7
  82         .p2align 4
  83 .Lloop_8:
  84         decl %ecx
  85         movq (%rsi),%r8
  86         movq %r8,(%rdi)
  87         leaq 8(%rdi),%rdi
  88         leaq 8(%rsi),%rsi
  89         jnz  .Lloop_8
  90
  91 .Lhandle_7:
  92         movl %edx,%ecx
  93         andl $7,%ecx
  94         jz .Lende
  95         .p2align 4
  96 .Lloop_1:
  97         movb (%rsi),%r8b
  98         movb %r8b,(%rdi)
  99         incq %rdi
 100         incq %rsi
 101         decl %ecx
 102         jnz .Lloop_1
 103
 104 .Lende:
 105         popq %rbx
 106         CFI_ADJUST_CFA_OFFSET -8
 107         CFI_RESTORE rbx
 108         ret
 109 .Lfinal:
 110         CFI_ENDPROC
 111 ENDPROC(memcpy)
 112 ENDPROC(__memcpy)
 113
 114         /* Some CPUs run faster using the string copy instructions.
 115            It is also a lot simpler. Use this when possible */
 116
 117         .section .altinstr_replacement,"ax"
 118 1:      .byte 0xeb                              /* jmp <disp8> */
 119         .byte (memcpy_c - memcpy) - (2f - 1b)   /* offset */
 120 2:
 121         .previous
 122         .section .altinstructions,"a"
 123         .align 8
 124         .quad memcpy
 125         .quad 1b
 126         .byte X86_FEATURE_REP_GOOD
 127         .byte .Lfinal - memcpy
 128         .byte 2b - 1b
 129         .previous