git.oblomov.eu Git - linux-2.6/blob - arch/x86_64/lib/copy_page.S

   1 /* Written 2003 by Andi Kleen, based on a kernel by Evandro Menezes */
   2
   3 #include <linux/config.h>
   4 #include <linux/linkage.h>
   5 #include <asm/dwarf2.h>
   6
   7         ALIGN
   8 copy_page_c:
   9         CFI_STARTPROC
  10         movl $4096/8,%ecx
  11         rep movsq
  12         ret
  13         CFI_ENDPROC
  14 ENDPROC(copy_page_c)
  15
  16 /* Don't use streaming store because it's better when the target
  17    ends up in cache. */
  18
  19 /* Could vary the prefetch distance based on SMP/UP */
  20
  21 ENTRY(copy_page)
  22         CFI_STARTPROC
  23         subq    $3*8,%rsp
  24         CFI_ADJUST_CFA_OFFSET 3*8
  25         movq    %rbx,(%rsp)
  26         CFI_REL_OFFSET rbx, 0
  27         movq    %r12,1*8(%rsp)
  28         CFI_REL_OFFSET r12, 1*8
  29         movq    %r13,2*8(%rsp)
  30         CFI_REL_OFFSET r13, 2*8
  31
  32         movl    $(4096/64)-5,%ecx
  33         .p2align 4
  34 .Loop64:
  35         dec     %rcx
  36
  37         movq        (%rsi), %rax
  38         movq      8 (%rsi), %rbx
  39         movq     16 (%rsi), %rdx
  40         movq     24 (%rsi), %r8
  41         movq     32 (%rsi), %r9
  42         movq     40 (%rsi), %r10
  43         movq     48 (%rsi), %r11
  44         movq     56 (%rsi), %r12
  45
  46         prefetcht0 5*64(%rsi)
  47
  48         movq     %rax,    (%rdi)
  49         movq     %rbx,  8 (%rdi)
  50         movq     %rdx, 16 (%rdi)
  51         movq     %r8,  24 (%rdi)
  52         movq     %r9,  32 (%rdi)
  53         movq     %r10, 40 (%rdi)
  54         movq     %r11, 48 (%rdi)
  55         movq     %r12, 56 (%rdi)
  56
  57         leaq    64 (%rsi), %rsi
  58         leaq    64 (%rdi), %rdi
  59
  60         jnz     .Loop64
  61
  62         movl    $5,%ecx
  63         .p2align 4
  64 .Loop2:
  65         decl   %ecx
  66
  67         movq        (%rsi), %rax
  68         movq      8 (%rsi), %rbx
  69         movq     16 (%rsi), %rdx
  70         movq     24 (%rsi), %r8
  71         movq     32 (%rsi), %r9
  72         movq     40 (%rsi), %r10
  73         movq     48 (%rsi), %r11
  74         movq     56 (%rsi), %r12
  75
  76         movq     %rax,    (%rdi)
  77         movq     %rbx,  8 (%rdi)
  78         movq     %rdx, 16 (%rdi)
  79         movq     %r8,  24 (%rdi)
  80         movq     %r9,  32 (%rdi)
  81         movq     %r10, 40 (%rdi)
  82         movq     %r11, 48 (%rdi)
  83         movq     %r12, 56 (%rdi)
  84
  85         leaq    64(%rdi),%rdi
  86         leaq    64(%rsi),%rsi
  87
  88         jnz     .Loop2
  89
  90         movq    (%rsp),%rbx
  91         CFI_RESTORE rbx
  92         movq    1*8(%rsp),%r12
  93         CFI_RESTORE r12
  94         movq    2*8(%rsp),%r13
  95         CFI_RESTORE r13
  96         addq    $3*8,%rsp
  97         CFI_ADJUST_CFA_OFFSET -3*8
  98         ret
  99 .Lcopy_page_end:
 100         CFI_ENDPROC
 101 ENDPROC(copy_page)
 102
 103         /* Some CPUs run faster using the string copy instructions.
 104            It is also a lot simpler. Use this when possible */
 105
 106 #include <asm/cpufeature.h>
 107
 108         .section .altinstr_replacement,"ax"
 109 1:      .byte 0xeb                                      /* jmp <disp8> */
 110         .byte (copy_page_c - copy_page) - (2f - 1b)     /* offset */
 111 2:
 112         .previous
 113         .section .altinstructions,"a"
 114         .align 8
 115         .quad copy_page
 116         .quad 1b
 117         .byte X86_FEATURE_REP_GOOD
 118         .byte .Lcopy_page_end - copy_page
 119         .byte 2b - 1b
 120         .previous