git.oblomov.eu Git - linux-2.6/blob - arch/x86/lib/copy_user_nocache_64.S

   1 /* Copyright 2002 Andi Kleen, SuSE Labs.
   2  * Subject to the GNU Public License v2.
   3  *
   4  * Functions to copy from and to user space.
   5  */
   6
   7 #include <linux/linkage.h>
   8 #include <asm/dwarf2.h>
   9
  10 #define FIX_ALIGNMENT 1
  11
  12 #include <asm/current.h>
  13 #include <asm/asm-offsets.h>
  14 #include <asm/thread_info.h>
  15 #include <asm/cpufeature.h>
  16
  17 /*
  18  * __copy_user_nocache - memory copy with non-temporal stores and exception handling
  19  * The 64 byte and 8 byte loops write the destination with movnti; the byte
  20  * loops use plain stores. An sfence is issued before returning.
  21  *
  22  * Input:
  23  * rdi destination
  24  * rsi source
  25  * rdx count
  26  * rcx zero flag        when 1 zero on exception
  27  *
  28  * Output:
  29  * eax uncopied bytes or 0 if successful.
  30  *
  31  * Internal register use:
  32  * rbx     scratch (saved/restored); holds 64 for the main loop fixups and
  33  *         bl carries the data in the byte loops
  34  * rcx     byte count and loop counters; the zero flag lives at (%rsp)
  35  * r8-r11  data in flight; r9 also holds the alignment prologue length
  36  *
  37  * Every load/store that may fault has an __ex_table entry. Each fixup
  38  * leaves the number of uncopied bytes in rdx and the address of the first
  39  * uncopied destination byte in rdi, then joins .Lzero_rest.
  40  */
  41 ENTRY(__copy_user_nocache)
  42         CFI_STARTPROC
  43         pushq %rbx
  44         CFI_ADJUST_CFA_OFFSET 8
  45         CFI_REL_OFFSET rbx, 0
  46         pushq %rcx              /* save zero flag, read back at (%rsp) */
  47         CFI_ADJUST_CFA_OFFSET 8
  48         CFI_REL_OFFSET rcx, 0
  49
  50         xorl %eax,%eax          /* zero for the exception handler */
  51
  52 #ifdef FIX_ALIGNMENT
  53         /* check for bad alignment of destination */
  54         movl %edi,%ecx
  55         andl $7,%ecx
  56         jnz  .Lbad_alignment
  57 .Lafter_bad_alignment:
  58 #endif
  59
  60         movq %rdx,%rcx          /* keep the byte count for the tail */
  61
  62         movl $64,%ebx           /* block size, used by the main loop fixups */
  63         shrq $6,%rdx            /* number of 64 byte blocks */
  64         decq %rdx
  65         js   .Lhandle_tail      /* less than one block */
  66
  67         .p2align 4
  68 .Lloop:
  69 .Ls1:   movq (%rsi),%r11
  70 .Ls2:   movq 1*8(%rsi),%r8
  71 .Ls3:   movq 2*8(%rsi),%r9
  72 .Ls4:   movq 3*8(%rsi),%r10
  73 .Ld1:   movnti %r11,(%rdi)
  74 .Ld2:   movnti %r8,1*8(%rdi)
  75 .Ld3:   movnti %r9,2*8(%rdi)
  76 .Ld4:   movnti %r10,3*8(%rdi)
  77
  78 .Ls5:   movq 4*8(%rsi),%r11
  79 .Ls6:   movq 5*8(%rsi),%r8
  80 .Ls7:   movq 6*8(%rsi),%r9
  81 .Ls8:   movq 7*8(%rsi),%r10
  82 .Ld5:   movnti %r11,4*8(%rdi)
  83 .Ld6:   movnti %r8,5*8(%rdi)
  84 .Ld7:   movnti %r9,6*8(%rdi)
  85 .Ld8:   movnti %r10,7*8(%rdi)
  86
  87         dec  %rdx
  88
  89         leaq 64(%rsi),%rsi      /* lea leaves the flags of the dec alone */
  90         leaq 64(%rdi),%rdi
  91
  92         jns  .Lloop
  93
  94         .p2align 4
  95 .Lhandle_tail:
  96         movl %ecx,%edx          /* edx: count, its low bits are used below */
  97         andl $63,%ecx
  98         shrl $3,%ecx            /* ecx: quad words left */
  99         jz   .Lhandle_7
 100         .p2align 4
 101 .Lloop_8:
 102 .Ls9:   movq (%rsi),%r8
 103 .Ld9:   movnti %r8,(%rdi)
 104         decl %ecx
 105         leaq 8(%rdi),%rdi
 106         leaq 8(%rsi),%rsi
 107         jnz .Lloop_8
 108
 109 .Lhandle_7:
 110         movl %edx,%ecx
 111         andl $7,%ecx            /* ecx: bytes left */
 112         jz   .Lende
 113         .p2align 4
 114 .Lloop_1:
 115 .Ls10:  movb (%rsi),%bl
 116 .Ld10:  movb %bl,(%rdi)
 117         incq %rdi
 118         incq %rsi
 119         decl %ecx
 120         jnz .Lloop_1
 121
 122         CFI_REMEMBER_STATE
 123 .Lende:
 124         popq %rcx
 125         CFI_ADJUST_CFA_OFFSET -8
 126         CFI_RESTORE rcx
 127         popq %rbx
 128         CFI_ADJUST_CFA_OFFSET -8
 129         CFI_RESTORE rbx
 130         sfence                  /* order the movnti stores before returning */
 131         ret
 132         CFI_RESTORE_STATE
 133
 134 #ifdef FIX_ALIGNMENT
 135         /* align destination */
 136         .p2align 4
 137 .Lbad_alignment:
 138         movl $8,%r9d
 139         subl %ecx,%r9d          /* r9: bytes up to the next 8 byte boundary */
 140         movl %r9d,%ecx
 141         cmpq %r9,%rdx
 142         jbe  .Lhandle_7         /* count <= r9 (unsigned): byte copy only */
 143 .Lalign_1:
 144 .Ls11:  movb (%rsi),%bl
 145 .Ld11:  movb %bl,(%rdi)
 146         incq %rsi
 147         incq %rdi
 148         decl %ecx
 149         jnz .Lalign_1
 150         subq %r9,%rdx
 151         jmp .Lafter_bad_alignment
 152 #endif
 153
 154         /* table sorted by exception address */
 155         .section __ex_table,"a"
 156         .align 8
 157         /* Loads 1-4 all run before store 1 and loads 5-8 before store 5,
 158            so a faulting load resumes where nothing of its group is done. */
 159         .quad .Ls1,.Ls1e
 160         .quad .Ls2,.Ls1e
 161         .quad .Ls3,.Ls1e
 162         .quad .Ls4,.Ls1e
 163         .quad .Ld1,.Ls1e
 164         .quad .Ld2,.Ls2e
 165         .quad .Ld3,.Ls3e
 166         .quad .Ld4,.Ls4e
 167         .quad .Ls5,.Ls5e
 168         .quad .Ls6,.Ls5e
 169         .quad .Ls7,.Ls5e
 170         .quad .Ls8,.Ls5e
 171         .quad .Ld5,.Ls5e
 172         .quad .Ld6,.Ls6e
 173         .quad .Ld7,.Ls7e
 174         .quad .Ld8,.Ls8e
 175         .quad .Ls9,.Le_quad
 176         .quad .Ld9,.Le_quad
 177         .quad .Ls10,.Le_byte
 178         .quad .Ld10,.Le_byte
 179 #ifdef FIX_ALIGNMENT
 180         .quad .Ls11,.Le_align
 181         .quad .Ld11,.Le_align
 182 #endif
 183         .quad .Le5,.Le_zero
 184         .previous
 185
 186         /* compute 64-offset for main loop. 8 bytes accuracy with error on the
 187            pessimistic side. this is gross. it would be better to fix the
 188            interface. */
 189         /* eax: zero, ebx: 64 */
 190 .Ls1e:  addl $8,%eax
 191 .Ls2e:  addl $8,%eax
 192 .Ls3e:  addl $8,%eax
 193 .Ls4e:  addl $8,%eax
 194 .Ls5e:  addl $8,%eax
 195 .Ls6e:  addl $8,%eax
 196 .Ls7e:  addl $8,%eax
 197 .Ls8e:  addl $8,%eax
 198         addq %rbx,%rdi  /* +64 */
 199         subq %rax,%rdi  /* correct destination with computed offset */
 200
 201         shlq $6,%rdx    /* loop counter * 64 (stride length) */
 202         addq %rax,%rdx  /* add offset to loopcnt */
 203         andl $63,%ecx   /* remaining bytes */
 204         addq %rcx,%rdx  /* add them */
 205         jmp .Lzero_rest
 206
 207 #ifdef FIX_ALIGNMENT
 208         /* exception in the alignment loop */
 209         /* ecx: alignment bytes left, r9: alignment bytes, rdx: count */
 210 .Le_align:
 211         subq %r9,%rdx
 212         addq %rcx,%rdx  /* count - bytes already copied */
 213         jmp .Lzero_rest
 214 #endif
 215
 216         /* exception on quad word loop in tail handling */
 217         /* ecx: loopcnt/8, %edx: length, rdi: correct */
 218 .Le_quad:
 219         shll $3,%ecx
 220         andl $7,%edx
 221         addl %ecx,%edx
 222         jmp .Lzero_rest
 223
 224         /* exception on byte loop in tail handling */
 225         /* ecx: bytes left, rdi: correct */
 226 .Le_byte:
 227         movl %ecx,%edx
 228         /* rdx: bytes to zero, rdi: dest */
 229 .Lzero_rest:
 230         cmpl $0,(%rsp)  /* zero flag set? */
 231         jz   .Le_zero
 232         movq %rdx,%rcx
 233         xorl %eax,%eax  /* stosb stores al */
 234 .Le5:   rep
 235         stosb
 236         /* when there is another exception while zeroing the rest just return */
 237 .Le_zero:
 238         movq %rdx,%rax
 239         jmp .Lende
 240         CFI_ENDPROC
 241 ENDPROC(__copy_user_nocache)
 242
 243