git.oblomov.eu Git - linux-2.6/blob - arch/x86/lib/copy_user_64.S

   1 /* Copyright 2002 Andi Kleen, SuSE Labs.
   2  * Subject to the GNU Public License v2.
   3  *
   4  * Functions to copy from and to user space.
   5  */
   6
   7 #include <linux/linkage.h>
   8 #include <asm/dwarf2.h>
   9
  10 #define FIX_ALIGNMENT 1
  11
  12 #include <asm/current.h>
  13 #include <asm/asm-offsets.h>
  14 #include <asm/thread_info.h>
  15 #include <asm/cpufeature.h>
  16
  17         .macro ALTERNATIVE_JUMP feature,orig,alt
  18 0:
  19         .byte 0xe9      /* 32bit jump */
  20         .long \orig-1f  /* by default jump to orig */
  21 1:
  22         .section .altinstr_replacement,"ax"
  23 2:      .byte 0xe9                   /* near jump with 32bit immediate */
  24         .long \alt-1b /* offset */   /* or alternatively to alt */
  25         .previous
  26         .section .altinstructions,"a"
  27         .align 8
  28         .quad  0b
  29         .quad  2b
  30         .byte  \feature              /* when feature is set */
  31         .byte  5
  32         .byte  5
  33         .previous
  34         .endm
  35
  36 /* Standard copy_to_user with segment limit checking */
  37 ENTRY(copy_to_user)
  38         CFI_STARTPROC
  39         GET_THREAD_INFO(%rax)
  40         movq %rdi,%rcx
  41         addq %rdx,%rcx
  42         jc  bad_to_user
  43         cmpq threadinfo_addr_limit(%rax),%rcx
  44         jae bad_to_user
  45         xorl %eax,%eax  /* clear zero flag */
  46         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  47         CFI_ENDPROC
  48
  49 ENTRY(copy_user_generic)
  50         CFI_STARTPROC
  51         movl $1,%ecx    /* set zero flag */
  52         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  53         CFI_ENDPROC
  54
  55 ENTRY(__copy_from_user_inatomic)
  56         CFI_STARTPROC
  57         xorl %ecx,%ecx  /* clear zero flag */
  58         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  59         CFI_ENDPROC
  60
  61 /* Standard copy_from_user with segment limit checking */
  62 ENTRY(copy_from_user)
  63         CFI_STARTPROC
  64         GET_THREAD_INFO(%rax)
  65         movq %rsi,%rcx
  66         addq %rdx,%rcx
  67         jc  bad_from_user
  68         cmpq threadinfo_addr_limit(%rax),%rcx
  69         jae  bad_from_user
  70         movl $1,%ecx    /* set zero flag */
  71         ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
  72         CFI_ENDPROC
  73 ENDPROC(copy_from_user)
  74
  75         .section .fixup,"ax"
  76         /* must zero dest */
  77 bad_from_user:
  78         CFI_STARTPROC
  79         movl %edx,%ecx
  80         xorl %eax,%eax
  81         rep
  82         stosb
  83 bad_to_user:
  84         movl    %edx,%eax
  85         ret
  86         CFI_ENDPROC
  87 END(bad_from_user)
  88         .previous
  89
  90
  91 /*
  92  * copy_user_generic_unrolled - memory copy with exception handling.
  93  * This version is for CPUs like P4 that don't have efficient micro code for rep movsq
  94  *
  95  * Input:
  96  * rdi destination
  97  * rsi source
  98  * rdx count
  99  * ecx zero flag -- if true zero destination on error
 100  *
 101  * Output:
 102  * eax uncopied bytes or 0 if successful.
 103  */
 104 ENTRY(copy_user_generic_unrolled)
 105         CFI_STARTPROC
 106         pushq %rbx
 107         CFI_ADJUST_CFA_OFFSET 8
 108         CFI_REL_OFFSET rbx, 0
 109         pushq %rcx
 110         CFI_ADJUST_CFA_OFFSET 8
 111         CFI_REL_OFFSET rcx, 0
 112         xorl %eax,%eax          /*zero for the exception handler */
 113
 114 #ifdef FIX_ALIGNMENT
 115         /* check for bad alignment of destination */
 116         movl %edi,%ecx
 117         andl $7,%ecx
 118         jnz  .Lbad_alignment
 119 .Lafter_bad_alignment:
 120 #endif
 121
 122         movq %rdx,%rcx
 123
 124         movl $64,%ebx
 125         shrq $6,%rdx
 126         decq %rdx
 127         js   .Lhandle_tail
 128
 129         .p2align 4
 130 .Lloop:
 131 .Ls1:   movq (%rsi),%r11
 132 .Ls2:   movq 1*8(%rsi),%r8
 133 .Ls3:   movq 2*8(%rsi),%r9
 134 .Ls4:   movq 3*8(%rsi),%r10
 135 .Ld1:   movq %r11,(%rdi)
 136 .Ld2:   movq %r8,1*8(%rdi)
 137 .Ld3:   movq %r9,2*8(%rdi)
 138 .Ld4:   movq %r10,3*8(%rdi)
 139
 140 .Ls5:   movq 4*8(%rsi),%r11
 141 .Ls6:   movq 5*8(%rsi),%r8
 142 .Ls7:   movq 6*8(%rsi),%r9
 143 .Ls8:   movq 7*8(%rsi),%r10
 144 .Ld5:   movq %r11,4*8(%rdi)
 145 .Ld6:   movq %r8,5*8(%rdi)
 146 .Ld7:   movq %r9,6*8(%rdi)
 147 .Ld8:   movq %r10,7*8(%rdi)
 148
 149         decq %rdx
 150
 151         leaq 64(%rsi),%rsi
 152         leaq 64(%rdi),%rdi
 153
 154         jns  .Lloop
 155
 156         .p2align 4
 157 .Lhandle_tail:
 158         movl %ecx,%edx
 159         andl $63,%ecx
 160         shrl $3,%ecx
 161         jz   .Lhandle_7
 162         movl $8,%ebx
 163         .p2align 4
 164 .Lloop_8:
 165 .Ls9:   movq (%rsi),%r8
 166 .Ld9:   movq %r8,(%rdi)
 167         decl %ecx
 168         leaq 8(%rdi),%rdi
 169         leaq 8(%rsi),%rsi
 170         jnz .Lloop_8
 171
 172 .Lhandle_7:
 173         movl %edx,%ecx
 174         andl $7,%ecx
 175         jz   .Lende
 176         .p2align 4
 177 .Lloop_1:
 178 .Ls10:  movb (%rsi),%bl
 179 .Ld10:  movb %bl,(%rdi)
 180         incq %rdi
 181         incq %rsi
 182         decl %ecx
 183         jnz .Lloop_1
 184
 185         CFI_REMEMBER_STATE
 186 .Lende:
 187         popq %rcx
 188         CFI_ADJUST_CFA_OFFSET -8
 189         CFI_RESTORE rcx
 190         popq %rbx
 191         CFI_ADJUST_CFA_OFFSET -8
 192         CFI_RESTORE rbx
 193         ret
 194         CFI_RESTORE_STATE
 195
 196 #ifdef FIX_ALIGNMENT
 197         /* align destination */
 198         .p2align 4
 199 .Lbad_alignment:
 200         movl $8,%r9d
 201         subl %ecx,%r9d
 202         movl %r9d,%ecx
 203         cmpq %r9,%rdx
 204         jz   .Lhandle_7
 205         js   .Lhandle_7
 206 .Lalign_1:
 207 .Ls11:  movb (%rsi),%bl
 208 .Ld11:  movb %bl,(%rdi)
 209         incq %rsi
 210         incq %rdi
 211         decl %ecx
 212         jnz .Lalign_1
 213         subq %r9,%rdx
 214         jmp .Lafter_bad_alignment
 215 #endif
 216
 217         /* table sorted by exception address */
 218         .section __ex_table,"a"
 219         .align 8
 220         .quad .Ls1,.Ls1e        /* Ls1-Ls4 have copied zero bytes */
 221         .quad .Ls2,.Ls1e
 222         .quad .Ls3,.Ls1e
 223         .quad .Ls4,.Ls1e
 224         .quad .Ld1,.Ls1e        /* Ld1-Ld4 have copied 0-24 bytes */
 225         .quad .Ld2,.Ls2e
 226         .quad .Ld3,.Ls3e
 227         .quad .Ld4,.Ls4e
 228         .quad .Ls5,.Ls5e        /* Ls5-Ls8 have copied 32 bytes */
 229         .quad .Ls6,.Ls5e
 230         .quad .Ls7,.Ls5e
 231         .quad .Ls8,.Ls5e
 232         .quad .Ld5,.Ls5e        /* Ld5-Ld8 have copied 32-56 bytes */
 233         .quad .Ld6,.Ls6e
 234         .quad .Ld7,.Ls7e
 235         .quad .Ld8,.Ls8e
 236         .quad .Ls9,.Le_quad
 237         .quad .Ld9,.Le_quad
 238         .quad .Ls10,.Le_byte
 239         .quad .Ld10,.Le_byte
 240 #ifdef FIX_ALIGNMENT
 241         .quad .Ls11,.Lzero_rest
 242         .quad .Ld11,.Lzero_rest
 243 #endif
 244         .quad .Le5,.Le_zero
 245         .previous
 246
 247         /* eax: zero, ebx: 64 */
 248 .Ls1e:  addl $8,%eax            /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
 249 .Ls2e:  addl $8,%eax
 250 .Ls3e:  addl $8,%eax
 251 .Ls4e:  addl $8,%eax
 252 .Ls5e:  addl $8,%eax
 253 .Ls6e:  addl $8,%eax
 254 .Ls7e:  addl $8,%eax
 255 .Ls8e:  addl $8,%eax
 256         addq %rbx,%rdi  /* +64 */
 257         subq %rax,%rdi  /* correct destination with computed offset */
 258
 259         shlq $6,%rdx    /* loop counter * 64 (stride length) */
 260         addq %rax,%rdx  /* add offset to loopcnt */
 261         andl $63,%ecx   /* remaining bytes */
 262         addq %rcx,%rdx  /* add them */
 263         jmp .Lzero_rest
 264
 265         /* exception on quad word loop in tail handling */
 266         /* ecx: loopcnt/8, %edx: length, rdi: correct */
 267 .Le_quad:
 268         shll $3,%ecx
 269         andl $7,%edx
 270         addl %ecx,%edx
 271         /* edx: bytes to zero, rdi: dest, eax:zero */
 272 .Lzero_rest:
 273         cmpl $0,(%rsp)
 274         jz   .Le_zero
 275         movq %rdx,%rcx
 276 .Le_byte:
 277         xorl %eax,%eax
 278 .Le5:   rep
 279         stosb
 280         /* when there is another exception while zeroing the rest just return */
 281 .Le_zero:
 282         movq %rdx,%rax
 283         jmp .Lende
 284         CFI_ENDPROC
 285 ENDPROC(copy_user_generic)
 286
 287
 288         /* Some CPUs run faster using the string copy instructions.
 289            This is also a lot simpler. Use them when possible.
 290            Patch in jmps to this code instead of copying it fully
 291            to avoid unwanted aliasing in the exception tables. */
 292
 293  /* rdi destination
 294   * rsi source
 295   * rdx count
 296   * ecx zero flag
 297   *
 298   * Output:
 299   * eax uncopied bytes or 0 if successfull.
 300   *
 301   * Only 4GB of copy is supported. This shouldn't be a problem
 302   * because the kernel normally only writes from/to page sized chunks
 303   * even if user space passed a longer buffer.
 304   * And more would be dangerous because both Intel and AMD have
 305   * errata with rep movsq > 4GB. If someone feels the need to fix
 306   * this please consider this.
 307   */
 308 ENTRY(copy_user_generic_string)
 309         CFI_STARTPROC
 310         movl %ecx,%r8d          /* save zero flag */
 311         movl %edx,%ecx
 312         shrl $3,%ecx
 313         andl $7,%edx
 314         jz   10f
 315 1:      rep
 316         movsq
 317         movl %edx,%ecx
 318 2:      rep
 319         movsb
 320 9:      movl %ecx,%eax
 321         ret
 322
 323         /* multiple of 8 byte */
 324 10:     rep
 325         movsq
 326         xor %eax,%eax
 327         ret
 328
 329         /* exception handling */
 330 3:      lea (%rdx,%rcx,8),%rax  /* exception on quad loop */
 331         jmp 6f
 332 5:      movl %ecx,%eax          /* exception on byte loop */
 333         /* eax: left over bytes */
 334 6:      testl %r8d,%r8d         /* zero flag set? */
 335         jz 7f
 336         movl %eax,%ecx          /* initialize x86 loop counter */
 337         push %rax
 338         xorl %eax,%eax
 339 8:      rep
 340         stosb                   /* zero the rest */
 341 11:     pop %rax
 342 7:      ret
 343         CFI_ENDPROC
 344 END(copy_user_generic_c)
 345
 346         .section __ex_table,"a"
 347         .quad 1b,3b
 348         .quad 2b,5b
 349         .quad 8b,11b
 350         .quad 10b,3b
 351         .previous