/* Copyright 2002 Andi Kleen, SuSE Labs.
 * Subject to the GNU Public License v2.
 *
 * Functions to copy from and to user space.
 */
#define FIX_ALIGNMENT 1
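/* When FIX_ALIGNMENT is set, copy_user_generic aligns the destination to
   8 bytes with a byte-copy prologue before entering the main loop (see
   the "align destination" code below). */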
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>

/* Standard copy_to_user with segment limit checking */
	cmpq threadinfo_addr_limit(%rax),%rcx
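	/* %rax holds the current thread_info and %rcx the destination plus
	   the byte count; if that end address is above addr_limit the copy
	   would leave the user segment, so it is rejected. */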
	.byte 0xe9	/* 32bit jump */
	.section .altinstr_replacement,"ax"
3:	.byte 0xe9			/* replacement jmp with 32 bit immediate */
	.long copy_user_generic_c-1b	/* offset */
	.section .altinstructions,"a"
	.byte X86_FEATURE_REP_GOOD
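	/* Alternatives record: on CPUs that advertise X86_FEATURE_REP_GOOD
	   the 5 byte jump above is patched at boot to go to
	   copy_user_generic_c, the rep movs based copy, instead of the
	   unrolled loop in copy_user_generic. */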
/* Standard copy_from_user with segment limit checking */
	cmpq threadinfo_addr_limit(%rax),%rcx
	/* FALL THROUGH to copy_user_generic */
/*
 * copy_user_generic - memory copy with exception handling.
 * Input: rdi destination, rsi source, rdx count
 * Output: eax uncopied bytes or 0 if successful.
 */
	.globl copy_user_generic
	.byte 0x66,0x66,0x90	/* 5 byte nop for replacement jump */
	.byte 0x66,0x90
	.section .altinstr_replacement,"ax"
2:	.byte 0xe9	/* near jump with 32bit immediate */
	.long copy_user_generic_c-1b	/* offset */
	.section .altinstructions,"a"
	.quad copy_user_generic
	.byte X86_FEATURE_REP_GOOD
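	/* Same alternatives mechanism as in copy_to_user: on REP_GOOD CPUs
	   the 5 byte nop at the entry of copy_user_generic is replaced with
	   a jump to copy_user_generic_c, so the string copy version runs
	   instead of the unrolled loop below. */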
	xorl %eax,%eax	/* zero for the exception handler */
	/* check for bad alignment of destination */
.Lafter_bad_alignment:
.Ls1:	movq (%rsi),%r11
.Ls2:	movq 1*8(%rsi),%r8
.Ls3:	movq 2*8(%rsi),%r9
.Ls4:	movq 3*8(%rsi),%r10
.Ld1:	movq %r11,(%rdi)
.Ld2:	movq %r8,1*8(%rdi)
.Ld3:	movq %r9,2*8(%rdi)
.Ld4:	movq %r10,3*8(%rdi)

.Ls5:	movq 4*8(%rsi),%r11
.Ls6:	movq 5*8(%rsi),%r8
.Ls7:	movq 6*8(%rsi),%r9
.Ls8:	movq 7*8(%rsi),%r10
.Ld5:	movq %r11,4*8(%rdi)
.Ld6:	movq %r8,5*8(%rdi)
.Ld7:	movq %r9,6*8(%rdi)
.Ld8:	movq %r10,7*8(%rdi)
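	/* Main loop: each iteration moves 64 bytes, loading quadwords into
	   registers in groups of four before storing them so loads and
	   stores can overlap.  Every access has its own .Ls<n>/.Ld<n> label
	   so the exception table below can tell exactly which access
	   faulted. */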
.Ls9:	movq (%rsi),%r8
.Ld9:	movq %r8,(%rdi)
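	/* Quadword tail: copy the whole 8 byte words left over after the
	   64 byte main loop, one at a time. */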
.Ls10:	movb (%rsi),%bl
.Ld10:	movb %bl,(%rdi)
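	/* Byte tail: move the final count modulo 8 bytes one by one
	   through %bl. */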
	/* align destination */
.Ls11:	movb (%rsi),%bl
.Ld11:	movb %bl,(%rdi)
	jmp .Lafter_bad_alignment
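	/* Alignment prologue (FIX_ALIGNMENT): copy single bytes until the
	   destination is 8 byte aligned, then rejoin the main path at
	   .Lafter_bad_alignment. */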
	/* table sorted by exception address */
	.section __ex_table,"a"
	.quad .Ls11,.Lzero_rest
	.quad .Ld11,.Lzero_rest
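	/* Each __ex_table entry pairs the address of an instruction that can
	   fault with the address of its fixup code.  On a fault the page
	   fault handler looks the faulting address up here and resumes at
	   the fixup instead of oopsing. */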
	/* compute 64-offset for main loop. 8 bytes accuracy with error on the
	   pessimistic side. this is gross. it would be better to fix the
	   interface. */
	/* eax: zero, ebx: 64 */
	addq %rbx,%rdi	/* +64 */
	subq %rax,%rdi	/* correct destination with computed offset */

	shlq $6,%rdx	/* loop counter * 64 (stride length) */
	addq %rax,%rdx	/* add offset to loopcnt */
	andl $63,%ecx	/* remaining bytes */
	addq %rcx,%rdx	/* add them */
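	/* At this point %rdx holds the number of bytes that were not copied,
	   accurate to 8 bytes on the pessimistic side as noted above, and
	   %rdi points at the destination area that still needs zeroing.
	   .Lzero_rest clears it and the count is returned to the caller. */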
	/* exception on quad word loop in tail handling */
	/* ecx: loopcnt/8, %edx: length, rdi: correct */

	/* edx: bytes to zero, rdi: dest, eax: zero */

	/* when there is another exception while zeroing the rest just return */
	/* Some CPUs run faster using the string copy instructions.
	   This is also a lot simpler. Use them when possible.
	   Patch in jmps to this code instead of copying it fully
	   to avoid unwanted aliasing in the exception tables. */
/* Input: rdi destination, rsi source, rdx count
 *
 * Output:
 * eax uncopied bytes or 0 if successful.
 *
 * Only 4GB of copy is supported. This shouldn't be a problem
 * because the kernel normally only writes from/to page sized chunks
 * even if user space passed a longer buffer.
 * And more would be dangerous because both Intel and AMD have
 * errata with rep movsq > 4GB. If someone feels the need to fix
 * this please consider this.
 */
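	/* copy_user_generic_c copies count/8 quadwords with rep movsq and
	   the remaining count modulo 8 bytes with rep movsb.  The counts are
	   handled in 32 bit registers, which is where the 4GB limit above
	   comes from. */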
3:	lea (%rdx,%rcx,8),%rax
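	/* Fixup for a fault during rep movsq: %rcx still holds the number of
	   quadwords left and %rdx the trailing byte count, so the uncopied
	   byte count returned in %rax is rdx + rcx*8. */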
	.section __ex_table,"a"