Diffstat (limited to 'arch/x86/lib/clear_page_64.S')
-rw-r--r-- | arch/x86/lib/clear_page_64.S | 141
1 file changed, 141 insertions(+), 0 deletions(-)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
new file mode 100644
index 0000000000..f74a3e704a
--- /dev/null
+++ b/arch/x86/lib/clear_page_64.S
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/export.h>
+
+/*
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi - page
+ */
+SYM_FUNC_START(clear_page_rep)
+	movl $4096/8,%ecx
+	xorl %eax,%eax
+	rep stosq
+	RET
+SYM_FUNC_END(clear_page_rep)
+EXPORT_SYMBOL_GPL(clear_page_rep)
+
+SYM_FUNC_START(clear_page_orig)
+	xorl %eax,%eax
+	movl $4096/64,%ecx
+	.p2align 4
+.Lloop:
+	decl %ecx
+#define PUT(x) movq %rax,x*8(%rdi)
+	movq %rax,(%rdi)
+	PUT(1)
+	PUT(2)
+	PUT(3)
+	PUT(4)
+	PUT(5)
+	PUT(6)
+	PUT(7)
+	leaq 64(%rdi),%rdi
+	jnz .Lloop
+	nop
+	RET
+SYM_FUNC_END(clear_page_orig)
+EXPORT_SYMBOL_GPL(clear_page_orig)
+
+SYM_FUNC_START(clear_page_erms)
+	movl $4096,%ecx
+	xorl %eax,%eax
+	rep stosb
+	RET
+SYM_FUNC_END(clear_page_erms)
+EXPORT_SYMBOL_GPL(clear_page_erms)
+
+/*
+ * Default clear user-space.
+ * Input:
+ * rdi destination
+ * rcx count
+ * rax is zero
+ *
+ * Output:
+ * rcx: uncleared bytes or 0 if successful.
+ */
+SYM_FUNC_START(rep_stos_alternative)
+	cmpq $64,%rcx
+	jae .Lunrolled
+
+	cmp $8,%ecx
+	jae .Lword
+
+	testl %ecx,%ecx
+	je .Lexit
+
+.Lclear_user_tail:
+0:	movb %al,(%rdi)
+	inc %rdi
+	dec %rcx
+	jnz .Lclear_user_tail
+.Lexit:
+	RET
+
+	_ASM_EXTABLE_UA( 0b, .Lexit)
+
+.Lword:
+1:	movq %rax,(%rdi)
+	addq $8,%rdi
+	sub $8,%ecx
+	je .Lexit
+	cmp $8,%ecx
+	jae .Lword
+	jmp .Lclear_user_tail
+
+	.p2align 4
+.Lunrolled:
+10:	movq %rax,(%rdi)
+11:	movq %rax,8(%rdi)
+12:	movq %rax,16(%rdi)
+13:	movq %rax,24(%rdi)
+14:	movq %rax,32(%rdi)
+15:	movq %rax,40(%rdi)
+16:	movq %rax,48(%rdi)
+17:	movq %rax,56(%rdi)
+	addq $64,%rdi
+	subq $64,%rcx
+	cmpq $64,%rcx
+	jae .Lunrolled
+	cmpl $8,%ecx
+	jae .Lword
+	testl %ecx,%ecx
+	jne .Lclear_user_tail
+	RET
+
+	/*
+	 * If we take an exception on any of the
+	 * word stores, we know that %rcx isn't zero,
+	 * so we can just go to the tail clearing to
+	 * get the exact count.
+	 *
+	 * The unrolled case might end up clearing
+	 * some bytes twice. Don't care.
+	 *
+	 * We could use the value in %rdi to avoid
+	 * a second fault on the exact count case,
+	 * but do we really care? No.
+	 *
+	 * Finally, we could try to align %rdi at the
+	 * top of the unrolling. But unaligned stores
+	 * just aren't that common or expensive.
+	 */
+	_ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(10b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(11b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(12b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(13b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(14b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(15b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(16b, .Lclear_user_tail)
+	_ASM_EXTABLE_UA(17b, .Lclear_user_tail)
SYM_FUNC_END(rep_stos_alternative)
+EXPORT_SYMBOL(rep_stos_alternative)
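
Note on variant selection: the header comment above describes the policy (prefer enhanced REP STOSB, fall back to the fast-string REP STOSQ loop, else the open-coded 64-byte loop), and the kernel wires that choice up at the call site via the alternatives mechanism keyed on CPU feature flags, not a runtime branch. The sketch below is a hypothetical userspace model of the three strategies and of the selection policy, assuming x86-64 and GCC/Clang extended inline asm; all *_demo and cpu_has_* names are illustrative, not kernel API.

/*
 * Userspace model of the three page-clearing strategies above.
 * Assumes x86-64 and GCC/Clang inline asm; not kernel code.
 */
#include <stdint.h>
#include <stddef.h>

#define PAGE_SIZE 4096

/* Like clear_page_rep: 512 qword stores via REP STOSQ */
static void clear_page_rep_demo(void *page)
{
	void *d = page;
	uint64_t cnt = PAGE_SIZE / 8;

	asm volatile("rep stosq"
		     : "+D" (d), "+c" (cnt)	/* rdi = dest, rcx = qword count */
		     : "a" (0UL)		/* rax = value to store */
		     : "memory");
}

/* Like clear_page_erms: byte count via enhanced REP STOSB */
static void clear_page_erms_demo(void *page)
{
	void *d = page;
	uint64_t cnt = PAGE_SIZE;

	asm volatile("rep stosb"
		     : "+D" (d), "+c" (cnt)	/* rdi = dest, rcx = byte count */
		     : "a" (0UL)
		     : "memory");
}

/* Plain C stand-in for clear_page_orig's unrolled 64-byte loop */
static void clear_page_orig_demo(void *page)
{
	uint64_t *p = page;
	size_t i;

	for (i = 0; i < PAGE_SIZE / 8; i += 8) {	/* 8 qwords = 64 bytes per pass */
		p[i] = 0;     p[i + 1] = 0; p[i + 2] = 0; p[i + 3] = 0;
		p[i + 4] = 0; p[i + 5] = 0; p[i + 6] = 0; p[i + 7] = 0;
	}
}

typedef void (*clear_page_fn)(void *page);

/*
 * The kernel patches the call site once at boot (alternatives keyed on
 * ERMS / fast-string feature bits); this runtime branch is only a
 * simplified stand-in for that mechanism.
 */
static clear_page_fn pick_clear_page(int cpu_has_erms, int cpu_has_rep_good)
{
	if (cpu_has_erms)
		return clear_page_erms_demo;
	if (cpu_has_rep_good)
		return clear_page_rep_demo;
	return clear_page_orig_demo;
}

Usage would be e.g. pick_clear_page(1, 1)(page) on an ERMS-capable CPU; on such hardware the byte-count REP STOSB form is typically at least as fast as the qword loop while being simpler at the call site, which is why it is the preferred default.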
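On rep_stos_alternative's contract: the function must return, in %rcx, exactly how many bytes of the user buffer were left uncleared when a store faults. The _ASM_EXTABLE_UA entries route any faulting store into .Lclear_user_tail, whose byte-at-a-time loop recomputes the precise residue. A small C model of that contract follows; the fault_at parameter is a hypothetical knob standing in for a page fault on a store and is not part of the real interface.

#include <stddef.h>

/*
 * Model of rep_stos_alternative's calling convention: zero 'count'
 * bytes at 'dst', return the number of bytes NOT cleared (0 on full
 * success). A store to index >= fault_at models a #PF; the real code
 * reaches the byte-granular tail via its exception-table entries.
 */
static size_t clear_user_model(unsigned char *dst, size_t count, size_t fault_at)
{
	size_t done;

	for (done = 0; done < count; done++) {
		if (done >= fault_at)		/* the store would fault here */
			return count - done;	/* exact uncleared residue */
		dst[done] = 0;
	}
	return 0;				/* everything cleared */
}

This also shows why the unrolled path can tolerate re-clearing a few bytes, as the in-file comment notes: after a fault anywhere in a 64-byte block, control re-enters the byte tail at the block's start with %rcx still counting the whole remainder, so correctness depends only on the final residue, not on avoiding duplicate stores.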