path: root/arch/x86/lib/clear_page_64.S
author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
commit    ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree      b2d64bc10158fdd5497876388cd68142ca374ed3 /arch/x86/lib/clear_page_64.S
parent    Initial commit. (diff)
download  linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
          linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15. (upstream/6.6.15)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/lib/clear_page_64.S')
-rw-r--r--  arch/x86/lib/clear_page_64.S  141
1 file changed, 141 insertions, 0 deletions
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
new file mode 100644
index 0000000000..f74a3e704a
--- /dev/null
+++ b/arch/x86/lib/clear_page_64.S
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/export.h>
+
+/*
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi - page
+ */
+SYM_FUNC_START(clear_page_rep)
+ movl $4096/8,%ecx
+ xorl %eax,%eax
+ rep stosq
+ RET
+SYM_FUNC_END(clear_page_rep)
+EXPORT_SYMBOL_GPL(clear_page_rep)
+
+SYM_FUNC_START(clear_page_orig)
+ xorl %eax,%eax
+ movl $4096/64,%ecx
+ .p2align 4
+.Lloop:
+ decl %ecx
+#define PUT(x) movq %rax,x*8(%rdi)
+ movq %rax,(%rdi)
+ PUT(1)
+ PUT(2)
+ PUT(3)
+ PUT(4)
+ PUT(5)
+ PUT(6)
+ PUT(7)
+ leaq 64(%rdi),%rdi
+ jnz .Lloop
+ nop
+ RET
+SYM_FUNC_END(clear_page_orig)
+EXPORT_SYMBOL_GPL(clear_page_orig)
+
+SYM_FUNC_START(clear_page_erms)
+ movl $4096,%ecx
+ xorl %eax,%eax
+ rep stosb
+ RET
+SYM_FUNC_END(clear_page_erms)
+EXPORT_SYMBOL_GPL(clear_page_erms)
+
+/*
+ * Default clear user-space.
+ * Input:
+ * rdi destination
+ * rcx count
+ * rax is zero
+ *
+ * Output:
+ * rcx: uncleared bytes or 0 if successful.
+ */
+SYM_FUNC_START(rep_stos_alternative)
+ cmpq $64,%rcx
+ jae .Lunrolled
+
+ cmp $8,%ecx
+ jae .Lword
+
+ testl %ecx,%ecx
+ je .Lexit
+
+.Lclear_user_tail:
+0: movb %al,(%rdi)
+ inc %rdi
+ dec %rcx
+ jnz .Lclear_user_tail
+.Lexit:
+ RET
+
+ _ASM_EXTABLE_UA( 0b, .Lexit)
+
+.Lword:
+1: movq %rax,(%rdi)
+ addq $8,%rdi
+ sub $8,%ecx
+ je .Lexit
+ cmp $8,%ecx
+ jae .Lword
+ jmp .Lclear_user_tail
+
+ .p2align 4
+.Lunrolled:
+10: movq %rax,(%rdi)
+11: movq %rax,8(%rdi)
+12: movq %rax,16(%rdi)
+13: movq %rax,24(%rdi)
+14: movq %rax,32(%rdi)
+15: movq %rax,40(%rdi)
+16: movq %rax,48(%rdi)
+17: movq %rax,56(%rdi)
+ addq $64,%rdi
+ subq $64,%rcx
+ cmpq $64,%rcx
+ jae .Lunrolled
+ cmpl $8,%ecx
+ jae .Lword
+ testl %ecx,%ecx
+ jne .Lclear_user_tail
+ RET
+
+ /*
+ * If we take an exception on any of the
+ * word stores, we know that %rcx isn't zero,
+ * so we can just go to the tail clearing to
+ * get the exact count.
+ *
+ * The unrolled case might end up clearing
+ * some bytes twice. Don't care.
+ *
+ * We could use the value in %rdi to avoid
+ * a second fault on the exact count case,
+ * but do we really care? No.
+ *
+ * Finally, we could try to align %rdi at the
+ * top of the unrolling. But unaligned stores
+ * just aren't that common or expensive.
+ */
+ _ASM_EXTABLE_UA( 1b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(10b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(11b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(12b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(13b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(14b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(15b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(16b, .Lclear_user_tail)
+ _ASM_EXTABLE_UA(17b, .Lclear_user_tail)
+SYM_FUNC_END(rep_stos_alternative)
+EXPORT_SYMBOL(rep_stos_alternative)
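
For context on the routines above: the three clear_page_* helpers each zero one 4096-byte page (512 rep stosq iterations, an unrolled 64-byte store loop, or 4096 rep stosb), and the kernel presumably picks one at boot through its alternatives patching based on CPU features such as ERMS; that selection lives outside this file. rep_stos_alternative, by contrast, clears an arbitrary number of user-space bytes and reports the number of bytes left uncleared in %rcx (0 on success). The C sketch below is a hypothetical rendering of its success-path control flow only; the function name is invented for illustration, and the fault handling done via _ASM_EXTABLE_UA in the assembly is only summarized in comments, not modeled.

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical illustration only; this name does not exist in the kernel. */
    static size_t clear_user_sketch(void *to, size_t count)
    {
            unsigned char *dst = to;
            static const uint64_t zero = 0;

            while (count >= 64) {           /* .Lunrolled: eight movq stores per pass */
                    for (int i = 0; i < 8; i++)
                            memcpy(dst + 8 * i, &zero, 8);
                    dst += 64;
                    count -= 64;
            }
            while (count >= 8) {            /* .Lword: one quadword at a time */
                    memcpy(dst, &zero, 8);
                    dst += 8;
                    count -= 8;
            }
            while (count) {                 /* .Lclear_user_tail: byte tail */
                    *dst++ = 0;
                    count--;
            }
            return count;                   /* %rcx on return: 0 means fully cleared */
    }

In the real routine, each labeled store (0:, 1:, 10: through 17:) has an exception-table entry that redirects a faulting access to .Lclear_user_tail, so the byte loop either finishes the job or faults again at the exact offending byte, and the remaining count is returned in %rcx rather than the access being fatal. As the comment in the patch notes, the unrolled path may re-clear a few bytes after such a redirect, which is harmless.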