author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
commit    ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree      b2d64bc10158fdd5497876388cd68142ca374ed3 /arch/powerpc/lib
parent    Initial commit. (diff)
download  linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
          linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15. (upstream/6.6.15)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--  arch/powerpc/lib/Makefile                        |   83
-rw-r--r--  arch/powerpc/lib/checksum_32.S                   |  309
-rw-r--r--  arch/powerpc/lib/checksum_64.S                   |  443
-rw-r--r--  arch/powerpc/lib/checksum_wrappers.c             |   39
-rw-r--r--  arch/powerpc/lib/code-patching.c                 |  500
-rw-r--r--  arch/powerpc/lib/copy_32.S                       |  515
-rw-r--r--  arch/powerpc/lib/copy_mc_64.S                    |  242
-rw-r--r--  arch/powerpc/lib/copypage_64.S                   |  118
-rw-r--r--  arch/powerpc/lib/copypage_power7.S               |  153
-rw-r--r--  arch/powerpc/lib/copyuser_64.S                   |  564
-rw-r--r--  arch/powerpc/lib/copyuser_power7.S               |  695
-rw-r--r--  arch/powerpc/lib/crtsavres.S                     |  545
-rw-r--r--  arch/powerpc/lib/div64.S                         |   55
-rw-r--r--  arch/powerpc/lib/error-inject.c                  |   16
-rw-r--r--  arch/powerpc/lib/feature-fixups-test.S           |  862
-rw-r--r--  arch/powerpc/lib/feature-fixups.c                | 1012
-rw-r--r--  arch/powerpc/lib/hweight_64.S                    |  104
-rw-r--r--  arch/powerpc/lib/ldstfp.S                        |  237
-rw-r--r--  arch/powerpc/lib/locks.c                         |   65
-rw-r--r--  arch/powerpc/lib/mem_64.S                        |  142
-rw-r--r--  arch/powerpc/lib/memcmp_32.S                     |   45
-rw-r--r--  arch/powerpc/lib/memcmp_64.S                     |  638
-rw-r--r--  arch/powerpc/lib/memcpy_64.S                     |  230
-rw-r--r--  arch/powerpc/lib/memcpy_power7.S                 |  641
-rw-r--r--  arch/powerpc/lib/pmem.c                          |   87
-rw-r--r--  arch/powerpc/lib/qspinlock.c                     | 1007
-rw-r--r--  arch/powerpc/lib/quad.S                          |   58
-rw-r--r--  arch/powerpc/lib/restart_table.c                 |   56
-rw-r--r--  arch/powerpc/lib/rheap.c                         |  747
-rw-r--r--  arch/powerpc/lib/sstep.c                         | 3666
-rw-r--r--  arch/powerpc/lib/string.S                        |   65
-rw-r--r--  arch/powerpc/lib/string_32.S                     |   90
-rw-r--r--  arch/powerpc/lib/string_64.S                     |  179
-rw-r--r--  arch/powerpc/lib/strlen_32.S                     |   78
-rw-r--r--  arch/powerpc/lib/test-code-patching.c            |  362
-rw-r--r--  arch/powerpc/lib/test_emulate_step.c             | 1741
-rw-r--r--  arch/powerpc/lib/test_emulate_step_exec_instr.S  |  150
-rw-r--r--  arch/powerpc/lib/vmx-helper.c                    |   75
-rw-r--r--  arch/powerpc/lib/xor_vmx.c                       |  156
-rw-r--r--  arch/powerpc/lib/xor_vmx.h                       |   22
-rw-r--r--  arch/powerpc/lib/xor_vmx_glue.c                  |   63
41 files changed, 16855 insertions, 0 deletions
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
new file mode 100644
index 000000000..6eac63e79
--- /dev/null
+++ b/arch/powerpc/lib/Makefile
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for ppc-specific library files.
+#
+
+ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+
+CFLAGS_code-patching.o += -fno-stack-protector
+CFLAGS_feature-fixups.o += -fno-stack-protector
+
+CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
+
+KASAN_SANITIZE_code-patching.o := n
+KASAN_SANITIZE_feature-fixups.o := n
+# restart_table.o contains functions called in the NMI interrupt path
+# which can be in real mode. Disable KASAN.
+KASAN_SANITIZE_restart_table.o := n
+KCSAN_SANITIZE_code-patching.o := n
+KCSAN_SANITIZE_feature-fixups.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+
+obj-y += code-patching.o feature-fixups.o pmem.o
+
+obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o
+
+ifndef CONFIG_KASAN
+obj-y += string.o memcmp_$(BITS).o
+obj-$(CONFIG_PPC32) += strlen_32.o
+endif
+
+obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+# See corresponding test in arch/powerpc/Makefile
+# 64-bit linker creates .sfpr on demand for final link (vmlinux),
+# so it is only needed for modules, and only for older linkers which
+# do not support --save-restore-funcs
+ifndef CONFIG_LD_IS_BFD
+always-$(CONFIG_PPC64) += crtsavres.o
+endif
+
+obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
+ memcpy_power7.o restart_table.o
+
+obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
+ memcpy_64.o copy_mc_64.o
+
+ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+obj-$(CONFIG_SMP) += qspinlock.o
+else
+obj64-$(CONFIG_SMP) += locks.o
+endif
+
+obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
+obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
+ test_emulate_step_exec_instr.o
+
+obj-y += checksum_$(BITS).o checksum_wrappers.o \
+ string_$(BITS).o
+
+obj-y += sstep.o
+obj-$(CONFIG_PPC_FPU) += ldstfp.o
+obj64-y += quad.o
+
+obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
+
+obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
+
+obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
+CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
+# Enable <altivec.h>
+CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
+
+obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
new file mode 100644
index 000000000..cd00b9bdd
--- /dev/null
+++ b/arch/powerpc/lib/checksum_32.S
@@ -0,0 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/export.h>
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+ .text
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * __csum_partial(buff, len, sum)
+ */
+_GLOBAL(__csum_partial)
+ subi r3,r3,4
+ srawi. r6,r4,2 /* Divide len by 4 and also clear carry */
+ beq 3f /* if we're doing < 4 bytes */
+ andi. r0,r3,2 /* Align buffer to longword boundary */
+ beq+ 1f
+ lhz r0,4(r3) /* do 2 bytes to get aligned */
+ subi r4,r4,2
+ addi r3,r3,2
+ srwi. r6,r4,2 /* # words to do */
+ adde r5,r5,r0
+ beq 3f
+1: andi. r6,r6,3 /* Prepare to handle words 4 by 4 */
+ beq 21f
+ mtctr r6
+2: lwzu r0,4(r3)
+ adde r5,r5,r0
+ bdnz 2b
+21: srwi. r6,r4,4 /* # blocks of 4 words to do */
+ beq 3f
+ lwz r0,4(r3)
+ mtctr r6
+ lwz r6,8(r3)
+ adde r5,r5,r0
+ lwz r7,12(r3)
+ adde r5,r5,r6
+ lwzu r8,16(r3)
+ adde r5,r5,r7
+ bdz 23f
+22: lwz r0,4(r3)
+ adde r5,r5,r8
+ lwz r6,8(r3)
+ adde r5,r5,r0
+ lwz r7,12(r3)
+ adde r5,r5,r6
+ lwzu r8,16(r3)
+ adde r5,r5,r7
+ bdnz 22b
+23: adde r5,r5,r8
+3: andi. r0,r4,2
+ beq+ 4f
+ lhz r0,4(r3)
+ addi r3,r3,2
+ adde r5,r5,r0
+4: andi. r0,r4,1
+ beq+ 5f
+ lbz r0,4(r3)
+ slwi r0,r0,8 /* Upper byte of word */
+ adde r5,r5,r0
+5: addze r3,r5 /* add in final carry */
+ blr
+EXPORT_SYMBOL(__csum_partial)
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in 0xffffffff, while copying the block to dst.
+ * If an access exception occurs it returns zero.
+ *
+ * csum_partial_copy_generic(src, dst, len)
+ */
+#define CSUM_COPY_16_BYTES_WITHEX(n) \
+8 ## n ## 0: \
+ lwz r7,4(r4); \
+8 ## n ## 1: \
+ lwz r8,8(r4); \
+8 ## n ## 2: \
+ lwz r9,12(r4); \
+8 ## n ## 3: \
+ lwzu r10,16(r4); \
+8 ## n ## 4: \
+ stw r7,4(r6); \
+ adde r12,r12,r7; \
+8 ## n ## 5: \
+ stw r8,8(r6); \
+ adde r12,r12,r8; \
+8 ## n ## 6: \
+ stw r9,12(r6); \
+ adde r12,r12,r9; \
+8 ## n ## 7: \
+ stwu r10,16(r6); \
+ adde r12,r12,r10
+
+#define CSUM_COPY_16_BYTES_EXCODE(n) \
+ EX_TABLE(8 ## n ## 0b, fault); \
+ EX_TABLE(8 ## n ## 1b, fault); \
+ EX_TABLE(8 ## n ## 2b, fault); \
+ EX_TABLE(8 ## n ## 3b, fault); \
+ EX_TABLE(8 ## n ## 4b, fault); \
+ EX_TABLE(8 ## n ## 5b, fault); \
+ EX_TABLE(8 ## n ## 6b, fault); \
+ EX_TABLE(8 ## n ## 7b, fault);
+
+ .text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+_GLOBAL(csum_partial_copy_generic)
+ li r12,-1
+ addic r0,r0,0 /* clear carry */
+ addi r6,r4,-4
+ neg r0,r4
+ addi r4,r3,-4
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ crset 4*cr7+eq
+ beq 58f
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
+ rlwinm r7,r6,3,0x8
+ rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */
+ cmplwi cr7,r7,0 /* is destination address even ? */
+ andi. r8,r0,3 /* get it word-aligned first */
+ mtctr r8
+ beq+ 61f
+ li r3,0
+70: lbz r9,4(r4) /* do some bytes */
+ addi r4,r4,1
+ slwi r3,r3,8
+ rlwimi r3,r9,0,24,31
+71: stb r9,4(r6)
+ addi r6,r6,1
+ bdnz 70b
+ adde r12,r12,r3
+61: subf r5,r0,r5
+ srwi. r0,r0,2
+ mtctr r0
+ beq 58f
+72: lwzu r9,4(r4) /* do some words */
+ adde r12,r12,r9
+73: stwu r9,4(r6)
+ bdnz 72b
+
+58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+ clrlwi r5,r5,32-LG_CACHELINE_BYTES
+ li r11,4
+ beq 63f
+
+ /* Here we decide how far ahead to prefetch the source */
+ li r3,4
+ cmpwi r0,1
+ li r7,0
+ ble 114f
+ li r7,1
+#if MAX_COPY_PREFETCH > 1
+ /* Heuristically, for large transfers we prefetch
+ MAX_COPY_PREFETCH cachelines ahead. For small transfers
+ we prefetch 1 cacheline ahead. */
+ cmpwi r0,MAX_COPY_PREFETCH
+ ble 112f
+ li r7,MAX_COPY_PREFETCH
+112: mtctr r7
+111: dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+ bdnz 111b
+#else
+ dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114: subf r8,r7,r0
+ mr r0,r7
+ mtctr r8
+
+53: dcbt r3,r4
+54: dcbz r11,r6
+/* the main body of the cacheline loop */
+ CSUM_COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_BYTES >= 32
+ CSUM_COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_BYTES >= 64
+ CSUM_COPY_16_BYTES_WITHEX(2)
+ CSUM_COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_BYTES >= 128
+ CSUM_COPY_16_BYTES_WITHEX(4)
+ CSUM_COPY_16_BYTES_WITHEX(5)
+ CSUM_COPY_16_BYTES_WITHEX(6)
+ CSUM_COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+ bdnz 53b
+ cmpwi r0,0
+ li r3,4
+ li r7,0
+ bne 114b
+
+63: srwi. r0,r5,2
+ mtctr r0
+ beq 64f
+30: lwzu r0,4(r4)
+ adde r12,r12,r0
+31: stwu r0,4(r6)
+ bdnz 30b
+
+64: andi. r0,r5,2
+ beq+ 65f
+40: lhz r0,4(r4)
+ addi r4,r4,2
+41: sth r0,4(r6)
+ adde r12,r12,r0
+ addi r6,r6,2
+65: andi. r0,r5,1
+ beq+ 66f
+50: lbz r0,4(r4)
+51: stb r0,4(r6)
+ slwi r0,r0,8
+ adde r12,r12,r0
+66: addze r3,r12
+ beqlr+ cr7
+ rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
+ blr
+
+fault:
+ li r3,0
+ blr
+
+ EX_TABLE(70b, fault);
+ EX_TABLE(71b, fault);
+ EX_TABLE(72b, fault);
+ EX_TABLE(73b, fault);
+ EX_TABLE(54b, fault);
+
+/*
+ * this stuff handles faults in the cacheline loop; faults in either the
+ * read part or the write part branch to the fault handler above
+ */
+ CSUM_COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_BYTES >= 32
+ CSUM_COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_BYTES >= 64
+ CSUM_COPY_16_BYTES_EXCODE(2)
+ CSUM_COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_BYTES >= 128
+ CSUM_COPY_16_BYTES_EXCODE(4)
+ CSUM_COPY_16_BYTES_EXCODE(5)
+ CSUM_COPY_16_BYTES_EXCODE(6)
+ CSUM_COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+ EX_TABLE(30b, fault);
+ EX_TABLE(31b, fault);
+ EX_TABLE(40b, fault);
+ EX_TABLE(41b, fault);
+ EX_TABLE(50b, fault);
+ EX_TABLE(51b, fault);
+
+EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ * const struct in6_addr *daddr,
+ * __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+ lwz r8, 0(r3)
+ lwz r9, 4(r3)
+ addc r0, r7, r8
+ lwz r10, 8(r3)
+ adde r0, r0, r9
+ lwz r11, 12(r3)
+ adde r0, r0, r10
+ lwz r8, 0(r4)
+ adde r0, r0, r11
+ lwz r9, 4(r4)
+ adde r0, r0, r8
+ lwz r10, 8(r4)
+ adde r0, r0, r9
+ lwz r11, 12(r4)
+ adde r0, r0, r10
+ add r5, r5, r6 /* assumption: len + proto doesn't carry */
+ adde r0, r0, r11
+ adde r0, r0, r5
+ addze r0, r0
+ rotlwi r3, r0, 16
+ add r3, r0, r3
+ not r3, r3
+ rlwinm r3, r3, 16, 16, 31
+ blr
+EXPORT_SYMBOL(csum_ipv6_magic)
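
Note on the tail sequence above: rotlwi/add/not/rlwinm folds the 32-bit running
sum into the final 16-bit ones'-complement checksum. A rough C equivalent of
that fold, given purely as an illustrative sketch (the helper name is invented;
this is not part of the patch):

	/* Fold a 32-bit ones'-complement sum to 16 bits and complement it. */
	static inline unsigned short csum32_fold(unsigned int sum)
	{
		/* rotlwi r3,r0,16 ; add r3,r0,r3 */
		sum += (sum << 16) | (sum >> 16);  /* halves plus end-around carry land in the top 16 bits */
		/* not r3 ; rlwinm r3,r3,16,16,31 */
		return (~sum) >> 16;               /* complement, keep the upper halfword */
	}
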
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
new file mode 100644
index 000000000..d53d8f09a
--- /dev/null
+++ b/arch/powerpc/lib/checksum_64.S
@@ -0,0 +1,443 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains assembly-language implementations
+ * of IP-style 1's complement checksum routines.
+ *
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
+ */
+
+#include <linux/export.h>
+#include <linux/sys.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * Computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit).
+ *
+ * __csum_partial(r3=buff, r4=len, r5=sum)
+ */
+_GLOBAL(__csum_partial)
+ addic r0,r5,0 /* clear carry */
+
+ srdi. r6,r4,3 /* less than 8 bytes? */
+ beq .Lcsum_tail_word
+
+ /*
+ * If only halfword aligned, align to a double word. Since odd
+ * aligned addresses should be rare and they would require more
+ * work to calculate the correct checksum, we ignore that case
+ * and take the potential slowdown of unaligned loads.
+ */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
+ beq .Lcsum_aligned
+
+ li r7,4
+ sub r6,r7,r6
+ mtctr r6
+
+1:
+ lhz r6,0(r3) /* align to doubleword */
+ subi r4,r4,2
+ addi r3,r3,2
+ adde r0,r0,r6
+ bdnz 1b
+
+.Lcsum_aligned:
+ /*
+ * We unroll the loop such that each iteration is 64 bytes with an
+ * entry and exit limb of 64 bytes, meaning a minimum size of
+ * 128 bytes.
+ */
+ srdi. r6,r4,7
+ beq .Lcsum_tail_doublewords /* len < 128 */
+
+ srdi r6,r4,6
+ subi r6,r6,1
+ mtctr r6
+
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ ld r6,0(r3)
+ ld r9,8(r3)
+
+ ld r10,16(r3)
+ ld r11,24(r3)
+
+ /*
+ * On POWER6 and POWER7 back to back adde instructions take 2 cycles
+ * because of the XER dependency. This means the fastest this loop can
+ * go is 16 cycles per iteration. The scheduling of the loop below has
+ * been shown to hit this on both POWER6 and POWER7.
+ */
+ .align 5
+2:
+ adde r0,r0,r6
+ ld r12,32(r3)
+ ld r14,40(r3)
+
+ adde r0,r0,r9
+ ld r15,48(r3)
+ ld r16,56(r3)
+ addi r3,r3,64
+
+ adde r0,r0,r10
+
+ adde r0,r0,r11
+
+ adde r0,r0,r12
+
+ adde r0,r0,r14
+
+ adde r0,r0,r15
+ ld r6,0(r3)
+ ld r9,8(r3)
+
+ adde r0,r0,r16
+ ld r10,16(r3)
+ ld r11,24(r3)
+ bdnz 2b
+
+
+ adde r0,r0,r6
+ ld r12,32(r3)
+ ld r14,40(r3)
+
+ adde r0,r0,r9
+ ld r15,48(r3)
+ ld r16,56(r3)
+ addi r3,r3,64
+
+ adde r0,r0,r10
+ adde r0,r0,r11
+ adde r0,r0,r12
+ adde r0,r0,r14
+ adde r0,r0,r15
+ adde r0,r0,r16
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ andi. r4,r4,63
+
+.Lcsum_tail_doublewords: /* Up to 127 bytes to go */
+ srdi. r6,r4,3
+ beq .Lcsum_tail_word
+
+ mtctr r6
+3:
+ ld r6,0(r3)
+ addi r3,r3,8
+ adde r0,r0,r6
+ bdnz 3b
+
+ andi. r4,r4,7
+
+.Lcsum_tail_word: /* Up to 7 bytes to go */
+ srdi. r6,r4,2
+ beq .Lcsum_tail_halfword
+
+ lwz r6,0(r3)
+ addi r3,r3,4
+ adde r0,r0,r6
+ subi r4,r4,4
+
+.Lcsum_tail_halfword: /* Up to 3 bytes to go */
+ srdi. r6,r4,1
+ beq .Lcsum_tail_byte
+
+ lhz r6,0(r3)
+ addi r3,r3,2
+ adde r0,r0,r6
+ subi r4,r4,2
+
+.Lcsum_tail_byte: /* Up to 1 byte to go */
+ andi. r6,r4,1
+ beq .Lcsum_finish
+
+ lbz r6,0(r3)
+#ifdef __BIG_ENDIAN__
+ sldi r9,r6,8 /* Pad the byte out to 16 bits */
+ adde r0,r0,r9
+#else
+ adde r0,r0,r6
+#endif
+
+.Lcsum_finish:
+ addze r0,r0 /* add in final carry */
+ rldicl r4,r0,32,0 /* fold two 32 bit halves together */
+ add r3,r4,r0
+ srdi r3,r3,32
+ blr
+EXPORT_SYMBOL(__csum_partial)
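
The loop above accumulates 64-bit words with adde (add-with-carry), and
.Lcsum_finish folds the result down to 32 bits with rldicl/add/srdi. A
conceptual C model of those two steps, as an illustrative sketch only
(helper names are invented; this is not part of the patch):

	/* Add one 64-bit word into a ones'-complement running sum
	 * (models the adde chain plus the final addze). */
	static inline unsigned long long csum_add64(unsigned long long sum,
						    unsigned long long v)
	{
		sum += v;
		return sum + (sum < v);		/* wrap the carry back into bit 0 */
	}

	/* Fold the 64-bit sum to 32 bits, as .Lcsum_finish does. */
	static inline unsigned int csum_fold64(unsigned long long sum)
	{
		sum += (sum << 32) | (sum >> 32);	/* rldicl r4,r0,32,0 ; add */
		return (unsigned int)(sum >> 32);	/* srdi r3,r3,32 */
	}
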
+
+
+ .macro srcnr
+100:
+ EX_TABLE(100b,.Lerror_nr)
+ .endm
+
+ .macro source
+150:
+ EX_TABLE(150b,.Lerror)
+ .endm
+
+ .macro dstnr
+200:
+ EX_TABLE(200b,.Lerror_nr)
+ .endm
+
+ .macro dest
+250:
+ EX_TABLE(250b,.Lerror)
+ .endm
+
+/*
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in 0xffffffff (32-bit), while copying the block to dst.
+ * If an access exception occurs, it returns 0.
+ *
+ * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
+ */
+_GLOBAL(csum_partial_copy_generic)
+ li r6,-1
+ addic r0,r6,0 /* clear carry */
+
+ srdi. r6,r5,3 /* less than 8 bytes? */
+ beq .Lcopy_tail_word
+
+ /*
+ * If only halfword aligned, align to a double word. Since odd
+ * aligned addresses should be rare and they would require more
+ * work to calculate the correct checksum, we ignore that case
+ * and take the potential slowdown of unaligned loads.
+ *
+ * If the source and destination are relatively unaligned we only
+ * align the source. This keeps things simple.
+ */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
+ beq .Lcopy_aligned
+
+ li r9,4
+ sub r6,r9,r6
+ mtctr r6
+
+1:
+srcnr; lhz r6,0(r3) /* align to doubleword */
+ subi r5,r5,2
+ addi r3,r3,2
+ adde r0,r0,r6
+dstnr; sth r6,0(r4)
+ addi r4,r4,2
+ bdnz 1b
+
+.Lcopy_aligned:
+ /*
+ * We unroll the loop such that each iteration is 64 bytes with an
+ * entry and exit limb of 64 bytes, meaning a minimum size of
+ * 128 bytes.
+ */
+ srdi. r6,r5,7
+ beq .Lcopy_tail_doublewords /* len < 128 */
+
+ srdi r6,r5,6
+ subi r6,r6,1
+ mtctr r6
+
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+source; ld r6,0(r3)
+source; ld r9,8(r3)
+
+source; ld r10,16(r3)
+source; ld r11,24(r3)
+
+ /*
+ * On POWER6 and POWER7 back to back adde instructions take 2 cycles
+ * because of the XER dependency. This means the fastest this loop can
+ * go is 16 cycles per iteration. The scheduling of the loop below has
+ * been shown to hit this on both POWER6 and POWER7.
+ */
+ .align 5
+2:
+ adde r0,r0,r6
+source; ld r12,32(r3)
+source; ld r14,40(r3)
+
+ adde r0,r0,r9
+source; ld r15,48(r3)
+source; ld r16,56(r3)
+ addi r3,r3,64
+
+ adde r0,r0,r10
+dest; std r6,0(r4)
+dest; std r9,8(r4)
+
+ adde r0,r0,r11
+dest; std r10,16(r4)
+dest; std r11,24(r4)
+
+ adde r0,r0,r12
+dest; std r12,32(r4)
+dest; std r14,40(r4)
+
+ adde r0,r0,r14
+dest; std r15,48(r4)
+dest; std r16,56(r4)
+ addi r4,r4,64
+
+ adde r0,r0,r15
+source; ld r6,0(r3)
+source; ld r9,8(r3)
+
+ adde r0,r0,r16
+source; ld r10,16(r3)
+source; ld r11,24(r3)
+ bdnz 2b
+
+
+ adde r0,r0,r6
+source; ld r12,32(r3)
+source; ld r14,40(r3)
+
+ adde r0,r0,r9
+source; ld r15,48(r3)
+source; ld r16,56(r3)
+ addi r3,r3,64
+
+ adde r0,r0,r10
+dest; std r6,0(r4)
+dest; std r9,8(r4)
+
+ adde r0,r0,r11
+dest; std r10,16(r4)
+dest; std r11,24(r4)
+
+ adde r0,r0,r12
+dest; std r12,32(r4)
+dest; std r14,40(r4)
+
+ adde r0,r0,r14
+dest; std r15,48(r4)
+dest; std r16,56(r4)
+ addi r4,r4,64
+
+ adde r0,r0,r15
+ adde r0,r0,r16
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ andi. r5,r5,63
+
+.Lcopy_tail_doublewords: /* Up to 127 bytes to go */
+ srdi. r6,r5,3
+ beq .Lcopy_tail_word
+
+ mtctr r6
+3:
+srcnr; ld r6,0(r3)
+ addi r3,r3,8
+ adde r0,r0,r6
+dstnr; std r6,0(r4)
+ addi r4,r4,8
+ bdnz 3b
+
+ andi. r5,r5,7
+
+.Lcopy_tail_word: /* Up to 7 bytes to go */
+ srdi. r6,r5,2
+ beq .Lcopy_tail_halfword
+
+srcnr; lwz r6,0(r3)
+ addi r3,r3,4
+ adde r0,r0,r6
+dstnr; stw r6,0(r4)
+ addi r4,r4,4
+ subi r5,r5,4
+
+.Lcopy_tail_halfword: /* Up to 3 bytes to go */
+ srdi. r6,r5,1
+ beq .Lcopy_tail_byte
+
+srcnr; lhz r6,0(r3)
+ addi r3,r3,2
+ adde r0,r0,r6
+dstnr; sth r6,0(r4)
+ addi r4,r4,2
+ subi r5,r5,2
+
+.Lcopy_tail_byte: /* Up to 1 byte to go */
+ andi. r6,r5,1
+ beq .Lcopy_finish
+
+srcnr; lbz r6,0(r3)
+#ifdef __BIG_ENDIAN__
+ sldi r9,r6,8 /* Pad the byte out to 16 bits */
+ adde r0,r0,r9
+#else
+ adde r0,r0,r6
+#endif
+dstnr; stb r6,0(r4)
+
+.Lcopy_finish:
+ addze r0,r0 /* add in final carry */
+ rldicl r4,r0,32,0 /* fold two 32 bit halves together */
+ add r3,r4,r0
+ srdi r3,r3,32
+ blr
+
+.Lerror:
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Lerror_nr:
+ li r3,0
+ blr
+
+EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ * const struct in6_addr *daddr,
+ * __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+ ld r8, 0(r3)
+ ld r9, 8(r3)
+ add r5, r5, r6
+ addc r0, r8, r9
+ ld r10, 0(r4)
+ ld r11, 8(r4)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ rotldi r5, r5, 8
+#endif
+ adde r0, r0, r10
+ add r5, r5, r7
+ adde r0, r0, r11
+ adde r0, r0, r5
+ addze r0, r0
+ rotldi r3, r0, 32 /* fold two 32 bit halves together */
+ add r3, r0, r3
+ srdi r0, r3, 32
+ rotlwi r3, r0, 16 /* fold two 16 bit halves together */
+ add r3, r0, r3
+ not r3, r3
+ rlwinm r3, r3, 16, 16, 31
+ blr
+EXPORT_SYMBOL(csum_ipv6_magic)
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
new file mode 100644
index 000000000..1a14c8780
--- /dev/null
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/export.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/checksum.h>
+#include <linux/uaccess.h>
+
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
+ int len)
+{
+ __wsum csum;
+
+ if (unlikely(!user_read_access_begin(src, len)))
+ return 0;
+
+ csum = csum_partial_copy_generic((void __force *)src, dst, len);
+
+ user_read_access_end();
+ return csum;
+}
+
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len)
+{
+ __wsum csum;
+
+ if (unlikely(!user_write_access_begin(dst, len)))
+ return 0;
+
+ csum = csum_partial_copy_generic(src, (void __force *)dst, len);
+
+ user_write_access_end();
+ return csum;
+}
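
Both wrappers return 0 when the user access faults (the assembly seeds the sum
with 0xffffffff and returns 0 from its exception handler). A hypothetical
caller, with invented names, is sketched below only to illustrate the calling
convention; it is not code from the kernel:

	/* Copy 'len' bytes from user memory into 'kbuf' while checksumming. */
	static int copy_and_csum(const void __user *ubuf, void *kbuf,
				 int len, __wsum *csum)
	{
		__wsum sum = csum_and_copy_from_user(ubuf, kbuf, len);

		if (!sum)		/* user access faulted */
			return -EFAULT;
		*csum = sum;
		return 0;
	}
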
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
new file mode 100644
index 000000000..b00112d7a
--- /dev/null
+++ b/arch/powerpc/lib/code-patching.c
@@ -0,0 +1,500 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/cpuhotplug.h>
+#include <linux/uaccess.h>
+#include <linux/jump_label.h>
+
+#include <asm/debug.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/page.h>
+#include <asm/code-patching.h>
+#include <asm/inst.h>
+
+static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
+{
+ if (!ppc_inst_prefixed(instr)) {
+ u32 val = ppc_inst_val(instr);
+
+ __put_kernel_nofault(patch_addr, &val, u32, failed);
+ } else {
+ u64 val = ppc_inst_as_ulong(instr);
+
+ __put_kernel_nofault(patch_addr, &val, u64, failed);
+ }
+
+ asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
+ "r" (exec_addr));
+
+ return 0;
+
+failed:
+ return -EPERM;
+}
+
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+ return __patch_instruction(addr, instr, addr);
+}
+
+struct patch_context {
+ union {
+ struct vm_struct *area;
+ struct mm_struct *mm;
+ };
+ unsigned long addr;
+ pte_t *pte;
+};
+
+static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
+
+static int map_patch_area(void *addr, unsigned long text_poke_addr);
+static void unmap_patch_area(unsigned long addr);
+
+static bool mm_patch_enabled(void)
+{
+ return IS_ENABLED(CONFIG_SMP) && radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller to perform the CSI and hwsync before
+ * starting/stopping the temp mm.
+ */
+static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
+{
+ struct mm_struct *orig_mm = current->active_mm;
+
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(orig_mm, temp_mm, current);
+
+ WARN_ON(!mm_is_thread_local(temp_mm));
+
+ suspend_breakpoints();
+ return orig_mm;
+}
+
+static void stop_using_temp_mm(struct mm_struct *temp_mm,
+ struct mm_struct *orig_mm)
+{
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(temp_mm, orig_mm, current);
+ restore_breakpoints();
+}
+
+static int text_area_cpu_up(unsigned int cpu)
+{
+ struct vm_struct *area;
+ unsigned long addr;
+ int err;
+
+ area = get_vm_area(PAGE_SIZE, VM_ALLOC);
+ if (!area) {
+ WARN_ONCE(1, "Failed to create text area for cpu %d\n",
+ cpu);
+ return -1;
+ }
+
+ // Map/unmap the area to ensure all page tables are pre-allocated
+ addr = (unsigned long)area->addr;
+ err = map_patch_area(empty_zero_page, addr);
+ if (err)
+ return err;
+
+ unmap_patch_area(addr);
+
+ this_cpu_write(cpu_patching_context.area, area);
+ this_cpu_write(cpu_patching_context.addr, addr);
+ this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
+
+ return 0;
+}
+
+static int text_area_cpu_down(unsigned int cpu)
+{
+ free_vm_area(this_cpu_read(cpu_patching_context.area));
+ this_cpu_write(cpu_patching_context.area, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
+ this_cpu_write(cpu_patching_context.pte, NULL);
+ return 0;
+}
+
+static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
+{
+ struct mmu_gather tlb;
+
+ tlb_gather_mmu(&tlb, mm);
+ free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
+ mmput(mm);
+}
+
+static int text_area_cpu_up_mm(unsigned int cpu)
+{
+ struct mm_struct *mm;
+ unsigned long addr;
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ mm = mm_alloc();
+ if (WARN_ON(!mm))
+ goto fail_no_mm;
+
+ /*
+ * Choose a random page-aligned address from the interval
+ * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
+ * The lower address bound is PAGE_SIZE to avoid the zero-page.
+ */
+ addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
+
+ /*
+ * PTE allocation uses GFP_KERNEL which means we need to
+ * pre-allocate the PTE here because we cannot do the
+ * allocation during patching when IRQs are disabled.
+ *
+	 * We use get_locked_pte() to avoid open coding; the lock
+	 * itself is not needed here.
+ */
+ pte = get_locked_pte(mm, addr, &ptl);
+ if (!pte)
+ goto fail_no_pte;
+ pte_unmap_unlock(pte, ptl);
+
+ this_cpu_write(cpu_patching_context.mm, mm);
+ this_cpu_write(cpu_patching_context.addr, addr);
+
+ return 0;
+
+fail_no_pte:
+ put_patching_mm(mm, addr);
+fail_no_mm:
+ return -ENOMEM;
+}
+
+static int text_area_cpu_down_mm(unsigned int cpu)
+{
+ put_patching_mm(this_cpu_read(cpu_patching_context.mm),
+ this_cpu_read(cpu_patching_context.addr));
+
+ this_cpu_write(cpu_patching_context.mm, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
+
+ return 0;
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
+
+void __init poking_init(void)
+{
+ int ret;
+
+ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ return;
+
+ if (mm_patch_enabled())
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke_mm:online",
+ text_area_cpu_up_mm,
+ text_area_cpu_down_mm);
+ else
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke:online",
+ text_area_cpu_up,
+ text_area_cpu_down);
+
+ /* cpuhp_setup_state returns >= 0 on success */
+ if (WARN_ON(ret < 0))
+ return;
+
+ static_branch_enable(&poking_init_done);
+}
+
+static unsigned long get_patch_pfn(void *addr)
+{
+ if (IS_ENABLED(CONFIG_MODULES) && is_vmalloc_or_module_addr(addr))
+ return vmalloc_to_pfn(addr);
+ else
+ return __pa_symbol(addr) >> PAGE_SHIFT;
+}
+
+/*
+ * This can be called for kernel text or a module.
+ */
+static int map_patch_area(void *addr, unsigned long text_poke_addr)
+{
+ unsigned long pfn = get_patch_pfn(addr);
+
+ return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
+}
+
+static void unmap_patch_area(unsigned long addr)
+{
+ pte_t *ptep;
+ pmd_t *pmdp;
+ pud_t *pudp;
+ p4d_t *p4dp;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset_k(addr);
+ if (WARN_ON(pgd_none(*pgdp)))
+ return;
+
+ p4dp = p4d_offset(pgdp, addr);
+ if (WARN_ON(p4d_none(*p4dp)))
+ return;
+
+ pudp = pud_offset(p4dp, addr);
+ if (WARN_ON(pud_none(*pudp)))
+ return;
+
+ pmdp = pmd_offset(pudp, addr);
+ if (WARN_ON(pmd_none(*pmdp)))
+ return;
+
+ ptep = pte_offset_kernel(pmdp, addr);
+ if (WARN_ON(pte_none(*ptep)))
+ return;
+
+ /*
+	 * In hash, pte_clear flushes the TLB; in radix, we have to flush it explicitly.
+ */
+ pte_clear(&init_mm, addr, ptep);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+}
+
+static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+{
+ int err;
+ u32 *patch_addr;
+ unsigned long text_poke_addr;
+ pte_t *pte;
+ unsigned long pfn = get_patch_pfn(addr);
+ struct mm_struct *patching_mm;
+ struct mm_struct *orig_mm;
+ spinlock_t *ptl;
+
+ patching_mm = __this_cpu_read(cpu_patching_context.mm);
+ text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+
+ __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync": : :"memory");
+
+ /* order context switch after arbitrary prior code */
+ isync();
+
+ orig_mm = start_using_temp_mm(patching_mm);
+
+ err = __patch_instruction(addr, instr, patch_addr);
+
+ /* hwsync performed by __patch_instruction (sync) if successful */
+ if (err)
+ mb(); /* sync */
+
+ /* context synchronisation performed by __patch_instruction (isync or exception) */
+ stop_using_temp_mm(patching_mm, orig_mm);
+
+ pte_clear(patching_mm, text_poke_addr, pte);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+ */
+ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+ pte_unmap_unlock(pte, ptl);
+
+ return err;
+}
+
+static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+ int err;
+ u32 *patch_addr;
+ unsigned long text_poke_addr;
+ pte_t *pte;
+ unsigned long pfn = get_patch_pfn(addr);
+
+ text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = __this_cpu_read(cpu_patching_context.pte);
+ __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync": : :"memory");
+
+ err = __patch_instruction(addr, instr, patch_addr);
+
+ pte_clear(&init_mm, text_poke_addr, pte);
+ flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+
+ return err;
+}
+
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+ int err;
+ unsigned long flags;
+
+ /*
+	 * During early boot, patch_instruction is called
+	 * when text_poke_area is not ready, but we still need
+	 * to allow patching. We just do the plain old patching.
+ */
+ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
+ !static_branch_likely(&poking_init_done))
+ return raw_patch_instruction(addr, instr);
+
+ local_irq_save(flags);
+ if (mm_patch_enabled())
+ err = __do_patch_instruction_mm(addr, instr);
+ else
+ err = __do_patch_instruction(addr, instr);
+ local_irq_restore(flags);
+
+ return err;
+}
+NOKPROBE_SYMBOL(patch_instruction);
+
+int patch_branch(u32 *addr, unsigned long target, int flags)
+{
+ ppc_inst_t instr;
+
+ if (create_branch(&instr, addr, target, flags))
+ return -ERANGE;
+
+ return patch_instruction(addr, instr);
+}
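
A typical use of these two helpers, shown only as an illustrative sketch
(PPC_RAW_NOP() and BRANCH_SET_LINK come from asm/ppc-opcode.h and
asm/code-patching.h; the function and variable names here are invented):

	/* Nop out the instruction at 'ip', then replace the nop with a
	 * relative branch-and-link to 'tramp'. */
	static int patch_call_site(u32 *ip, unsigned long tramp)
	{
		int err;

		err = patch_instruction(ip, ppc_inst(PPC_RAW_NOP()));
		if (err)
			return err;

		/* returns -ERANGE if 'tramp' is outside the +/-32MB bl range */
		return patch_branch(ip, tramp, BRANCH_SET_LINK);
	}
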
+
+/*
+ * Helper to check if a given instruction is a conditional branch
+ * Derived from the conditional checks in analyse_instr()
+ */
+bool is_conditional_branch(ppc_inst_t instr)
+{
+ unsigned int opcode = ppc_inst_primary_opcode(instr);
+
+ if (opcode == 16) /* bc, bca, bcl, bcla */
+ return true;
+ if (opcode == 19) {
+ switch ((ppc_inst_val(instr) >> 1) & 0x3ff) {
+ case 16: /* bclr, bclrl */
+ case 528: /* bcctr, bcctrl */
+ case 560: /* bctar, bctarl */
+ return true;
+ }
+ }
+ return false;
+}
+NOKPROBE_SYMBOL(is_conditional_branch);
+
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
+ unsigned long target, int flags)
+{
+ long offset;
+
+ offset = target;
+ if (! (flags & BRANCH_ABSOLUTE))
+ offset = offset - (unsigned long)addr;
+
+ /* Check we can represent the target in the instruction format */
+ if (!is_offset_in_cond_branch_range(offset))
+ return 1;
+
+ /* Mask out the flags and target, so they don't step on each other. */
+ *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC));
+
+ return 0;
+}
+
+int instr_is_relative_branch(ppc_inst_t instr)
+{
+ if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
+ return 0;
+
+ return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
+}
+
+int instr_is_relative_link_branch(ppc_inst_t instr)
+{
+ return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
+}
+
+static unsigned long branch_iform_target(const u32 *instr)
+{
+ signed long imm;
+
+ imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;
+
+ /* If the top bit of the immediate value is set this is negative */
+ if (imm & 0x2000000)
+ imm -= 0x4000000;
+
+ if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
+ imm += (unsigned long)instr;
+
+ return (unsigned long)imm;
+}
+
+static unsigned long branch_bform_target(const u32 *instr)
+{
+ signed long imm;
+
+ imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;
+
+ /* If the top bit of the immediate value is set this is negative */
+ if (imm & 0x8000)
+ imm -= 0x10000;
+
+ if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
+ imm += (unsigned long)instr;
+
+ return (unsigned long)imm;
+}
+
+unsigned long branch_target(const u32 *instr)
+{
+ if (instr_is_branch_iform(ppc_inst_read(instr)))
+ return branch_iform_target(instr);
+ else if (instr_is_branch_bform(ppc_inst_read(instr)))
+ return branch_bform_target(instr);
+
+ return 0;
+}
+
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
+{
+ unsigned long target;
+ target = branch_target(src);
+
+ if (instr_is_branch_iform(ppc_inst_read(src)))
+ return create_branch(instr, dest, target,
+ ppc_inst_val(ppc_inst_read(src)));
+ else if (instr_is_branch_bform(ppc_inst_read(src)))
+ return create_cond_branch(instr, dest, target,
+ ppc_inst_val(ppc_inst_read(src)));
+
+ return 1;
+}
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
new file mode 100644
index 000000000..933b685e7
--- /dev/null
+++ b/arch/powerpc/lib/copy_32.S
@@ -0,0 +1,515 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Memory copy functions for 32-bit PowerPC.
+ *
+ * Copyright (C) 1996-2005 Paul Mackerras.
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/cache.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
+#include <asm/kasan.h>
+
+#define COPY_16_BYTES \
+ lwz r7,4(r4); \
+ lwz r8,8(r4); \
+ lwz r9,12(r4); \
+ lwzu r10,16(r4); \
+ stw r7,4(r6); \
+ stw r8,8(r6); \
+ stw r9,12(r6); \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_WITHEX(n) \
+8 ## n ## 0: \
+ lwz r7,4(r4); \
+8 ## n ## 1: \
+ lwz r8,8(r4); \
+8 ## n ## 2: \
+ lwz r9,12(r4); \
+8 ## n ## 3: \
+ lwzu r10,16(r4); \
+8 ## n ## 4: \
+ stw r7,4(r6); \
+8 ## n ## 5: \
+ stw r8,8(r6); \
+8 ## n ## 6: \
+ stw r9,12(r6); \
+8 ## n ## 7: \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n) \
+9 ## n ## 0: \
+ addi r5,r5,-(16 * n); \
+ b 104f; \
+9 ## n ## 1: \
+ addi r5,r5,-(16 * n); \
+ b 105f; \
+ EX_TABLE(8 ## n ## 0b,9 ## n ## 0b); \
+ EX_TABLE(8 ## n ## 1b,9 ## n ## 0b); \
+ EX_TABLE(8 ## n ## 2b,9 ## n ## 0b); \
+ EX_TABLE(8 ## n ## 3b,9 ## n ## 0b); \
+ EX_TABLE(8 ## n ## 4b,9 ## n ## 1b); \
+ EX_TABLE(8 ## n ## 5b,9 ## n ## 1b); \
+ EX_TABLE(8 ## n ## 6b,9 ## n ## 1b); \
+ EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
+
+ .text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+#ifndef CONFIG_KASAN
+_GLOBAL(memset16)
+ rlwinm. r0 ,r5, 31, 1, 31
+ addi r6, r3, -4
+ beq- 2f
+ rlwimi r4 ,r4 ,16 ,0 ,15
+ mtctr r0
+1: stwu r4, 4(r6)
+ bdnz 1b
+2: andi. r0, r5, 1
+ beqlr
+ sth r4, 4(r6)
+ blr
+EXPORT_SYMBOL(memset16)
+#endif
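
memset16() above stores the 16-bit value in r4 into r5 consecutive halfwords
starting at r3 and returns the destination pointer. Reference semantics in
portable C, as an illustrative sketch (not the kernel's generic version):

	#include <stddef.h>
	#include <stdint.h>

	/* Store 'count' copies of the 16-bit value 'v' starting at 's'. */
	static void *memset16_ref(uint16_t *s, uint16_t v, size_t count)
	{
		uint16_t *p = s;

		while (count--)
			*p++ = v;
		return s;
	}
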
+
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero. This requires that the destination
+ * area is cacheable. -- paulus
+ *
+ * During early init, cache might not be active yet, so dcbz cannot be used.
+ * We therefore skip the optimised block that uses dcbz. This jump is
+ * replaced by a nop once cache is active. This is done in machine_init()
+ */
+_GLOBAL_KASAN(memset)
+ cmplwi 0,r5,4
+ blt 7f
+
+ rlwimi r4,r4,8,16,23
+ rlwimi r4,r4,16,0,15
+
+ stw r4,0(r3)
+ beqlr
+ andi. r0,r3,3
+ add r5,r0,r5
+ subf r6,r0,r3
+ cmplwi 0,r4,0
+ /*
+	 * Skip optimised block until cache is enabled. Will be replaced
+ * by 'bne' during boot to use normal procedure if r4 is not zero
+ */
+5: b 2f
+ patch_site 5b, patch__memset_nocache
+
+ clrlwi r7,r6,32-LG_CACHELINE_BYTES
+ add r8,r7,r5
+ srwi r9,r8,LG_CACHELINE_BYTES
+ addic. r9,r9,-1 /* total number of complete cachelines */
+ ble 2f
+ xori r0,r7,CACHELINE_MASK & ~3
+ srwi. r0,r0,2
+ beq 3f
+ mtctr r0
+4: stwu r4,4(r6)
+ bdnz 4b
+3: mtctr r9
+ li r7,4
+10: dcbz r7,r6
+ addi r6,r6,CACHELINE_BYTES
+ bdnz 10b
+ clrlwi r5,r8,32-LG_CACHELINE_BYTES
+ addi r5,r5,4
+
+2: srwi r0,r5,2
+ mtctr r0
+ bdz 6f
+1: stwu r4,4(r6)
+ bdnz 1b
+6: andi. r5,r5,3
+ beqlr
+ mtctr r5
+ addi r6,r6,3
+8: stbu r4,1(r6)
+ bdnz 8b
+ blr
+
+7: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r3,-1
+9: stbu r4,1(r6)
+ bdnz 9b
+ blr
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
+
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic. This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ *
+ * During early init, cache might not be active yet, so dcbz cannot be used.
+ * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
+ * replaced by a nop once cache is active. This is done in machine_init()
+ */
+_GLOBAL_KASAN(memmove)
+ cmplw 0,r3,r4
+ bgt backwards_memcpy
+ /* fall through */
+
+_GLOBAL_KASAN(memcpy)
+1: b generic_memcpy
+ patch_site 1b, patch__memcpy_nocache
+
+ add r7,r3,r5 /* test if the src & dst overlap */
+ add r8,r4,r5
+ cmplw 0,r4,r7
+ cmplw 1,r3,r8
+ crand 0,0,4 /* cr0.lt &= cr1.lt */
+ blt generic_memcpy /* if regions overlap */
+
+ addi r4,r4,-4
+ addi r6,r3,-4
+ neg r0,r3
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ beq 58f
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
+ andi. r8,r0,3 /* get it word-aligned first */
+ subf r5,r0,r5
+ mtctr r8
+ beq+ 61f
+70: lbz r9,4(r4) /* do some bytes */
+ addi r4,r4,1
+ addi r6,r6,1
+ stb r9,3(r6)
+ bdnz 70b
+61: srwi. r0,r0,2
+ mtctr r0
+ beq 58f
+72: lwzu r9,4(r4) /* do some words */
+ stwu r9,4(r6)
+ bdnz 72b
+
+58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+ clrlwi r5,r5,32-LG_CACHELINE_BYTES
+ li r11,4
+ mtctr r0
+ beq 63f
+53:
+ dcbz r11,r6
+ COPY_16_BYTES
+#if L1_CACHE_BYTES >= 32
+ COPY_16_BYTES
+#if L1_CACHE_BYTES >= 64
+ COPY_16_BYTES
+ COPY_16_BYTES
+#if L1_CACHE_BYTES >= 128
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+#endif
+#endif
+#endif
+ bdnz 53b
+
+63: srwi. r0,r5,2
+ mtctr r0
+ beq 64f
+30: lwzu r0,4(r4)
+ stwu r0,4(r6)
+ bdnz 30b
+
+64: andi. r0,r5,3
+ mtctr r0
+ beq+ 65f
+ addi r4,r4,3
+ addi r6,r6,3
+40: lbzu r0,1(r4)
+ stbu r0,1(r6)
+ bdnz 40b
+65: blr
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memcpy)
+EXPORT_SYMBOL_KASAN(memmove)
+
+generic_memcpy:
+ srwi. r7,r5,3
+ addi r6,r3,-4
+ addi r4,r4,-4
+ beq 2f /* if less than 8 bytes to do */
+ andi. r0,r6,3 /* get dest word aligned */
+ mtctr r7
+ bne 5f
+1: lwz r7,4(r4)
+ lwzu r8,8(r4)
+ stw r7,4(r6)
+ stwu r8,8(r6)
+ bdnz 1b
+ andi. r5,r5,7
+2: cmplwi 0,r5,4
+ blt 3f
+ lwzu r0,4(r4)
+ addi r5,r5,-4
+ stwu r0,4(r6)
+3: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+ addi r4,r4,3
+ addi r6,r6,3
+4: lbzu r0,1(r4)
+ stbu r0,1(r6)
+ bdnz 4b
+ blr
+5: subfic r0,r0,4
+ mtctr r0
+6: lbz r7,4(r4)
+ addi r4,r4,1
+ stb r7,4(r6)
+ addi r6,r6,1
+ bdnz 6b
+ subf r5,r0,r5
+ rlwinm. r7,r5,32-3,3,31
+ beq 2b
+ mtctr r7
+ b 1b
+
+_GLOBAL(backwards_memcpy)
+ rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */
+ add r6,r3,r5
+ add r4,r4,r5
+ beq 2f
+ andi. r0,r6,3
+ mtctr r7
+ bne 5f
+1: lwz r7,-4(r4)
+ lwzu r8,-8(r4)
+ stw r7,-4(r6)
+ stwu r8,-8(r6)
+ bdnz 1b
+ andi. r5,r5,7
+2: cmplwi 0,r5,4
+ blt 3f
+ lwzu r0,-4(r4)
+ subi r5,r5,4
+ stwu r0,-4(r6)
+3: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+4: lbzu r0,-1(r4)
+ stbu r0,-1(r6)
+ bdnz 4b
+ blr
+5: mtctr r0
+6: lbzu r7,-1(r4)
+ stbu r7,-1(r6)
+ bdnz 6b
+ subf r5,r0,r5
+ rlwinm. r7,r5,32-3,3,31
+ beq 2b
+ mtctr r7
+ b 1b
+
+_GLOBAL(__copy_tofrom_user)
+ addi r4,r4,-4
+ addi r6,r3,-4
+ neg r0,r3
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ beq 58f
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
+ andi. r8,r0,3 /* get it word-aligned first */
+ mtctr r8
+ beq+ 61f
+70: lbz r9,4(r4) /* do some bytes */
+71: stb r9,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 70b
+61: subf r5,r0,r5
+ srwi. r0,r0,2
+ mtctr r0
+ beq 58f
+72: lwzu r9,4(r4) /* do some words */
+73: stwu r9,4(r6)
+ bdnz 72b
+
+ EX_TABLE(70b,100f)
+ EX_TABLE(71b,101f)
+ EX_TABLE(72b,102f)
+ EX_TABLE(73b,103f)
+
+58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+ clrlwi r5,r5,32-LG_CACHELINE_BYTES
+ li r11,4
+ beq 63f
+
+ /* Here we decide how far ahead to prefetch the source */
+ li r3,4
+ cmpwi r0,1
+ li r7,0
+ ble 114f
+ li r7,1
+#if MAX_COPY_PREFETCH > 1
+ /* Heuristically, for large transfers we prefetch
+ MAX_COPY_PREFETCH cachelines ahead. For small transfers
+ we prefetch 1 cacheline ahead. */
+ cmpwi r0,MAX_COPY_PREFETCH
+ ble 112f
+ li r7,MAX_COPY_PREFETCH
+112: mtctr r7
+111: dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+ bdnz 111b
+#else
+ dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114: subf r8,r7,r0
+ mr r0,r7
+ mtctr r8
+
+53: dcbt r3,r4
+54: dcbz r11,r6
+ EX_TABLE(54b,105f)
+/* the main body of the cacheline loop */
+ COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_BYTES >= 32
+ COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_BYTES >= 64
+ COPY_16_BYTES_WITHEX(2)
+ COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_BYTES >= 128
+ COPY_16_BYTES_WITHEX(4)
+ COPY_16_BYTES_WITHEX(5)
+ COPY_16_BYTES_WITHEX(6)
+ COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+ bdnz 53b
+ cmpwi r0,0
+ li r3,4
+ li r7,0
+ bne 114b
+
+63: srwi. r0,r5,2
+ mtctr r0
+ beq 64f
+30: lwzu r0,4(r4)
+31: stwu r0,4(r6)
+ bdnz 30b
+
+64: andi. r0,r5,3
+ mtctr r0
+ beq+ 65f
+40: lbz r0,4(r4)
+41: stb r0,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 40b
+65: li r3,0
+ blr
+
+/* read fault, initial single-byte copy */
+100: li r9,0
+ b 90f
+/* write fault, initial single-byte copy */
+101: li r9,1
+90: subf r5,r8,r5
+ li r3,0
+ b 99f
+/* read fault, initial word copy */
+102: li r9,0
+ b 91f
+/* write fault, initial word copy */
+103: li r9,1
+91: li r3,2
+ b 99f
+
+/*
+ * this stuff handles faults in the cacheline loop and branches to either
+ * 104f (if in read part) or 105f (if in write part), after updating r5
+ */
+ COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_BYTES >= 32
+ COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_BYTES >= 64
+ COPY_16_BYTES_EXCODE(2)
+ COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_BYTES >= 128
+ COPY_16_BYTES_EXCODE(4)
+ COPY_16_BYTES_EXCODE(5)
+ COPY_16_BYTES_EXCODE(6)
+ COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+/* read fault in cacheline loop */
+104: li r9,0
+ b 92f
+/* fault on dcbz (effectively a write fault) */
+/* or write fault in cacheline loop */
+105: li r9,1
+92: li r3,LG_CACHELINE_BYTES
+ mfctr r8
+ add r0,r0,r8
+ b 106f
+/* read fault in final word loop */
+108: li r9,0
+ b 93f
+/* write fault in final word loop */
+109: li r9,1
+93: andi. r5,r5,3
+ li r3,2
+ b 99f
+/* read fault in final byte loop */
+110: li r9,0
+ b 94f
+/* write fault in final byte loop */
+111: li r9,1
+94: li r5,0
+ li r3,0
+/*
+ * At this stage the number of bytes not copied is
+ * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
+ */
+99: mfctr r0
+106: slw r3,r0,r3
+ add. r3,r3,r5
+ beq 120f /* shouldn't happen */
+ cmpwi 0,r9,0
+ bne 120f
+/* for a read fault, first try to continue the copy one byte at a time */
+ mtctr r3
+130: lbz r0,4(r4)
+131: stb r0,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 130b
+/* then clear out the destination: r3 bytes starting at 4(r6) */
+132: mfctr r3
+120: blr
+
+ EX_TABLE(30b,108b)
+ EX_TABLE(31b,109b)
+ EX_TABLE(40b,110b)
+ EX_TABLE(41b,111b)
+ EX_TABLE(130b,132b)
+ EX_TABLE(131b,120b)
+
+EXPORT_SYMBOL(__copy_tofrom_user)
diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
new file mode 100644
index 000000000..bf1014b28
--- /dev/null
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+ * Author - Balbir Singh <bsingharora@gmail.com>
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300: EX_TABLE(300b,.Ldone)
+ .endm
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+46:
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
+ blr
+
+
+_GLOBAL(copy_mc_generic)
+ mr r7,r5
+ cmpldi r5,16
+ blt .Lshort_copy
+
+.Lcopy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ subi r7,r7,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ blt 5f
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
+ addi r3,r3,128
+ subi r7,r7,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 63B to go */
+ bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r8,16(r3)
+err1; std r9,24(r3)
+ addi r3,r3,32
+ subi r7,r7,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+ subi r7,r7,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+ subi r7,r7,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+EXPORT_SYMBOL_GPL(copy_mc_generic);
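
copy_mc_generic() returns 0 on success; if the source takes a machine check,
the err1/err2 handlers above fall back to a byte-by-byte copy and the routine
returns the number of bytes that could not be copied. A hypothetical wrapper,
with invented names, purely to illustrate the return convention:

	static unsigned long copy_or_report(void *dst, void *src,
					    unsigned long size)
	{
		unsigned long left = copy_mc_generic(dst, src, size);

		if (left)
			pr_warn("copied %lu of %lu bytes before a memory error\n",
				size - left, size);
		return left;
	}
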
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
new file mode 100644
index 000000000..f33a2e608
--- /dev/null
+++ b/arch/powerpc/lib/copypage_64.S
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2008 Mark Nelson, IBM Corp.
+ */
+#include <linux/export.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
+
+_GLOBAL_TOC(copy_page)
+BEGIN_FTR_SECTION
+ lis r5,PAGE_SIZE@h
+FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
+ b copypage_power7
+#endif
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+ ori r5,r5,PAGE_SIZE@l
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /*
+ * Hack for toolchain - prefixed instructions cause label difference to
+ * be non-constant even if 8 byte alignment is known, so they can not
+ * be put in FTR sections.
+ */
+ LOAD_REG_ADDR(r10, ppc64_caches)
+BEGIN_FTR_SECTION
+#else
+BEGIN_FTR_SECTION
+ LOAD_REG_ADDR(r10, ppc64_caches)
+#endif
+ lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */
+ lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */
+ li r9,0
+ srd r8,r5,r11
+
+ mtctr r8
+.Lsetup:
+ dcbt r9,r4
+ dcbz r9,r3
+ add r9,r9,r12
+ bdnz .Lsetup
+END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
+ addi r3,r3,-8
+ srdi r8,r5,7 /* page is copied in 128 byte strides */
+ addi r8,r8,-1 /* one stride copied outside loop */
+
+ mtctr r8
+
+ ld r5,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ldu r8,24(r4)
+1: std r5,8(r3)
+ std r6,16(r3)
+ ld r9,8(r4)
+ ld r10,16(r4)
+ std r7,24(r3)
+ std r8,32(r3)
+ ld r11,24(r4)
+ ld r12,32(r4)
+ std r9,40(r3)
+ std r10,48(r3)
+ ld r5,40(r4)
+ ld r6,48(r4)
+ std r11,56(r3)
+ std r12,64(r3)
+ ld r7,56(r4)
+ ld r8,64(r4)
+ std r5,72(r3)
+ std r6,80(r3)
+ ld r9,72(r4)
+ ld r10,80(r4)
+ std r7,88(r3)
+ std r8,96(r3)
+ ld r11,88(r4)
+ ld r12,96(r4)
+ std r9,104(r3)
+ std r10,112(r3)
+ ld r5,104(r4)
+ ld r6,112(r4)
+ std r11,120(r3)
+ stdu r12,128(r3)
+ ld r7,120(r4)
+ ldu r8,128(r4)
+ bdnz 1b
+
+ std r5,8(r3)
+ std r6,16(r3)
+ ld r9,8(r4)
+ ld r10,16(r4)
+ std r7,24(r3)
+ std r8,32(r3)
+ ld r11,24(r4)
+ ld r12,32(r4)
+ std r9,40(r3)
+ std r10,48(r3)
+ ld r5,40(r4)
+ ld r6,48(r4)
+ std r11,56(r3)
+ std r12,64(r3)
+ ld r7,56(r4)
+ ld r8,64(r4)
+ std r5,72(r3)
+ std r6,80(r3)
+ ld r9,72(r4)
+ ld r10,80(r4)
+ std r7,88(r3)
+ std r8,96(r3)
+ ld r11,88(r4)
+ ld r12,96(r4)
+ std r9,104(r3)
+ std r10,112(r3)
+ std r11,120(r3)
+ std r12,128(r3)
+ blr
+EXPORT_SYMBOL(copy_page)
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
new file mode 100644
index 000000000..a783973f1
--- /dev/null
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copypage_power7)
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side. Since source and destination are page
+ * aligned we don't need to clear the bottom 7 bits of either
+ * address.
+ */
+ ori r9,r3,1 /* stream=1 => to */
+
+#ifdef CONFIG_PPC_64K_PAGES
+ lis r7,0x0E01 /* depth=7
+ * units/cachelines=512 */
+#else
+ lis r7,0x0E00 /* depth=7 */
+ ori r7,r7,0x1000 /* units/cachelines=32 */
+#endif
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+ /* setup read stream 0 */
+ dcbt 0,r4,0b01000 /* addr from */
+ dcbt 0,r7,0b01010 /* length and depth from */
+ /* setup write stream 1 */
+ dcbtst 0,r9,0b01000 /* addr to */
+ dcbtst 0,r10,0b01010 /* length and depth to */
+ eieio
+ dcbt 0,r8,0b01010 /* all streams GO */
+
+#ifdef CONFIG_ALTIVEC
+ mflr r0
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl CFUNC(enter_vmx_ops)
+ cmpwi r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
+ mtlr r0
+
+ li r0,(PAGE_SIZE/128)
+ mtctr r0
+
+ beq .Lnonvmx_copy
+
+ addi r1,r1,STACKFRAMESIZE
+
+ li r6,16
+ li r7,32
+ li r8,48
+ li r9,64
+ li r10,80
+ li r11,96
+ li r12,112
+
+ .align 5
+1: lvx v7,0,r4
+ lvx v6,r4,r6
+ lvx v5,r4,r7
+ lvx v4,r4,r8
+ lvx v3,r4,r9
+ lvx v2,r4,r10
+ lvx v1,r4,r11
+ lvx v0,r4,r12
+ addi r4,r4,128
+ stvx v7,0,r3
+ stvx v6,r3,r6
+ stvx v5,r3,r7
+ stvx v4,r3,r8
+ stvx v3,r3,r9
+ stvx v2,r3,r10
+ stvx v1,r3,r11
+ stvx v0,r3,r12
+ addi r3,r3,128
+ bdnz 1b
+
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
+
+#else
+ li r0,(PAGE_SIZE/128)
+ mtctr r0
+
+ stdu r1,-STACKFRAMESIZE(r1)
+#endif
+
+.Lnonvmx_copy:
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+
+1: ld r0,0(r4)
+ ld r5,8(r4)
+ ld r6,16(r4)
+ ld r7,24(r4)
+ ld r8,32(r4)
+ ld r9,40(r4)
+ ld r10,48(r4)
+ ld r11,56(r4)
+ ld r12,64(r4)
+ ld r14,72(r4)
+ ld r15,80(r4)
+ ld r16,88(r4)
+ ld r17,96(r4)
+ ld r18,104(r4)
+ ld r19,112(r4)
+ ld r20,120(r4)
+ addi r4,r4,128
+ std r0,0(r3)
+ std r5,8(r3)
+ std r6,16(r3)
+ std r7,24(r3)
+ std r8,32(r3)
+ std r9,40(r3)
+ std r10,48(r3)
+ std r11,56(r3)
+ std r12,64(r3)
+ std r14,72(r3)
+ std r15,80(r3)
+ std r16,88(r3)
+ std r17,96(r3)
+ std r18,104(r3)
+ std r19,112(r3)
+ std r20,120(r3)
+ addi r3,r3,128
+ bdnz 1b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ addi r1,r1,STACKFRAMESIZE
+ blr
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
new file mode 100644
index 000000000..9af969d2c
--- /dev/null
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -0,0 +1,564 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+#ifndef SELFTEST_CASE
+/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
+#define SELFTEST_CASE 0
+#endif
+
+#ifdef __BIG_ENDIAN__
+#define sLd sld /* Shift towards low-numbered address. */
+#define sHd srd /* Shift towards high-numbered address. */
+#else
+#define sLd srd /* Shift towards low-numbered address. */
+#define sHd sld /* Shift towards high-numbered address. */
+#endif
+
+/*
+ * These macros are used to generate exception table entries.
+ * The exception handlers below use the original arguments
+ * (stored on the stack) and the point where we're up to in
+ * the destination buffer, i.e. the address of the first
+ * unmodified byte. Generally r3 points into the destination
+ * buffer, but the first unmodified byte is at a variable
+ * offset from r3. In the code below, the symbol r3_offset
+ * is set to indicate the current offset at each point in
+ * the code. This offset is then used as a negative offset
+ * from the exception handler code, and those instructions
+ * before the exception handlers are addi instructions that
+ * adjust r3 to point to the correct place.
+ */
+ .macro lex /* exception handler for load */
+100: EX_TABLE(100b, .Lld_exc - r3_offset)
+ .endm
+
+ .macro stex /* exception handler for store */
+100: EX_TABLE(100b, .Lst_exc - r3_offset)
+ .endm
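+
+/*
+ * For example, the load
+ *	lex;	ld	r7,0(r4)
+ * gets an exception table entry whose handler address is
+ * .Lld_exc - r3_offset.  With r3_offset = 16 the entry points 16 bytes
+ * before .Lld_exc, at two of the "addi r3,r3,8" adjustment instructions,
+ * so r3 is moved forward to the first unmodified byte before the common
+ * .Lld_exc code runs.
+ */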
+
+ .align 7
+_GLOBAL_TOC(__copy_tofrom_user)
+#ifdef CONFIG_PPC_BOOK3S_64
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
+ b __copy_tofrom_user_power7
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#endif
+_GLOBAL(__copy_tofrom_user_base)
+ /* first check for a 4kB copy on a 4kB boundary */
+ cmpldi cr1,r5,16
+ cmpdi cr6,r5,4096
+ or r0,r3,r4
+ neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */
+ andi. r0,r0,4095
+ std r3,-24(r1)
+ crand cr0*4+2,cr0*4+2,cr6*4+2
+ std r4,-16(r1)
+ std r5,-8(r1)
+ dcbt 0,r4
+ beq .Lcopy_page_4K
+ andi. r6,r6,7
+ PPC_MTOCRF(0x01,r5)
+ blt cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+ * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+ * cleared.
+ * At the time of writing the only CPU that has this combination of bits
+ * set is Power6.
+ */
+test_feature = (SELFTEST_CASE == 1)
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
+ bne .Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+ CPU_FTR_UNALIGNED_LD_STD)
+.Ldst_aligned:
+ addi r3,r3,-16
+r3_offset = 16
+test_feature = (SELFTEST_CASE == 0)
+BEGIN_FTR_SECTION
+ andi. r0,r4,7
+ bne .Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
+ blt cr1,.Ldo_tail /* if < 16 bytes to copy */
+ srdi r0,r5,5
+ cmpdi cr1,r0,0
+lex; ld r7,0(r4)
+lex; ld r6,8(r4)
+ addi r4,r4,16
+ mtctr r0
+ andi. r0,r5,0x10
+ beq 22f
+ addi r3,r3,16
+r3_offset = 0
+ addi r4,r4,-16
+ mr r9,r7
+ mr r8,r6
+ beq cr1,72f
+21:
+lex; ld r7,16(r4)
+lex; ld r6,24(r4)
+ addi r4,r4,32
+stex; std r9,0(r3)
+r3_offset = 8
+stex; std r8,8(r3)
+r3_offset = 16
+22:
+lex; ld r9,0(r4)
+lex; ld r8,8(r4)
+stex; std r7,16(r3)
+r3_offset = 24
+stex; std r6,24(r3)
+ addi r3,r3,32
+r3_offset = 0
+ bdnz 21b
+72:
+stex; std r9,0(r3)
+r3_offset = 8
+stex; std r8,8(r3)
+r3_offset = 16
+ andi. r5,r5,0xf
+ beq+ 3f
+ addi r4,r4,16
+.Ldo_tail:
+ addi r3,r3,16
+r3_offset = 0
+ bf cr7*4+0,246f
+lex; ld r9,0(r4)
+ addi r4,r4,8
+stex; std r9,0(r3)
+ addi r3,r3,8
+246: bf cr7*4+1,1f
+lex; lwz r9,0(r4)
+ addi r4,r4,4
+stex; stw r9,0(r3)
+ addi r3,r3,4
+1: bf cr7*4+2,2f
+lex; lhz r9,0(r4)
+ addi r4,r4,2
+stex; sth r9,0(r3)
+ addi r3,r3,2
+2: bf cr7*4+3,3f
+lex; lbz r9,0(r4)
+stex; stb r9,0(r3)
+3: li r3,0
+ blr
+
+.Lsrc_unaligned:
+r3_offset = 16
+ srdi r6,r5,3
+ addi r5,r5,-16
+ subf r4,r0,r4
+ srdi r7,r5,4
+ sldi r10,r0,3
+ cmpldi cr6,r6,3
+ andi. r5,r5,7
+ mtctr r7
+ subfic r11,r10,64
+ add r5,r5,r0
+ bt cr7*4+0,28f
+
+lex; ld r9,0(r4) /* 3+2n loads, 2+2n stores */
+lex; ld r0,8(r4)
+ sLd r6,r9,r10
+lex; ldu r9,16(r4)
+ sHd r7,r0,r11
+ sLd r8,r0,r10
+ or r7,r7,r6
+ blt cr6,79f
+lex; ld r0,8(r4)
+ b 2f
+
+28:
+lex; ld r0,0(r4) /* 4+2n loads, 3+2n stores */
+lex; ldu r9,8(r4)
+ sLd r8,r0,r10
+ addi r3,r3,-8
+r3_offset = 24
+ blt cr6,5f
+lex; ld r0,8(r4)
+ sHd r12,r9,r11
+ sLd r6,r9,r10
+lex; ldu r9,16(r4)
+ or r12,r8,r12
+ sHd r7,r0,r11
+ sLd r8,r0,r10
+ addi r3,r3,16
+r3_offset = 8
+ beq cr6,78f
+
+1: or r7,r7,r6
+lex; ld r0,8(r4)
+stex; std r12,8(r3)
+r3_offset = 16
+2: sHd r12,r9,r11
+ sLd r6,r9,r10
+lex; ldu r9,16(r4)
+ or r12,r8,r12
+stex; stdu r7,16(r3)
+r3_offset = 8
+ sHd r7,r0,r11
+ sLd r8,r0,r10
+ bdnz 1b
+
+78:
+stex; std r12,8(r3)
+r3_offset = 16
+ or r7,r7,r6
+79:
+stex; std r7,16(r3)
+r3_offset = 24
+5: sHd r12,r9,r11
+ or r12,r8,r12
+stex; std r12,24(r3)
+r3_offset = 32
+ bne 6f
+ li r3,0
+ blr
+6: cmpwi cr1,r5,8
+ addi r3,r3,32
+r3_offset = 0
+ sLd r9,r9,r10
+ ble cr1,7f
+lex; ld r0,8(r4)
+ sHd r7,r0,r11
+ or r9,r7,r9
+7:
+ bf cr7*4+1,1f
+#ifdef __BIG_ENDIAN__
+ rotldi r9,r9,32
+#endif
+stex; stw r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,32
+#endif
+ addi r3,r3,4
+1: bf cr7*4+2,2f
+#ifdef __BIG_ENDIAN__
+ rotldi r9,r9,16
+#endif
+stex; sth r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,16
+#endif
+ addi r3,r3,2
+2: bf cr7*4+3,3f
+#ifdef __BIG_ENDIAN__
+ rotldi r9,r9,8
+#endif
+stex; stb r9,0(r3)
+#ifdef __LITTLE_ENDIAN__
+ rotrdi r9,r9,8
+#endif
+3: li r3,0
+ blr
+
+.Ldst_unaligned:
+r3_offset = 0
+ PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
+ subf r5,r6,r5
+ li r7,0
+ cmpldi cr1,r5,16
+ bf cr7*4+3,1f
+100: EX_TABLE(100b, .Lld_exc_r7)
+ lbz r0,0(r4)
+100: EX_TABLE(100b, .Lst_exc_r7)
+ stb r0,0(r3)
+ addi r7,r7,1
+1: bf cr7*4+2,2f
+100: EX_TABLE(100b, .Lld_exc_r7)
+ lhzx r0,r7,r4
+100: EX_TABLE(100b, .Lst_exc_r7)
+ sthx r0,r7,r3
+ addi r7,r7,2
+2: bf cr7*4+1,3f
+100: EX_TABLE(100b, .Lld_exc_r7)
+ lwzx r0,r7,r4
+100: EX_TABLE(100b, .Lst_exc_r7)
+ stwx r0,r7,r3
+3: PPC_MTOCRF(0x01,r5)
+ add r4,r6,r4
+ add r3,r6,r3
+ b .Ldst_aligned
+
+.Lshort_copy:
+r3_offset = 0
+ bf cr7*4+0,1f
+lex; lwz r0,0(r4)
+lex; lwz r9,4(r4)
+ addi r4,r4,8
+stex; stw r0,0(r3)
+stex; stw r9,4(r3)
+ addi r3,r3,8
+1: bf cr7*4+1,2f
+lex; lwz r0,0(r4)
+ addi r4,r4,4
+stex; stw r0,0(r3)
+ addi r3,r3,4
+2: bf cr7*4+2,3f
+lex; lhz r0,0(r4)
+ addi r4,r4,2
+stex; sth r0,0(r3)
+ addi r3,r3,2
+3: bf cr7*4+3,4f
+lex; lbz r0,0(r4)
+stex; stb r0,0(r3)
+4: li r3,0
+ blr
+
+/*
+ * exception handlers follow
+ * we have to return the number of bytes not copied
+ * for an exception on a load, we set the rest of the destination to 0
+ * Note that the number of bytes of instructions for adjusting r3 needs
+ * to equal the amount of the adjustment, due to the trick of using
+ * .Lld_exc - r3_offset as the handler address.
+ */
+
+.Lld_exc_r7:
+ add r3,r3,r7
+ b .Lld_exc
+
+ /* adjust by 24 */
+ addi r3,r3,8
+ nop
+ /* adjust by 16 */
+ addi r3,r3,8
+ nop
+ /* adjust by 8 */
+ addi r3,r3,8
+ nop
+
+/*
+ * Here we have had a fault on a load and r3 points to the first
+ * unmodified byte of the destination. We use the original arguments
+ * and r3 to work out how much wasn't copied. Since we load some
+ * distance ahead of the stores, we continue copying byte-by-byte until
+ * we hit the load fault again in order to copy as much as possible.
+ */
+.Lld_exc:
+ ld r6,-24(r1)
+ ld r4,-16(r1)
+ ld r5,-8(r1)
+ subf r6,r6,r3
+ add r4,r4,r6
+ subf r5,r6,r5 /* #bytes left to go */
+
+/*
+ * first see if we can copy any more bytes before hitting another exception
+ */
+ mtctr r5
+r3_offset = 0
+100: EX_TABLE(100b, .Ldone)
+43: lbz r0,0(r4)
+ addi r4,r4,1
+stex; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 43b
+ li r3,0 /* huh? all copied successfully this time? */
+ blr
+
+/*
+ * here we have trapped again, amount remaining is in ctr.
+ */
+.Ldone:
+ mfctr r3
+ blr
+
+/*
+ * exception handlers for stores: we need to work out how many bytes
+ * weren't copied, and we may need to copy some more.
+ * Note that the number of bytes of instructions for adjusting r3 needs
+ * to equal the amount of the adjustment, due to the trick of using
+ * .Lst_exc - r3_offset as the handler address.
+ */
+.Lst_exc_r7:
+ add r3,r3,r7
+ b .Lst_exc
+
+ /* adjust by 24 */
+ addi r3,r3,8
+ nop
+ /* adjust by 16 */
+ addi r3,r3,8
+ nop
+ /* adjust by 8 */
+ addi r3,r3,4
+ /* adjust by 4 */
+ addi r3,r3,4
+.Lst_exc:
+ ld r6,-24(r1) /* original destination pointer */
+ ld r4,-16(r1) /* original source pointer */
+ ld r5,-8(r1) /* original number of bytes */
+ add r7,r6,r5
+ /*
+ * If the destination pointer isn't 8-byte aligned,
+ * we may have got the exception as a result of a
+ * store that overlapped a page boundary, so we may be
+ * able to copy a few more bytes.
+ */
+17: andi. r0,r3,7
+ beq 19f
+ subf r8,r6,r3 /* #bytes copied */
+100: EX_TABLE(100b,19f)
+ lbzx r0,r8,r4
+100: EX_TABLE(100b,19f)
+ stb r0,0(r3)
+ addi r3,r3,1
+ cmpld r3,r7
+ blt 17b
+19: subf r3,r3,r7 /* #bytes not copied in r3 */
+ blr
+
+/*
+ * Routine to copy a whole page of data, optimized for POWER4.
+ * On POWER4 it is more than 50% faster than the simple loop
+ * above (following the .Ldst_aligned label).
+ */
+ .macro exc
+100: EX_TABLE(100b, .Labort)
+ .endm
+.Lcopy_page_4K:
+ std r31,-32(1)
+ std r30,-40(1)
+ std r29,-48(1)
+ std r28,-56(1)
+ std r27,-64(1)
+ std r26,-72(1)
+ std r25,-80(1)
+ std r24,-88(1)
+ std r23,-96(1)
+ std r22,-104(1)
+ std r21,-112(1)
+ std r20,-120(1)
+ li r5,4096/32 - 1
+ addi r3,r3,-8
+ li r0,5
+0: addi r5,r5,-24
+ mtctr r0
+exc; ld r22,640(4)
+exc; ld r21,512(4)
+exc; ld r20,384(4)
+exc; ld r11,256(4)
+exc; ld r9,128(4)
+exc; ld r7,0(4)
+exc; ld r25,648(4)
+exc; ld r24,520(4)
+exc; ld r23,392(4)
+exc; ld r10,264(4)
+exc; ld r8,136(4)
+exc; ldu r6,8(4)
+ cmpwi r5,24
+1:
+exc; std r22,648(3)
+exc; std r21,520(3)
+exc; std r20,392(3)
+exc; std r11,264(3)
+exc; std r9,136(3)
+exc; std r7,8(3)
+exc; ld r28,648(4)
+exc; ld r27,520(4)
+exc; ld r26,392(4)
+exc; ld r31,264(4)
+exc; ld r30,136(4)
+exc; ld r29,8(4)
+exc; std r25,656(3)
+exc; std r24,528(3)
+exc; std r23,400(3)
+exc; std r10,272(3)
+exc; std r8,144(3)
+exc; std r6,16(3)
+exc; ld r22,656(4)
+exc; ld r21,528(4)
+exc; ld r20,400(4)
+exc; ld r11,272(4)
+exc; ld r9,144(4)
+exc; ld r7,16(4)
+exc; std r28,664(3)
+exc; std r27,536(3)
+exc; std r26,408(3)
+exc; std r31,280(3)
+exc; std r30,152(3)
+exc; stdu r29,24(3)
+exc; ld r25,664(4)
+exc; ld r24,536(4)
+exc; ld r23,408(4)
+exc; ld r10,280(4)
+exc; ld r8,152(4)
+exc; ldu r6,24(4)
+ bdnz 1b
+exc; std r22,648(3)
+exc; std r21,520(3)
+exc; std r20,392(3)
+exc; std r11,264(3)
+exc; std r9,136(3)
+exc; std r7,8(3)
+ addi r4,r4,640
+ addi r3,r3,648
+ bge 0b
+ mtctr r5
+exc; ld r7,0(4)
+exc; ld r8,8(4)
+exc; ldu r9,16(4)
+3:
+exc; ld r10,8(4)
+exc; std r7,8(3)
+exc; ld r7,16(4)
+exc; std r8,16(3)
+exc; ld r8,24(4)
+exc; std r9,24(3)
+exc; ldu r9,32(4)
+exc; stdu r10,32(3)
+ bdnz 3b
+4:
+exc; ld r10,8(4)
+exc; std r7,8(3)
+exc; std r8,16(3)
+exc; std r9,24(3)
+exc; std r10,32(3)
+9: ld r20,-120(1)
+ ld r21,-112(1)
+ ld r22,-104(1)
+ ld r23,-96(1)
+ ld r24,-88(1)
+ ld r25,-80(1)
+ ld r26,-72(1)
+ ld r27,-64(1)
+ ld r28,-56(1)
+ ld r29,-48(1)
+ ld r30,-40(1)
+ ld r31,-32(1)
+ li r3,0
+ blr
+
+/*
+ * on an exception, reset to the beginning and jump back into the
+ * standard __copy_tofrom_user
+ */
+.Labort:
+ ld r20,-120(1)
+ ld r21,-112(1)
+ ld r22,-104(1)
+ ld r23,-96(1)
+ ld r24,-88(1)
+ ld r25,-80(1)
+ ld r26,-72(1)
+ ld r27,-64(1)
+ ld r28,-56(1)
+ ld r29,-48(1)
+ ld r30,-40(1)
+ ld r31,-32(1)
+ ld r3,-24(r1)
+ ld r4,-16(r1)
+ li r5,4096
+ b .Ldst_aligned
+EXPORT_SYMBOL(__copy_tofrom_user)
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
new file mode 100644
index 000000000..ac41053c3
--- /dev/null
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -0,0 +1,695 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+
+#ifndef SELFTEST_CASE
+/* 0 == don't use VMX, 1 == use VMX */
+#define SELFTEST_CASE 0
+#endif
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
+#endif
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+#ifdef CONFIG_ALTIVEC
+ .macro err3
+300:
+ EX_TABLE(300b,.Ldo_err3)
+ .endm
+
+ .macro err4
+400:
+ EX_TABLE(400b,.Ldo_err4)
+ .endm
+
+
+.Ldo_err4:
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+.Ldo_err3:
+ bl CFUNC(exit_vmx_usercopy)
+ ld r0,STACKFRAMESIZE+16(r1)
+ mtlr r0
+ b .Lexit
+#endif /* CONFIG_ALTIVEC */
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+.Lexit:
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ ld r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ ld r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+ b __copy_tofrom_user_base
+
+
+_GLOBAL(__copy_tofrom_user_power7)
+ cmpldi r5,16
+ cmpldi cr1,r5,3328
+
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+
+ blt .Lshort_copy
+
+#ifdef CONFIG_ALTIVEC
+test_feature = SELFTEST_CASE
+BEGIN_FTR_SECTION
+ bgt cr1,.Lvmx_copy
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
+
+.Lnonvmx_copy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+ blt 5f
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r7,16(r4)
+err2; ld r8,24(r4)
+err2; ld r9,32(r4)
+err2; ld r10,40(r4)
+err2; ld r11,48(r4)
+err2; ld r12,56(r4)
+err2; ld r14,64(r4)
+err2; ld r15,72(r4)
+err2; ld r16,80(r4)
+err2; ld r17,88(r4)
+err2; ld r18,96(r4)
+err2; ld r19,104(r4)
+err2; ld r20,112(r4)
+err2; ld r21,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r7,16(r3)
+err2; std r8,24(r3)
+err2; std r9,32(r3)
+err2; std r10,40(r3)
+err2; std r11,48(r3)
+err2; std r12,56(r3)
+err2; std r14,64(r3)
+err2; std r15,72(r3)
+err2; std r16,80(r3)
+err2; std r17,88(r3)
+err2; std r18,96(r3)
+err2; std r19,104(r3)
+err2; std r20,112(r3)
+err2; std r21,120(r3)
+ addi r3,r3,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r7,16(r4)
+err1; ld r8,24(r4)
+err1; ld r9,32(r4)
+err1; ld r10,40(r4)
+err1; ld r11,48(r4)
+err1; ld r12,56(r4)
+ addi r4,r4,64
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r7,16(r3)
+err1; std r8,24(r3)
+err1; std r9,32(r3)
+err1; std r10,40(r3)
+err1; std r11,48(r3)
+err1; std r12,56(r3)
+ addi r3,r3,64
+
+ /* Up to 63B to go */
+7: bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r7,16(r4)
+err1; ld r8,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r7,16(r3)
+err1; std r8,24(r3)
+ addi r3,r3,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+.Lunwind_stack_nonvmx_copy:
+ addi r1,r1,STACKFRAMESIZE
+ b .Lnonvmx_copy
+
+.Lvmx_copy:
+#ifdef CONFIG_ALTIVEC
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl CFUNC(enter_vmx_usercopy)
+ cmpwi cr1,r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
+ ld r5,STK_REG(R29)(r1)
+ mtlr r0
+
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side.
+ */
+ clrrdi r6,r4,7
+ clrrdi r9,r3,7
+ ori r9,r9,1 /* stream=1 */
+
+ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
+ cmpldi r7,0x3FF
+ ble 1f
+ li r7,0x3FF
+1: lis r0,0x0E00 /* depth=7 */
+ sldi r7,r7,7
+ or r7,r7,r0
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+ /* setup read stream 0 */
+ dcbt 0,r6,0b01000 /* addr from */
+ dcbt 0,r7,0b01010 /* length and depth from */
+ /* setup write stream 1 */
+ dcbtst 0,r9,0b01000 /* addr to */
+ dcbtst 0,r10,0b01010 /* length and depth to */
+ eieio
+ dcbt 0,r8,0b01010 /* all streams GO */
+
+ beq cr1,.Lunwind_stack_nonvmx_copy
+
+ /*
+ * If source and destination are not relatively aligned we use a
+ * slower permute loop.
+ */
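+	/*
+	 * "Relatively aligned" means the low four bits of source and
+	 * destination match, i.e. (r4 ^ r3) & 0xf == 0.  For example
+	 * (addresses illustrative only), copying from 0x1004 to 0x2004
+	 * can use the aligned path below, while 0x1004 to 0x2002 must
+	 * take .Lvmx_unaligned_copy.
+	 */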
+ xor r6,r4,r3
+ rldicl. r6,r6,0,(64-4)
+ bne .Lvmx_unaligned_copy
+
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err3; lhz r0,0(r4)
+ addi r4,r4,2
+err3; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err3; lwz r0,0(r4)
+ addi r4,r4,4
+err3; stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+err3; ld r0,0(r4)
+ addi r4,r4,8
+err3; std r0,0(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+	/* Get the destination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ bf cr7*4+3,5f
+err3; lvx v1,0,r4
+ addi r4,r4,16
+err3; stvx v1,0,r3
+ addi r3,r3,16
+
+5: bf cr7*4+2,6f
+err3; lvx v1,0,r4
+err3; lvx v0,r4,r9
+ addi r4,r4,32
+err3; stvx v1,0,r3
+err3; stvx v0,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+err3; lvx v3,0,r4
+err3; lvx v2,r4,r9
+err3; lvx v1,r4,r10
+err3; lvx v0,r4,r11
+ addi r4,r4,64
+err3; stvx v3,0,r3
+err3; stvx v2,r3,r9
+err3; stvx v1,r3,r10
+err3; stvx v0,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+err4; lvx v7,0,r4
+err4; lvx v6,r4,r9
+err4; lvx v5,r4,r10
+err4; lvx v4,r4,r11
+err4; lvx v3,r4,r12
+err4; lvx v2,r4,r14
+err4; lvx v1,r4,r15
+err4; lvx v0,r4,r16
+ addi r4,r4,128
+err4; stvx v7,0,r3
+err4; stvx v6,r3,r9
+err4; stvx v5,r3,r10
+err4; stvx v4,r3,r11
+err4; stvx v3,r3,r12
+err4; stvx v2,r3,r14
+err4; stvx v1,r3,r15
+err4; stvx v0,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+err3; lvx v3,0,r4
+err3; lvx v2,r4,r9
+err3; lvx v1,r4,r10
+err3; lvx v0,r4,r11
+ addi r4,r4,64
+err3; stvx v3,0,r3
+err3; stvx v2,r3,r9
+err3; stvx v1,r3,r10
+err3; stvx v0,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+err3; lvx v1,0,r4
+err3; lvx v0,r4,r9
+ addi r4,r4,32
+err3; stvx v1,0,r3
+err3; stvx v0,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+err3; lvx v1,0,r4
+ addi r4,r4,16
+err3; stvx v1,0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err3; ld r0,0(r4)
+ addi r4,r4,8
+err3; std r0,0(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+err3; lwz r0,0(r4)
+ addi r4,r4,4
+err3; stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+err3; lhz r0,0(r4)
+ addi r4,r4,2
+err3; sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+err3; lbz r0,0(r4)
+err3; stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ b CFUNC(exit_vmx_usercopy) /* tail call optimise */
+
+.Lvmx_unaligned_copy:
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err3; lhz r0,0(r4)
+ addi r4,r4,2
+err3; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err3; lwz r0,0(r4)
+ addi r4,r4,4
+err3; stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err3; lwz r7,4(r4)
+ addi r4,r4,8
+err3; stw r0,0(r3)
+err3; stw r7,4(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+	/* Get the destination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ LVS(v16,0,r4) /* Setup permute control vector */
+err3; lvx v0,0,r4
+ addi r4,r4,16
+
+ bf cr7*4+3,5f
+err3; lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+ addi r4,r4,16
+err3; stvx v8,0,r3
+ addi r3,r3,16
+ vor v0,v1,v1
+
+5: bf cr7*4+2,6f
+err3; lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+err3; lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
+ addi r4,r4,32
+err3; stvx v8,0,r3
+err3; stvx v9,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+err3; lvx v3,0,r4
+ VPERM(v8,v0,v3,v16)
+err3; lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+err3; lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+err3; lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
+ addi r4,r4,64
+err3; stvx v8,0,r3
+err3; stvx v9,r3,r9
+err3; stvx v10,r3,r10
+err3; stvx v11,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+err4; lvx v7,0,r4
+ VPERM(v8,v0,v7,v16)
+err4; lvx v6,r4,r9
+ VPERM(v9,v7,v6,v16)
+err4; lvx v5,r4,r10
+ VPERM(v10,v6,v5,v16)
+err4; lvx v4,r4,r11
+ VPERM(v11,v5,v4,v16)
+err4; lvx v3,r4,r12
+ VPERM(v12,v4,v3,v16)
+err4; lvx v2,r4,r14
+ VPERM(v13,v3,v2,v16)
+err4; lvx v1,r4,r15
+ VPERM(v14,v2,v1,v16)
+err4; lvx v0,r4,r16
+ VPERM(v15,v1,v0,v16)
+ addi r4,r4,128
+err4; stvx v8,0,r3
+err4; stvx v9,r3,r9
+err4; stvx v10,r3,r10
+err4; stvx v11,r3,r11
+err4; stvx v12,r3,r12
+err4; stvx v13,r3,r14
+err4; stvx v14,r3,r15
+err4; stvx v15,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+err3; lvx v3,0,r4
+ VPERM(v8,v0,v3,v16)
+err3; lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+err3; lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+err3; lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
+ addi r4,r4,64
+err3; stvx v8,0,r3
+err3; stvx v9,r3,r9
+err3; stvx v10,r3,r10
+err3; stvx v11,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+err3; lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+err3; lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
+ addi r4,r4,32
+err3; stvx v8,0,r3
+err3; stvx v9,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+err3; lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+ addi r4,r4,16
+err3; stvx v8,0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ addi r4,r4,-16 /* Unwind the +16 load offset */
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err3; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err3; lwz r6,4(r4)
+ addi r4,r4,8
+err3; stw r0,0(r3)
+err3; stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+err3; lwz r0,0(r4)
+ addi r4,r4,4
+err3; stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+err3; lhz r0,0(r4)
+ addi r4,r4,2
+err3; sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+err3; lbz r0,0(r4)
+err3; stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ b CFUNC(exit_vmx_usercopy) /* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
new file mode 100644
index 000000000..7e5e1c28e
--- /dev/null
+++ b/arch/powerpc/lib/crtsavres.S
@@ -0,0 +1,545 @@
+/*
+ * Special support for eabi and SVR4
+ *
+ * Copyright (C) 1995, 1996, 1998, 2000, 2001 Free Software Foundation, Inc.
+ * Copyright 2008 Freescale Semiconductor, Inc.
+ * Written By Michael Meissner
+ *
+ * Based on gcc/config/rs6000/crtsavres.asm from gcc
+ * 64 bit additions from reading the PPC elf64abi document.
+ *
+ * This file is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * In addition to the permissions in the GNU General Public License, the
+ * Free Software Foundation gives you unlimited permission to link the
+ * compiled version of this file with other programs, and to distribute
+ * those programs without any restriction coming from the use of this
+ * file. (The General Public License restrictions do apply in other
+ * respects; for example, they cover modification of the file, and
+ * distribution when not linked into another program.)
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING. If not, write to
+ * the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * As a special exception, if you link this library with files
+ * compiled with GCC to produce an executable, this does not cause
+ * the resulting executable to be covered by the GNU General Public License.
+ * This exception does not however invalidate any other reasons why
+ * the executable file might be covered by the GNU General Public License.
+ */
+
+#include <asm/ppc_asm.h>
+
+ .file "crtsavres.S"
+
+#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
+
+ .section ".text"
+
+#ifndef CONFIG_PPC64
+
+/* Routines for saving integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer save area. */
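+/* For example, a prologue that must save r27..r31 does "bl _savegpr_27" */
+/* (or _save32gpr_27); execution falls through the stw chain below, */
+/* storing r27 through r31 before the final blr returns. */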
+
+_GLOBAL(_savegpr_14)
+_GLOBAL(_save32gpr_14)
+ stw 14,-72(11) /* save gp registers */
+_GLOBAL(_savegpr_15)
+_GLOBAL(_save32gpr_15)
+ stw 15,-68(11)
+_GLOBAL(_savegpr_16)
+_GLOBAL(_save32gpr_16)
+ stw 16,-64(11)
+_GLOBAL(_savegpr_17)
+_GLOBAL(_save32gpr_17)
+ stw 17,-60(11)
+_GLOBAL(_savegpr_18)
+_GLOBAL(_save32gpr_18)
+ stw 18,-56(11)
+_GLOBAL(_savegpr_19)
+_GLOBAL(_save32gpr_19)
+ stw 19,-52(11)
+_GLOBAL(_savegpr_20)
+_GLOBAL(_save32gpr_20)
+ stw 20,-48(11)
+_GLOBAL(_savegpr_21)
+_GLOBAL(_save32gpr_21)
+ stw 21,-44(11)
+_GLOBAL(_savegpr_22)
+_GLOBAL(_save32gpr_22)
+ stw 22,-40(11)
+_GLOBAL(_savegpr_23)
+_GLOBAL(_save32gpr_23)
+ stw 23,-36(11)
+_GLOBAL(_savegpr_24)
+_GLOBAL(_save32gpr_24)
+ stw 24,-32(11)
+_GLOBAL(_savegpr_25)
+_GLOBAL(_save32gpr_25)
+ stw 25,-28(11)
+_GLOBAL(_savegpr_26)
+_GLOBAL(_save32gpr_26)
+ stw 26,-24(11)
+_GLOBAL(_savegpr_27)
+_GLOBAL(_save32gpr_27)
+ stw 27,-20(11)
+_GLOBAL(_savegpr_28)
+_GLOBAL(_save32gpr_28)
+ stw 28,-16(11)
+_GLOBAL(_savegpr_29)
+_GLOBAL(_save32gpr_29)
+ stw 29,-12(11)
+_GLOBAL(_savegpr_30)
+_GLOBAL(_save32gpr_30)
+ stw 30,-8(11)
+_GLOBAL(_savegpr_31)
+_GLOBAL(_save32gpr_31)
+ stw 31,-4(11)
+ blr
+
+/* Routines for restoring integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area. */
+
+_GLOBAL(_restgpr_14)
+_GLOBAL(_rest32gpr_14)
+ lwz 14,-72(11) /* restore gp registers */
+_GLOBAL(_restgpr_15)
+_GLOBAL(_rest32gpr_15)
+ lwz 15,-68(11)
+_GLOBAL(_restgpr_16)
+_GLOBAL(_rest32gpr_16)
+ lwz 16,-64(11)
+_GLOBAL(_restgpr_17)
+_GLOBAL(_rest32gpr_17)
+ lwz 17,-60(11)
+_GLOBAL(_restgpr_18)
+_GLOBAL(_rest32gpr_18)
+ lwz 18,-56(11)
+_GLOBAL(_restgpr_19)
+_GLOBAL(_rest32gpr_19)
+ lwz 19,-52(11)
+_GLOBAL(_restgpr_20)
+_GLOBAL(_rest32gpr_20)
+ lwz 20,-48(11)
+_GLOBAL(_restgpr_21)
+_GLOBAL(_rest32gpr_21)
+ lwz 21,-44(11)
+_GLOBAL(_restgpr_22)
+_GLOBAL(_rest32gpr_22)
+ lwz 22,-40(11)
+_GLOBAL(_restgpr_23)
+_GLOBAL(_rest32gpr_23)
+ lwz 23,-36(11)
+_GLOBAL(_restgpr_24)
+_GLOBAL(_rest32gpr_24)
+ lwz 24,-32(11)
+_GLOBAL(_restgpr_25)
+_GLOBAL(_rest32gpr_25)
+ lwz 25,-28(11)
+_GLOBAL(_restgpr_26)
+_GLOBAL(_rest32gpr_26)
+ lwz 26,-24(11)
+_GLOBAL(_restgpr_27)
+_GLOBAL(_rest32gpr_27)
+ lwz 27,-20(11)
+_GLOBAL(_restgpr_28)
+_GLOBAL(_rest32gpr_28)
+ lwz 28,-16(11)
+_GLOBAL(_restgpr_29)
+_GLOBAL(_rest32gpr_29)
+ lwz 29,-12(11)
+_GLOBAL(_restgpr_30)
+_GLOBAL(_rest32gpr_30)
+ lwz 30,-8(11)
+_GLOBAL(_restgpr_31)
+_GLOBAL(_rest32gpr_31)
+ lwz 31,-4(11)
+ blr
+
+/* Routines for restoring integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area. */
+
+_GLOBAL(_restgpr_14_x)
+_GLOBAL(_rest32gpr_14_x)
+ lwz 14,-72(11) /* restore gp registers */
+_GLOBAL(_restgpr_15_x)
+_GLOBAL(_rest32gpr_15_x)
+ lwz 15,-68(11)
+_GLOBAL(_restgpr_16_x)
+_GLOBAL(_rest32gpr_16_x)
+ lwz 16,-64(11)
+_GLOBAL(_restgpr_17_x)
+_GLOBAL(_rest32gpr_17_x)
+ lwz 17,-60(11)
+_GLOBAL(_restgpr_18_x)
+_GLOBAL(_rest32gpr_18_x)
+ lwz 18,-56(11)
+_GLOBAL(_restgpr_19_x)
+_GLOBAL(_rest32gpr_19_x)
+ lwz 19,-52(11)
+_GLOBAL(_restgpr_20_x)
+_GLOBAL(_rest32gpr_20_x)
+ lwz 20,-48(11)
+_GLOBAL(_restgpr_21_x)
+_GLOBAL(_rest32gpr_21_x)
+ lwz 21,-44(11)
+_GLOBAL(_restgpr_22_x)
+_GLOBAL(_rest32gpr_22_x)
+ lwz 22,-40(11)
+_GLOBAL(_restgpr_23_x)
+_GLOBAL(_rest32gpr_23_x)
+ lwz 23,-36(11)
+_GLOBAL(_restgpr_24_x)
+_GLOBAL(_rest32gpr_24_x)
+ lwz 24,-32(11)
+_GLOBAL(_restgpr_25_x)
+_GLOBAL(_rest32gpr_25_x)
+ lwz 25,-28(11)
+_GLOBAL(_restgpr_26_x)
+_GLOBAL(_rest32gpr_26_x)
+ lwz 26,-24(11)
+_GLOBAL(_restgpr_27_x)
+_GLOBAL(_rest32gpr_27_x)
+ lwz 27,-20(11)
+_GLOBAL(_restgpr_28_x)
+_GLOBAL(_rest32gpr_28_x)
+ lwz 28,-16(11)
+_GLOBAL(_restgpr_29_x)
+_GLOBAL(_rest32gpr_29_x)
+ lwz 29,-12(11)
+_GLOBAL(_restgpr_30_x)
+_GLOBAL(_rest32gpr_30_x)
+ lwz 30,-8(11)
+_GLOBAL(_restgpr_31_x)
+_GLOBAL(_rest32gpr_31_x)
+ lwz 0,4(11)
+ lwz 31,-4(11)
+ mtlr 0
+ mr 1,11
+ blr
+
+#ifdef CONFIG_ALTIVEC
+/* Called with r0 pointing just beyond the end of the vector save area. */
+
+_GLOBAL(_savevr_20)
+ li r11,-192
+ stvx v20,r11,r0
+_GLOBAL(_savevr_21)
+ li r11,-176
+ stvx v21,r11,r0
+_GLOBAL(_savevr_22)
+ li r11,-160
+ stvx v22,r11,r0
+_GLOBAL(_savevr_23)
+ li r11,-144
+ stvx v23,r11,r0
+_GLOBAL(_savevr_24)
+ li r11,-128
+ stvx v24,r11,r0
+_GLOBAL(_savevr_25)
+ li r11,-112
+ stvx v25,r11,r0
+_GLOBAL(_savevr_26)
+ li r11,-96
+ stvx v26,r11,r0
+_GLOBAL(_savevr_27)
+ li r11,-80
+ stvx v27,r11,r0
+_GLOBAL(_savevr_28)
+ li r11,-64
+ stvx v28,r11,r0
+_GLOBAL(_savevr_29)
+ li r11,-48
+ stvx v29,r11,r0
+_GLOBAL(_savevr_30)
+ li r11,-32
+ stvx v30,r11,r0
+_GLOBAL(_savevr_31)
+ li r11,-16
+ stvx v31,r11,r0
+ blr
+
+_GLOBAL(_restvr_20)
+ li r11,-192
+ lvx v20,r11,r0
+_GLOBAL(_restvr_21)
+ li r11,-176
+ lvx v21,r11,r0
+_GLOBAL(_restvr_22)
+ li r11,-160
+ lvx v22,r11,r0
+_GLOBAL(_restvr_23)
+ li r11,-144
+ lvx v23,r11,r0
+_GLOBAL(_restvr_24)
+ li r11,-128
+ lvx v24,r11,r0
+_GLOBAL(_restvr_25)
+ li r11,-112
+ lvx v25,r11,r0
+_GLOBAL(_restvr_26)
+ li r11,-96
+ lvx v26,r11,r0
+_GLOBAL(_restvr_27)
+ li r11,-80
+ lvx v27,r11,r0
+_GLOBAL(_restvr_28)
+ li r11,-64
+ lvx v28,r11,r0
+_GLOBAL(_restvr_29)
+ li r11,-48
+ lvx v29,r11,r0
+_GLOBAL(_restvr_30)
+ li r11,-32
+ lvx v30,r11,r0
+_GLOBAL(_restvr_31)
+ li r11,-16
+ lvx v31,r11,r0
+ blr
+
+#endif /* CONFIG_ALTIVEC */
+
+#else /* CONFIG_PPC64 */
+
+.globl _savegpr0_14
+_savegpr0_14:
+ std r14,-144(r1)
+.globl _savegpr0_15
+_savegpr0_15:
+ std r15,-136(r1)
+.globl _savegpr0_16
+_savegpr0_16:
+ std r16,-128(r1)
+.globl _savegpr0_17
+_savegpr0_17:
+ std r17,-120(r1)
+.globl _savegpr0_18
+_savegpr0_18:
+ std r18,-112(r1)
+.globl _savegpr0_19
+_savegpr0_19:
+ std r19,-104(r1)
+.globl _savegpr0_20
+_savegpr0_20:
+ std r20,-96(r1)
+.globl _savegpr0_21
+_savegpr0_21:
+ std r21,-88(r1)
+.globl _savegpr0_22
+_savegpr0_22:
+ std r22,-80(r1)
+.globl _savegpr0_23
+_savegpr0_23:
+ std r23,-72(r1)
+.globl _savegpr0_24
+_savegpr0_24:
+ std r24,-64(r1)
+.globl _savegpr0_25
+_savegpr0_25:
+ std r25,-56(r1)
+.globl _savegpr0_26
+_savegpr0_26:
+ std r26,-48(r1)
+.globl _savegpr0_27
+_savegpr0_27:
+ std r27,-40(r1)
+.globl _savegpr0_28
+_savegpr0_28:
+ std r28,-32(r1)
+.globl _savegpr0_29
+_savegpr0_29:
+ std r29,-24(r1)
+.globl _savegpr0_30
+_savegpr0_30:
+ std r30,-16(r1)
+.globl _savegpr0_31
+_savegpr0_31:
+ std r31,-8(r1)
+ std r0,16(r1)
+ blr
+
+.globl _restgpr0_14
+_restgpr0_14:
+ ld r14,-144(r1)
+.globl _restgpr0_15
+_restgpr0_15:
+ ld r15,-136(r1)
+.globl _restgpr0_16
+_restgpr0_16:
+ ld r16,-128(r1)
+.globl _restgpr0_17
+_restgpr0_17:
+ ld r17,-120(r1)
+.globl _restgpr0_18
+_restgpr0_18:
+ ld r18,-112(r1)
+.globl _restgpr0_19
+_restgpr0_19:
+ ld r19,-104(r1)
+.globl _restgpr0_20
+_restgpr0_20:
+ ld r20,-96(r1)
+.globl _restgpr0_21
+_restgpr0_21:
+ ld r21,-88(r1)
+.globl _restgpr0_22
+_restgpr0_22:
+ ld r22,-80(r1)
+.globl _restgpr0_23
+_restgpr0_23:
+ ld r23,-72(r1)
+.globl _restgpr0_24
+_restgpr0_24:
+ ld r24,-64(r1)
+.globl _restgpr0_25
+_restgpr0_25:
+ ld r25,-56(r1)
+.globl _restgpr0_26
+_restgpr0_26:
+ ld r26,-48(r1)
+.globl _restgpr0_27
+_restgpr0_27:
+ ld r27,-40(r1)
+.globl _restgpr0_28
+_restgpr0_28:
+ ld r28,-32(r1)
+.globl _restgpr0_29
+_restgpr0_29:
+ ld r0,16(r1)
+ ld r29,-24(r1)
+ mtlr r0
+ ld r30,-16(r1)
+ ld r31,-8(r1)
+ blr
+
+.globl _restgpr0_30
+_restgpr0_30:
+ ld r30,-16(r1)
+.globl _restgpr0_31
+_restgpr0_31:
+ ld r0,16(r1)
+ ld r31,-8(r1)
+ mtlr r0
+ blr
+
+#ifdef CONFIG_ALTIVEC
+/* Called with r0 pointing just beyond the end of the vector save area. */
+
+.globl _savevr_20
+_savevr_20:
+ li r12,-192
+ stvx v20,r12,r0
+.globl _savevr_21
+_savevr_21:
+ li r12,-176
+ stvx v21,r12,r0
+.globl _savevr_22
+_savevr_22:
+ li r12,-160
+ stvx v22,r12,r0
+.globl _savevr_23
+_savevr_23:
+ li r12,-144
+ stvx v23,r12,r0
+.globl _savevr_24
+_savevr_24:
+ li r12,-128
+ stvx v24,r12,r0
+.globl _savevr_25
+_savevr_25:
+ li r12,-112
+ stvx v25,r12,r0
+.globl _savevr_26
+_savevr_26:
+ li r12,-96
+ stvx v26,r12,r0
+.globl _savevr_27
+_savevr_27:
+ li r12,-80
+ stvx v27,r12,r0
+.globl _savevr_28
+_savevr_28:
+ li r12,-64
+ stvx v28,r12,r0
+.globl _savevr_29
+_savevr_29:
+ li r12,-48
+ stvx v29,r12,r0
+.globl _savevr_30
+_savevr_30:
+ li r12,-32
+ stvx v30,r12,r0
+.globl _savevr_31
+_savevr_31:
+ li r12,-16
+ stvx v31,r12,r0
+ blr
+
+.globl _restvr_20
+_restvr_20:
+ li r12,-192
+ lvx v20,r12,r0
+.globl _restvr_21
+_restvr_21:
+ li r12,-176
+ lvx v21,r12,r0
+.globl _restvr_22
+_restvr_22:
+ li r12,-160
+ lvx v22,r12,r0
+.globl _restvr_23
+_restvr_23:
+ li r12,-144
+ lvx v23,r12,r0
+.globl _restvr_24
+_restvr_24:
+ li r12,-128
+ lvx v24,r12,r0
+.globl _restvr_25
+_restvr_25:
+ li r12,-112
+ lvx v25,r12,r0
+.globl _restvr_26
+_restvr_26:
+ li r12,-96
+ lvx v26,r12,r0
+.globl _restvr_27
+_restvr_27:
+ li r12,-80
+ lvx v27,r12,r0
+.globl _restvr_28
+_restvr_28:
+ li r12,-64
+ lvx v28,r12,r0
+.globl _restvr_29
+_restvr_29:
+ li r12,-48
+ lvx v29,r12,r0
+.globl _restvr_30
+_restvr_30:
+ li r12,-32
+ lvx v30,r12,r0
+.globl _restvr_31
+_restvr_31:
+ li r12,-16
+ lvx v31,r12,r0
+ blr
+
+#endif /* CONFIG_ALTIVEC */
+
+#endif /* CONFIG_PPC64 */
+
+#endif
diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S
new file mode 100644
index 000000000..3d5426e7d
--- /dev/null
+++ b/arch/powerpc/lib/div64.S
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Divide a 64-bit unsigned number by a 32-bit unsigned number.
+ * This routine assumes that the top 32 bits of the dividend are
+ * non-zero to start with.
+ * On entry, r3 points to the dividend, which gets overwritten with
+ * the 64-bit quotient, and r4 contains the divisor.
+ * On exit, r3 contains the remainder.
+ *
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ */
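+/*
+ * In C terms the routine behaves roughly like the sketch below (the
+ * parameter names are illustrative only):
+ *
+ *	u32 __div64_32(u64 *dividend, u32 divisor)
+ *	{
+ *		u32 rem = *dividend % divisor;
+ *
+ *		*dividend /= divisor;
+ *		return rem;
+ *	}
+ */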
+#include <asm/ppc_asm.h>
+#include <asm/processor.h>
+
+_GLOBAL(__div64_32)
+ lwz r5,0(r3) # get the dividend into r5/r6
+ lwz r6,4(r3)
+ cmplw r5,r4
+ li r7,0
+ li r8,0
+ blt 1f
+ divwu r7,r5,r4 # if dividend.hi >= divisor,
+ mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor
+ subf. r5,r0,r5 # dividend.hi %= divisor
+ beq 3f
+1: mr r11,r5 # here dividend.hi != 0
+ andis. r0,r5,0xc000
+ bne 2f
+ cntlzw r0,r5 # we are shifting the dividend right
+ li r10,-1 # to make it < 2^32, and shifting
+ srw r10,r10,r0 # the divisor right the same amount,
+ addc r9,r4,r10 # rounding up (so the estimate cannot
+ andc r11,r6,r10 # ever be too large, only too small)
+ andc r9,r9,r10
+ addze r9,r9
+ or r11,r5,r11
+ rotlw r9,r9,r0
+ rotlw r11,r11,r0
+ divwu r11,r11,r9 # then we divide the shifted quantities
+2: mullw r10,r11,r4 # to get an estimate of the quotient,
+ mulhwu r9,r11,r4 # multiply the estimate by the divisor,
+	subfc	r6,r10,r6	# take the product from the dividend,
+ add r8,r8,r11 # and add the estimate to the accumulated
+ subfe. r5,r9,r5 # quotient
+ bne 1b
+3: cmplw r6,r4
+ blt 4f
+ divwu r0,r6,r4 # perform the remaining 32-bit division
+ mullw r10,r0,r4 # and get the remainder
+ add r8,r8,r0
+ subf r6,r10,r6
+4: stw r7,0(r3) # return the quotient in *r3
+ stw r8,4(r3)
+ mr r3,r6 # return the remainder in r3
+ blr
diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c
new file mode 100644
index 000000000..e834079d2
--- /dev/null
+++ b/arch/powerpc/lib/error-inject.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ /*
+ * Emulate 'blr'. 'regs' represents the state on entry of a predefined
+ * function in the kernel/module, captured on a kprobe. We don't need
+ * to worry about 32-bit userspace on a 64-bit kernel.
+ */
+ regs_set_return_ip(regs, regs->link);
+}
+NOKPROBE_SYMBOL(override_function_with_return);
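+
+/*
+ * For context, this is the arch hook used by the function error injection
+ * framework: a kprobe placed at the entry of a function marked with
+ * ALLOW_ERROR_INJECTION() can set a return value and call this helper,
+ * which sends execution straight back to the caller as if the function
+ * had executed "blr" immediately.
+ */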
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
new file mode 100644
index 000000000..480172fbd
--- /dev/null
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -0,0 +1,862 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <asm/feature-fixups.h>
+#include <asm/ppc_asm.h>
+#include <asm/synch.h>
+#include <asm/asm-compat.h>
+#include <asm/ppc-opcode.h>
+
+ .text
+
+#define globl(x) \
+ .globl x; \
+x:
+
+globl(ftr_fixup_test1)
+ or 1,1,1
+ or 2,2,2 /* fixup will nop out this instruction */
+ or 3,3,3
+
+globl(end_ftr_fixup_test1)
+
+globl(ftr_fixup_test1_orig)
+ or 1,1,1
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test1_expected)
+ or 1,1,1
+ nop
+ or 3,3,3
+
+globl(ftr_fixup_test2)
+ or 1,1,1
+ or 2,2,2 /* fixup will replace this with ftr_fixup_test2_alt */
+ or 3,3,3
+
+globl(end_ftr_fixup_test2)
+
+globl(ftr_fixup_test2_orig)
+ or 1,1,1
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test2_alt)
+ or 31,31,31
+
+globl(ftr_fixup_test2_expected)
+ or 1,1,1
+ or 31,31,31
+ or 3,3,3
+
+globl(ftr_fixup_test3)
+ or 1,1,1
+ or 2,2,2 /* fixup will fail to replace this */
+ or 3,3,3
+
+globl(end_ftr_fixup_test3)
+
+globl(ftr_fixup_test3_orig)
+ or 1,1,1
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test3_alt)
+ or 31,31,31
+ or 31,31,31
+
+globl(ftr_fixup_test4)
+ or 1,1,1
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 3,3,3
+
+globl(end_ftr_fixup_test4)
+
+globl(ftr_fixup_test4_expected)
+ or 1,1,1
+ or 31,31,31
+ or 31,31,31
+ nop
+ nop
+ or 3,3,3
+
+globl(ftr_fixup_test4_orig)
+ or 1,1,1
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test4_alt)
+ or 31,31,31
+ or 31,31,31
+
+
+globl(ftr_fixup_test5)
+ or 1,1,1
+BEGIN_FTR_SECTION
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+FTR_SECTION_ELSE
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+1: bdnz 3b
+ALT_FTR_SECTION_END(0, 1)
+ or 1,1,1
+
+globl(end_ftr_fixup_test5)
+
+globl(ftr_fixup_test5_expected)
+ or 1,1,1
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+1: bdnz 3b
+ or 1,1,1
+
+globl(ftr_fixup_test6)
+1: or 1,1,1
+BEGIN_FTR_SECTION
+ or 5,5,5
+2: PPC_LCMPI r3,0
+ beq 4f
+ blt 2b
+ b 1b
+ b 4f
+FTR_SECTION_ELSE
+2: or 2,2,2
+ PPC_LCMPI r3,1
+ beq 3f
+ blt 2b
+ b 3f
+ b 1b
+ALT_FTR_SECTION_END(0, 1)
+3: or 1,1,1
+ or 2,2,2
+4: or 3,3,3
+
+globl(end_ftr_fixup_test6)
+
+globl(ftr_fixup_test6_expected)
+1: or 1,1,1
+2: or 2,2,2
+ PPC_LCMPI r3,1
+ beq 3f
+ blt 2b
+ b 3f
+ b 1b
+3: or 1,1,1
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_test7)
+ or 1,1,1
+BEGIN_FTR_SECTION
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+FTR_SECTION_ELSE
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+ bdnz 3b
+1:
+ALT_FTR_SECTION_END(0, 1)
+ or 1,1,1
+ or 1,1,1
+
+globl(end_ftr_fixup_test7)
+ nop
+
+globl(ftr_fixup_test7_expected)
+ or 1,1,1
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+ bdnz 3b
+1: or 1,1,1
+
+#if 0
+/* Test that if we have a larger else case the assembler spots it and
+ * reports an error. #if 0'ed so as not to break the build normally.
+ */
+ftr_fixup_test_too_big:
+ or 1,1,1
+BEGIN_FTR_SECTION
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+FTR_SECTION_ELSE
+ or 3,3,3
+ or 3,3,3
+ or 3,3,3
+ or 3,3,3
+ALT_FTR_SECTION_END(0, 1)
+ or 1,1,1
+#endif
+
+#define MAKE_MACRO_TEST(TYPE) \
+globl(ftr_fixup_test_ ##TYPE##_macros) \
+ or 1,1,1; \
+ /* Basic test, this section should all be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic test, this section should NOT be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, inner section should be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(80) \
+ or 3,3,3; \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 1, 80) \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, whole section should be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(80) \
+ or 3,3,3; \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 0, 80) \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, none should be nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(80) \
+ or 3,3,3; \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 0, 80) \
+ or 2,2,2; \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic alt section test, default case should be taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 5,5,5; \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic alt section test, else case should be taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+ or 31,31,31; \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt with smaller else case, should be padded with nops */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in default case */ \
+ /* Default case should be taken, with nop'ed inner section */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 3,3,3; \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 1, 95) \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 2,2,2; \
+ or 2,2,2; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in else, default taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 5,5,5; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 1, 95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in else, else taken & nop */ \
+BEGIN_##TYPE##_SECTION \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+##TYPE##_SECTION_ELSE \
+ or 5,5,5; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 3,3,3; \
+END_##TYPE##_SECTION_NESTED(0, 1, 95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, default taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, else taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, all nop'ed */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
+ or 2,2,2; \
+END_##TYPE##_SECTION(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, default with inner default taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, default with inner else taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 0) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else with inner default taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else with inner else taken */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+BEGIN_##TYPE##_SECTION_NESTED(95) \
+ or 1,1,1; \
+##TYPE##_SECTION_ELSE_NESTED(95) \
+ or 5,5,5; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+ or 31,31,31; \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) \
+ or 31,31,31; \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else can have large else case */ \
+BEGIN_##TYPE##_SECTION \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+##TYPE##_SECTION_ELSE \
+BEGIN_##TYPE##_SECTION_NESTED(94) \
+ or 5,5,5; \
+ or 5,5,5; \
+ or 5,5,5; \
+ or 5,5,5; \
+##TYPE##_SECTION_ELSE_NESTED(94) \
+ or 1,1,1; \
+ or 1,1,1; \
+ or 1,1,1; \
+ or 1,1,1; \
+ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) \
+ALT_##TYPE##_SECTION_END(0, 1) \
+ or 1,1,1; \
+ or 1,1,1;
+
+#define MAKE_MACRO_TEST_EXPECTED(TYPE) \
+globl(ftr_fixup_test_ ##TYPE##_macros_expected) \
+ or 1,1,1; \
+ /* Basic test, this section should all be nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ nop; \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic test, this section should NOT be nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+ or 2,2,2; \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, inner section should be nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION_NESTED(0, 1, 80) */ \
+ or 2,2,2; \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, whole section should be nop'ed */ \
+	/* NB. inner section is not nop'ed, but then the entire outer is */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ nop; \
+ nop; \
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */ \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nesting test, none should be nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(80) */ \
+ or 3,3,3; \
+ or 3,3,3; \
+/* END_##TYPE##_SECTION_NESTED(0, 0, 80) */ \
+ or 2,2,2; \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic alt section test, default case should be taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 5,5,5; */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Basic alt section test, else case should be taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 31,31,31; \
+ or 31,31,31; \
+ or 31,31,31; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt with smaller else case, should be padded with nops */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 31,31,31; \
+ nop; \
+ nop; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in default case */ \
+ /* Default case should be taken, with nop'ed inner section */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 3,3,3; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ nop; \
+ nop; \
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
+ or 3,3,3; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 2,2,2; */ \
+ /* or 2,2,2; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in else, default taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 3,3,3; \
+ or 3,3,3; \
+ or 3,3,3; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 5,5,5; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 3,3,3; */ \
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Alt section with nested section in else, else taken & nop */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+ /* or 3,3,3; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 5,5,5; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ nop; \
+/* END_##TYPE##_SECTION_NESTED(0, 1, 95) */ \
+ or 5,5,5; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, default taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ or 1,1,1; \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, else taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 1,1,1; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ or 5,5,5; \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
+ or 2,2,2; \
+/* END_##TYPE##_SECTION(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Feature section with nested alt section, all nop'ed */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ nop; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ nop; \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
+ nop; \
+/* END_##TYPE##_SECTION(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, default with inner default taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ or 1,1,1; \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 95) */ \
+ or 2,2,2; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 31,31,31; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ /* or 5,5,5; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ /* or 1,1,1; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
+ /* or 31,31,31; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, default with inner else taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ or 2,2,2; \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 1,1,1; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ or 5,5,5; \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
+ or 2,2,2; \
+/* ##TYPE##_SECTION_ELSE */ \
+ /* or 31,31,31; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ /* or 5,5,5; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ /* or 1,1,1; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
+ /* or 31,31,31; */ \
+/* ALT_##TYPE##_SECTION_END(0, 0) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else with inner default taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 2,2,2; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 1,1,1; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
+ /* or 2,2,2; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 31,31,31; \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ or 5,5,5; \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ /* or 1,1,1; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 0, 94) */ \
+ or 31,31,31; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else with inner else taken */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 2,2,2; */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(95) */ \
+ /* or 1,1,1; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(95) */ \
+ /* or 5,5,5; */ \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 95) */ \
+ /* or 2,2,2; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+ or 31,31,31; \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ /* or 5,5,5; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ or 1,1,1; \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */ \
+ or 31,31,31; \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ /* Nested alt sections, else can have large else case */ \
+/* BEGIN_##TYPE##_SECTION */ \
+ /* or 2,2,2; */ \
+ /* or 2,2,2; */ \
+ /* or 2,2,2; */ \
+ /* or 2,2,2; */ \
+/* ##TYPE##_SECTION_ELSE */ \
+/* BEGIN_##TYPE##_SECTION_NESTED(94) */ \
+ /* or 5,5,5; */ \
+ /* or 5,5,5; */ \
+ /* or 5,5,5; */ \
+ /* or 5,5,5; */ \
+/* ##TYPE##_SECTION_ELSE_NESTED(94) */ \
+ or 1,1,1; \
+ or 1,1,1; \
+ or 1,1,1; \
+ or 1,1,1; \
+/* ALT_##TYPE##_SECTION_END_NESTED(0, 1, 94) */ \
+/* ALT_##TYPE##_SECTION_END(0, 1) */ \
+ or 1,1,1; \
+ or 1,1,1;
+
+MAKE_MACRO_TEST(FTR);
+MAKE_MACRO_TEST_EXPECTED(FTR);
+
+#ifdef CONFIG_PPC64
+MAKE_MACRO_TEST(FW_FTR);
+MAKE_MACRO_TEST_EXPECTED(FW_FTR);
+#endif
+
+globl(lwsync_fixup_test)
+1: or 1,1,1
+ LWSYNC
+globl(end_lwsync_fixup_test)
+
+globl(lwsync_fixup_test_expected_LWSYNC)
+1: or 1,1,1
+ lwsync
+
+globl(lwsync_fixup_test_expected_SYNC)
+1: or 1,1,1
+ sync
+
+globl(ftr_fixup_prefix1)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+globl(end_ftr_fixup_prefix1)
+
+globl(ftr_fixup_prefix1_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+
+globl(ftr_fixup_prefix1_expected)
+ or 1,1,1
+ nop
+ nop
+ or 2,2,2
+
+globl(ftr_fixup_prefix2)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+globl(end_ftr_fixup_prefix2)
+
+globl(ftr_fixup_prefix2_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+
+globl(ftr_fixup_prefix2_alt)
+ .long OP_PREFIX << 26
+ .long 0x0000001
+
+globl(ftr_fixup_prefix2_expected)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ or 2,2,2
+
+globl(ftr_fixup_prefix3)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+ or 3,3,3
+globl(end_ftr_fixup_prefix3)
+
+globl(ftr_fixup_prefix3_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_prefix3_alt)
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ nop
+
+globl(ftr_fixup_prefix3_expected)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ nop
+ or 3,3,3
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
new file mode 100644
index 000000000..4f82581ca
--- /dev/null
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -0,0 +1,1012 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ *
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
+#include <asm/cputable.h>
+#include <asm/code-patching.h>
+#include <asm/interrupt.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/security_features.h>
+#include <asm/firmware.h>
+#include <asm/inst.h>
+
+struct fixup_entry {
+ unsigned long mask;
+ unsigned long value;
+ long start_off;
+ long end_off;
+ long alt_start_off;
+ long alt_end_off;
+};
+
+static u32 *calc_addr(struct fixup_entry *fcur, long offset)
+{
+ /*
+ * We store the offset to the code as a negative offset from
+ * the start of the alt_entry, to support the VDSO. This
+ * routine converts that back into an actual address.
+ */
+ return (u32 *)((unsigned long)fcur + offset);
+}
+
+static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
+{
+ int err;
+ ppc_inst_t instr;
+
+ instr = ppc_inst_read(src);
+
+ if (instr_is_relative_branch(ppc_inst_read(src))) {
+ u32 *target = (u32 *)branch_target(src);
+
+ /* Branch within the section doesn't need translating */
+ if (target < alt_start || target > alt_end) {
+ err = translate_branch(&instr, dest, src);
+ if (err)
+ return 1;
+ }
+ }
+
+ raw_patch_instruction(dest, instr);
+
+ return 0;
+}
+
+static int patch_feature_section_mask(unsigned long value, unsigned long mask,
+ struct fixup_entry *fcur)
+{
+ u32 *start, *end, *alt_start, *alt_end, *src, *dest;
+
+ start = calc_addr(fcur, fcur->start_off);
+ end = calc_addr(fcur, fcur->end_off);
+ alt_start = calc_addr(fcur, fcur->alt_start_off);
+ alt_end = calc_addr(fcur, fcur->alt_end_off);
+
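+	/* The alternative code must not be larger than the code it replaces. */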
+ if ((alt_end - alt_start) > (end - start))
+ return 1;
+
+ if ((value & fcur->mask & mask) == (fcur->value & mask))
+ return 0;
+
+ src = alt_start;
+ dest = start;
+
+ for (; src < alt_end; src = ppc_inst_next(src, src),
+ dest = ppc_inst_next(dest, dest)) {
+ if (patch_alt_instruction(src, dest, alt_start, alt_end))
+ return 1;
+ }
+
+ for (; dest < end; dest++)
+ raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP()));
+
+ return 0;
+}
+
+static void do_feature_fixups_mask(unsigned long value, unsigned long mask,
+ void *fixup_start, void *fixup_end)
+{
+ struct fixup_entry *fcur, *fend;
+
+ fcur = fixup_start;
+ fend = fixup_end;
+
+ for (; fcur < fend; fcur++) {
+ if (patch_feature_section_mask(value, mask, fcur)) {
+ WARN_ON(1);
+ printk("Unable to patch feature section at %p - %p" \
+ " with %p - %p\n",
+ calc_addr(fcur, fcur->start_off),
+ calc_addr(fcur, fcur->end_off),
+ calc_addr(fcur, fcur->alt_start_off),
+ calc_addr(fcur, fcur->alt_end_off));
+ }
+ }
+}
+
+void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+ do_feature_fixups_mask(value, ~0, fixup_start, fixup_end);
+}
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+static bool is_fixup_addr_valid(void *dest, size_t size)
+{
+ return system_state < SYSTEM_FREEING_INITMEM ||
+ !init_section_contains(dest, size);
+}
+
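+/*
+ * Each entry in these fixup sections is a self-relative offset to its patch
+ * site (dest = (void *)start + *start); the same 'instrs' template is
+ * patched at every site.
+ */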
+static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ int j;
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ for (j = 0; j < num; j++)
+ patch_instruction(dest + j, ppc_inst(instrs[j]));
+ }
+ return i;
+}
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
+ bool do_fallback, void *fallback)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ // See comment in do_entry_flush_fixups() RE order of patching
+ if (do_fallback) {
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK);
+ } else {
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ }
+ }
+ return i;
+}
+
+static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+ unsigned int instrs[3];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___stf_entry_barrier_fixup);
+ end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+
+ i = 0;
+ if (types & STF_BARRIER_FALLBACK) {
+ instrs[i++] = PPC_RAW_MFLR(_R10);
+ instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+ instrs[i++] = PPC_RAW_MTLR(_R10);
+ } else if (types & STF_BARRIER_EIEIO) {
+ instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
+ } else if (types & STF_BARRIER_SYNC_ORI) {
+ instrs[i++] = PPC_RAW_SYNC();
+ instrs[i++] = PPC_RAW_LD(_R10, _R13, 0);
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ }
+
+ i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK,
+ &stf_barrier_fallback);
+
+ printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
+ (types == STF_BARRIER_NONE) ? "no" :
+ (types == STF_BARRIER_FALLBACK) ? "fallback" :
+ (types == STF_BARRIER_EIEIO) ? "eieio" :
+ (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+ : "unknown");
+}
+
+static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+ unsigned int instrs[6];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___stf_exit_barrier_fixup);
+ end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+ instrs[3] = PPC_RAW_NOP();
+ instrs[4] = PPC_RAW_NOP();
+ instrs[5] = PPC_RAW_NOP();
+
+ i = 0;
+ if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13);
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0);
+ } else {
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13);
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1);
+ }
+ instrs[i++] = PPC_RAW_SYNC();
+ instrs[i++] = PPC_RAW_LD(_R13, _R13, 0);
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1);
+ else
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2);
+ } else if (types & STF_BARRIER_EIEIO) {
+ instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
+ }
+
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+ printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
+ (types == STF_BARRIER_NONE) ? "no" :
+ (types == STF_BARRIER_FALLBACK) ? "fallback" :
+ (types == STF_BARRIER_EIEIO) ? "eieio" :
+ (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+ : "unknown");
+}
+
+static bool stf_exit_reentrant = false;
+static bool rfi_exit_reentrant = false;
+static DEFINE_MUTEX(exit_flush_lock);
+
+static int __do_stf_barrier_fixups(void *data)
+{
+ enum stf_barrier_type *types = data;
+
+ do_stf_entry_barrier_fixups(*types);
+ do_stf_exit_barrier_fixups(*types);
+
+ return 0;
+}
+
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+ /*
+ * The call to the fallback entry flush, and the fallback/sync-ori exit
+ * flush can not be safely patched in/out while other CPUs are
+ * executing them. So call __do_stf_barrier_fixups() on one CPU while
+ * all other CPUs spin in the stop machine core with interrupts hard
+ * disabled.
+ *
+ * The branch to mark interrupt exits non-reentrant is enabled first,
+ * then stop_machine runs which will ensure all CPUs are out of the
+ * low level interrupt exit code before patching. After the patching,
+ * if allowed, then flip the branch to allow fast exits.
+ */
+
+ // Prevent static key update races with do_rfi_flush_fixups()
+ mutex_lock(&exit_flush_lock);
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_stf_barrier_fixups, &types, NULL);
+
+ if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI))
+ stf_exit_reentrant = false;
+ else
+ stf_exit_reentrant = true;
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
+
+ mutex_unlock(&exit_flush_lock);
+}
+
+void do_uaccess_flush_fixups(enum l1d_flush_type types)
+{
+ unsigned int instrs[4];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___uaccess_flush_fixup);
+ end = PTRRELOC(&__stop___uaccess_flush_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+ instrs[3] = PPC_RAW_BLR();
+
+ i = 0;
+ if (types == L1D_FLUSH_FALLBACK) {
+ instrs[3] = PPC_RAW_NOP();
+ /* fallthrough to fallback flush */
+ }
+
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+ printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+}
+
+static int __do_entry_flush_fixups(void *data)
+{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
+ unsigned int instrs[3];
+ long *start, *end;
+ int i;
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+
+ i = 0;
+ if (types == L1D_FLUSH_FALLBACK) {
+ instrs[i++] = PPC_RAW_MFLR(_R10);
+ instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+ instrs[i++] = PPC_RAW_MTLR(_R10);
+ }
+
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+ /*
+ * If we're patching in or out the fallback flush we need to be careful about the
+ * order in which we patch instructions. That's because it's possible we could
+ * take a page fault after patching one instruction, so the sequence of
+ * instructions must be safe even in a half patched state.
+ *
+ * To make that work, when patching in the fallback flush we patch in this order:
+ * - the mflr (dest)
+ * - the mtlr (dest + 2)
+ * - the branch (dest + 1)
+ *
+ * That ensures the sequence is safe to execute at any point. In contrast if we
+ * patch the mtlr last, it's possible we could return from the branch and not
+ * restore LR, leading to a crash later.
+ *
+ * When patching out the fallback flush (either with nops or another flush type),
+ * we patch in this order:
+ * - the branch (dest + 1)
+ * - the mtlr (dest + 2)
+ * - the mflr (dest)
+ *
+ * Note we are protected by stop_machine() from other CPUs executing the code in a
+ * semi-patched state.
+ */
+
+ start = PTRRELOC(&__start___entry_flush_fixup);
+ end = PTRRELOC(&__stop___entry_flush_fixup);
+ i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &entry_flush_fallback);
+
+ start = PTRRELOC(&__start___scv_entry_flush_fixup);
+ end = PTRRELOC(&__stop___scv_entry_flush_fixup);
+ i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &scv_entry_flush_fallback);
+
+ printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+
+ return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * The call to the fallback flush can not be safely patched in/out while
+ * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+ * CPU while all other CPUs spin in the stop machine core with interrupts
+ * hard disabled.
+ */
+ stop_machine(__do_entry_flush_fixups, &types, NULL);
+}
+
+static int __do_rfi_flush_fixups(void *data)
+{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
+ unsigned int instrs[3];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___rfi_flush_fixup);
+ end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+
+ if (types & L1D_FLUSH_FALLBACK)
+ /* b .+16 to fallback flush */
+ instrs[0] = PPC_RAW_BRANCH(16);
+
+ i = 0;
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+ printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+
+ return 0;
+}
+
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * stop_machine gets all CPUs out of the interrupt exit handler same
+ * as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run
+ * without stop_machine, so this could be achieved with a broadcast
+ * IPI instead, but this matches the stf sequence.
+ */
+
+ // Prevent static key update races with do_stf_barrier_fixups()
+ mutex_lock(&exit_flush_lock);
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_rfi_flush_fixups, &types, NULL);
+
+ if (types & L1D_FLUSH_FALLBACK)
+ rfi_exit_reentrant = false;
+ else
+ rfi_exit_reentrant = true;
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
+
+ mutex_unlock(&exit_flush_lock);
+}
+
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+ unsigned int instr;
+ long *start, *end;
+ int i;
+
+ start = fixup_start;
+ end = fixup_end;
+
+ instr = PPC_RAW_NOP();
+
+ if (enable) {
+ pr_info("barrier-nospec: using ORI speculation barrier\n");
+ instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ }
+
+ i = do_patch_fixups(start, end, &instr, 1);
+
+ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
+}
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+void do_barrier_nospec_fixups(bool enable)
+{
+ void *start, *end;
+
+ start = PTRRELOC(&__start___barrier_nospec_fixup);
+ end = PTRRELOC(&__stop___barrier_nospec_fixup);
+
+ do_barrier_nospec_fixups_range(enable, start, end);
+}
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+#ifdef CONFIG_PPC_E500
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+ unsigned int instr[2];
+ long *start, *end;
+ int i;
+
+ start = fixup_start;
+ end = fixup_end;
+
+ instr[0] = PPC_RAW_NOP();
+ instr[1] = PPC_RAW_NOP();
+
+ if (enable) {
+ pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
+ instr[0] = PPC_RAW_ISYNC();
+ instr[1] = PPC_RAW_SYNC();
+ }
+
+ i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr));
+
+ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
+}
+
+static void __init patch_btb_flush_section(long *curr)
+{
+ unsigned int *start, *end;
+
+ start = (void *)curr + *curr;
+ end = (void *)curr + *(curr + 1);
+ for (; start < end; start++) {
+ pr_devel("patching dest %lx\n", (unsigned long)start);
+ patch_instruction(start, ppc_inst(PPC_RAW_NOP()));
+ }
+}
+
+void __init do_btb_flush_fixups(void)
+{
+ long *start, *end;
+
+ start = PTRRELOC(&__start__btb_flush_fixup);
+ end = PTRRELOC(&__stop__btb_flush_fixup);
+
+ for (; start < end; start += 2)
+ patch_btb_flush_section(start);
+}
+#endif /* CONFIG_PPC_E500 */
+
+void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+ long *start, *end;
+ u32 *dest;
+
+ if (!(value & CPU_FTR_LWSYNC))
+		return;
+
+ start = fixup_start;
+ end = fixup_end;
+
+ for (; start < end; start++) {
+ dest = (void *)start + *start;
+ raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC));
+ }
+}
+
+static void __init do_final_fixups(void)
+{
+#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
+ ppc_inst_t inst;
+ u32 *src, *dest, *end;
+
+ if (PHYSICAL_START == 0)
+ return;
+
+ src = (u32 *)(KERNELBASE + PHYSICAL_START);
+ dest = (u32 *)KERNELBASE;
+ end = (void *)src + (__end_interrupts - _stext);
+
+ while (src < end) {
+ inst = ppc_inst_read(src);
+ raw_patch_instruction(dest, inst);
+ src = ppc_inst_next(src, src);
+ dest = ppc_inst_next(dest, dest);
+ }
+#endif
+}
+
+static unsigned long __initdata saved_cpu_features;
+static unsigned int __initdata saved_mmu_features;
+#ifdef CONFIG_PPC64
+static unsigned long __initdata saved_firmware_features;
+#endif
+
+void __init apply_feature_fixups(void)
+{
+ struct cpu_spec *spec = PTRRELOC(*PTRRELOC(&cur_cpu_spec));
+
+ *PTRRELOC(&saved_cpu_features) = spec->cpu_features;
+ *PTRRELOC(&saved_mmu_features) = spec->mmu_features;
+
+ /*
+ * Apply the CPU-specific and firmware specific fixups to kernel text
+ * (nop out sections not relevant to this CPU or this firmware).
+ */
+ do_feature_fixups(spec->cpu_features,
+ PTRRELOC(&__start___ftr_fixup),
+ PTRRELOC(&__stop___ftr_fixup));
+
+ do_feature_fixups(spec->mmu_features,
+ PTRRELOC(&__start___mmu_ftr_fixup),
+ PTRRELOC(&__stop___mmu_ftr_fixup));
+
+ do_lwsync_fixups(spec->cpu_features,
+ PTRRELOC(&__start___lwsync_fixup),
+ PTRRELOC(&__stop___lwsync_fixup));
+
+#ifdef CONFIG_PPC64
+ saved_firmware_features = powerpc_firmware_features;
+ do_feature_fixups(powerpc_firmware_features,
+ &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
+#endif
+ do_final_fixups();
+}
+
+void __init update_mmu_feature_fixups(unsigned long mask)
+{
+ saved_mmu_features &= ~mask;
+ saved_mmu_features |= cur_cpu_spec->mmu_features & mask;
+
+ do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask,
+ PTRRELOC(&__start___mmu_ftr_fixup),
+ PTRRELOC(&__stop___mmu_ftr_fixup));
+ mmu_feature_keys_init();
+}
+
+void __init setup_feature_keys(void)
+{
+ /*
+ * Initialise jump label. This causes all the cpu/mmu_has_feature()
+ * checks to take on their correct polarity based on the current set of
+ * CPU/MMU features.
+ */
+ jump_label_init();
+ cpu_feature_keys_init();
+ mmu_feature_keys_init();
+}
+
+static int __init check_features(void)
+{
+ WARN(saved_cpu_features != cur_cpu_spec->cpu_features,
+ "CPU features changed after feature patching!\n");
+ WARN(saved_mmu_features != cur_cpu_spec->mmu_features,
+ "MMU features changed after feature patching!\n");
+#ifdef CONFIG_PPC64
+ WARN(saved_firmware_features != powerpc_firmware_features,
+ "Firmware features changed after feature patching!\n");
+#endif
+
+ return 0;
+}
+late_initcall(check_features);
+
+#ifdef CONFIG_FTR_FIXUP_SELFTEST
+
+#define check(x) \
+ if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__);
+
+static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+{
+ return patch_feature_section_mask(value, ~0, fcur);
+}
+
+/* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */
+static struct fixup_entry fixup;
+
+static long __init calc_offset(struct fixup_entry *entry, unsigned int *p)
+{
+ return (unsigned long)p - (unsigned long)entry;
+}
+
+static void __init test_basic_patching(void)
+{
+ extern unsigned int ftr_fixup_test1[];
+ extern unsigned int end_ftr_fixup_test1[];
+ extern unsigned int ftr_fixup_test1_orig[];
+ extern unsigned int ftr_fixup_test1_expected[];
+ int size = 4 * (end_ftr_fixup_test1 - ftr_fixup_test1);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
+ fixup.alt_start_off = fixup.alt_end_off = 0;
+
+ /* Sanity check */
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
+
+ /* Check we don't patch if the value matches */
+ patch_feature_section(8, &fixup);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
+
+ /* Check we do patch if the value doesn't match */
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
+
+ /* Check we do patch if the mask doesn't match */
+ memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
+ patch_feature_section(~8, &fixup);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
+}
+
+static void __init test_alternative_patching(void)
+{
+ extern unsigned int ftr_fixup_test2[];
+ extern unsigned int end_ftr_fixup_test2[];
+ extern unsigned int ftr_fixup_test2_orig[];
+ extern unsigned int ftr_fixup_test2_alt[];
+ extern unsigned int ftr_fixup_test2_expected[];
+ int size = 4 * (end_ftr_fixup_test2 - ftr_fixup_test2);
+
+ fixup.value = fixup.mask = 0xF;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
+
+ /* Sanity check */
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
+
+ /* Check we don't patch if the value matches */
+ patch_feature_section(0xF, &fixup);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
+
+ /* Check we do patch if the value doesn't match */
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
+
+ /* Check we do patch if the mask doesn't match */
+ memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
+ patch_feature_section(~0xF, &fixup);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
+}
+
+static void __init test_alternative_case_too_big(void)
+{
+ extern unsigned int ftr_fixup_test3[];
+ extern unsigned int end_ftr_fixup_test3[];
+ extern unsigned int ftr_fixup_test3_orig[];
+ extern unsigned int ftr_fixup_test3_alt[];
+ int size = 4 * (end_ftr_fixup_test3 - ftr_fixup_test3);
+
+ fixup.value = fixup.mask = 0xC;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
+
+ /* Sanity check */
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
+
+ /* Expect nothing to be patched, and the error returned to us */
+ check(patch_feature_section(0xF, &fixup) == 1);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
+ check(patch_feature_section(0, &fixup) == 1);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
+ check(patch_feature_section(~0xF, &fixup) == 1);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
+}
+
+static void __init test_alternative_case_too_small(void)
+{
+ extern unsigned int ftr_fixup_test4[];
+ extern unsigned int end_ftr_fixup_test4[];
+ extern unsigned int ftr_fixup_test4_orig[];
+ extern unsigned int ftr_fixup_test4_alt[];
+ extern unsigned int ftr_fixup_test4_expected[];
+ int size = 4 * (end_ftr_fixup_test4 - ftr_fixup_test4);
+ unsigned long flag;
+
+ /* Check a high-bit flag */
+ flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
+ fixup.value = fixup.mask = flag;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
+
+ /* Sanity check */
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
+
+ /* Check we don't patch if the value matches */
+ patch_feature_section(flag, &fixup);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
+
+ /* Check we do patch if the value doesn't match */
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
+
+ /* Check we do patch if the mask doesn't match */
+ memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
+ patch_feature_section(~flag, &fixup);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
+}
+
+static void test_alternative_case_with_branch(void)
+{
+ extern unsigned int ftr_fixup_test5[];
+ extern unsigned int end_ftr_fixup_test5[];
+ extern unsigned int ftr_fixup_test5_expected[];
+ int size = 4 * (end_ftr_fixup_test5 - ftr_fixup_test5);
+
+ check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
+}
+
+static void __init test_alternative_case_with_external_branch(void)
+{
+ extern unsigned int ftr_fixup_test6[];
+ extern unsigned int end_ftr_fixup_test6[];
+ extern unsigned int ftr_fixup_test6_expected[];
+ int size = 4 * (end_ftr_fixup_test6 - ftr_fixup_test6);
+
+ check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
+}
+
+static void __init test_alternative_case_with_branch_to_end(void)
+{
+ extern unsigned int ftr_fixup_test7[];
+ extern unsigned int end_ftr_fixup_test7[];
+ extern unsigned int ftr_fixup_test7_expected[];
+ int size = 4 * (end_ftr_fixup_test7 - ftr_fixup_test7);
+
+ check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0);
+}
+
+static void __init test_cpu_macros(void)
+{
+ extern u8 ftr_fixup_test_FTR_macros[];
+ extern u8 ftr_fixup_test_FTR_macros_expected[];
+ unsigned long size = ftr_fixup_test_FTR_macros_expected -
+ ftr_fixup_test_FTR_macros;
+
+ /* The fixups have already been done for us during boot */
+ check(memcmp(ftr_fixup_test_FTR_macros,
+ ftr_fixup_test_FTR_macros_expected, size) == 0);
+}
+
+static void __init test_fw_macros(void)
+{
+#ifdef CONFIG_PPC64
+ extern u8 ftr_fixup_test_FW_FTR_macros[];
+ extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
+ unsigned long size = ftr_fixup_test_FW_FTR_macros_expected -
+ ftr_fixup_test_FW_FTR_macros;
+
+ /* The fixups have already been done for us during boot */
+ check(memcmp(ftr_fixup_test_FW_FTR_macros,
+ ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
+#endif
+}
+
+static void __init test_lwsync_macros(void)
+{
+ extern u8 lwsync_fixup_test[];
+ extern u8 end_lwsync_fixup_test[];
+ extern u8 lwsync_fixup_test_expected_LWSYNC[];
+ extern u8 lwsync_fixup_test_expected_SYNC[];
+ unsigned long size = end_lwsync_fixup_test -
+ lwsync_fixup_test;
+
+ /* The fixups have already been done for us during boot */
+ if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
+ check(memcmp(lwsync_fixup_test,
+ lwsync_fixup_test_expected_LWSYNC, size) == 0);
+ } else {
+ check(memcmp(lwsync_fixup_test,
+ lwsync_fixup_test_expected_SYNC, size) == 0);
+ }
+}
+
+#ifdef CONFIG_PPC64
+static void __init test_prefix_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix1[];
+ extern unsigned int end_ftr_fixup_prefix1[];
+ extern unsigned int ftr_fixup_prefix1_orig[];
+ extern unsigned int ftr_fixup_prefix1_expected[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3);
+ fixup.alt_start_off = fixup.alt_end_off = 0;
+
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0);
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0);
+}
+
+static void __init test_prefix_alt_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix2[];
+ extern unsigned int end_ftr_fixup_prefix2[];
+ extern unsigned int ftr_fixup_prefix2_orig[];
+ extern unsigned int ftr_fixup_prefix2_expected[];
+ extern unsigned int ftr_fixup_prefix2_alt[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2);
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0);
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0);
+}
+
+static void __init test_prefix_word_alt_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix3[];
+ extern unsigned int end_ftr_fixup_prefix3[];
+ extern unsigned int ftr_fixup_prefix3_orig[];
+ extern unsigned int ftr_fixup_prefix3_expected[];
+ extern unsigned int ftr_fixup_prefix3_alt[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3);
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0);
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0);
+}
+#else
+static inline void test_prefix_patching(void) {}
+static inline void test_prefix_alt_patching(void) {}
+static inline void test_prefix_word_alt_patching(void) {}
+#endif /* CONFIG_PPC64 */
+
+static int __init test_feature_fixups(void)
+{
+ printk(KERN_DEBUG "Running feature fixup self-tests ...\n");
+
+ test_basic_patching();
+ test_alternative_patching();
+ test_alternative_case_too_big();
+ test_alternative_case_too_small();
+ test_alternative_case_with_branch();
+ test_alternative_case_with_external_branch();
+ test_alternative_case_with_branch_to_end();
+ test_cpu_macros();
+ test_fw_macros();
+ test_lwsync_macros();
+ test_prefix_patching();
+ test_prefix_alt_patching();
+ test_prefix_word_alt_patching();
+
+ return 0;
+}
+late_initcall(test_feature_fixups);
+
+#endif /* CONFIG_FTR_FIXUP_SELFTEST */
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
new file mode 100644
index 000000000..151875050
--- /dev/null
+++ b/arch/powerpc/lib/hweight_64.S
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/feature-fixups.h>
+
+/* Note: This code relies on -mminimal-toc */
+
+_GLOBAL(__arch_hweight8)
+BEGIN_FTR_SECTION
+ b CFUNC(__sw_hweight8)
+ nop
+ nop
+FTR_SECTION_ELSE
+ PPC_POPCNTB(R3,R3)
+ clrldi r3,r3,64-8
+ blr
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight8)
+
+_GLOBAL(__arch_hweight16)
+BEGIN_FTR_SECTION
+ b CFUNC(__sw_hweight16)
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(50)
+ PPC_POPCNTB(R3,R3)
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(50)
+ clrlwi r3,r3,16
+ PPC_POPCNTW(R3,R3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight16)
+
+_GLOBAL(__arch_hweight32)
+BEGIN_FTR_SECTION
+ b CFUNC(__sw_hweight32)
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(51)
+ PPC_POPCNTB(R3,R3)
+ srdi r4,r3,16
+ add r3,r4,r3
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(51)
+ PPC_POPCNTW(R3,R3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight32)
+
+_GLOBAL(__arch_hweight64)
+BEGIN_FTR_SECTION
+ b CFUNC(__sw_hweight64)
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+ nop
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(52)
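+	/*
+	 * popcntb leaves a per-byte population count in each byte of r3;
+	 * the srdi/add ladder below folds the eight byte counts together,
+	 * and the final clrldi keeps the low byte as the result.
+	 */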
+ PPC_POPCNTB(R3,R3)
+ srdi r4,r3,32
+ add r3,r4,r3
+ srdi r4,r3,16
+ add r3,r4,r3
+ srdi r4,r3,8
+ add r3,r4,r3
+ clrldi r3,r3,64-8
+ blr
+ FTR_SECTION_ELSE_NESTED(52)
+ PPC_POPCNTD(R3,R3)
+ clrldi r3,r3,64-8
+ blr
+ ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight64)
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
new file mode 100644
index 000000000..e00abeabc
--- /dev/null
+++ b/arch/powerpc/lib/ldstfp.S
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Floating-point, VMX/Altivec and VSX loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
+#include <linux/errno.h>
+
+#define STKFRM (PPC_MIN_STKFRM + 16)
+
+/* Get the contents of frN into *p; N is in r3 and p is in r4. */
+_GLOBAL(get_fpr)
+ mflr r0
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
+ rlwinm r3,r3,3,0xf8
+ bcl 20,31,1f
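+	/*
+	 * The .rept below emits a 32-entry table of "stfd reg; b 2f" pairs,
+	 * 8 bytes per entry.  The bcl/mflr pair obtains the table's address,
+	 * r3 already holds N * 8 from the rlwinm above, so mtctr/bctr
+	 * dispatch to the entry for frN.
+	 */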
+reg = 0
+ .rept 32
+ stfd reg, 0(r4)
+ b 2f
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+2: MTMSRD(r6)
+ isync
+ blr
+
+/* Put the contents of *p into frN; N is in r3 and p is in r4. */
+_GLOBAL(put_fpr)
+ mflr r0
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
+ rlwinm r3,r3,3,0xf8
+ bcl 20,31,1f
+reg = 0
+ .rept 32
+ lfd reg, 0(r4)
+ b 2f
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+2: MTMSRD(r6)
+ isync
+ blr
+
+#ifdef CONFIG_ALTIVEC
+/* Get the contents of vrN into *p; N is in r3 and p is in r4. */
+_GLOBAL(get_vr)
+ mflr r0
+ mfmsr r6
+ oris r7, r6, MSR_VEC@h
+ MTMSRD(r7)
+ isync
+ rlwinm r3,r3,3,0xf8
+ bcl 20,31,1f
+reg = 0
+ .rept 32
+ stvx reg, 0, r4
+ b 2f
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+2: MTMSRD(r6)
+ isync
+ blr
+
+/* Put the contents of *p into vrN; N is in r3 and p is in r4. */
+_GLOBAL(put_vr)
+ mflr r0
+ mfmsr r6
+ oris r7, r6, MSR_VEC@h
+ MTMSRD(r7)
+ isync
+ rlwinm r3,r3,3,0xf8
+ bcl 20,31,1f
+reg = 0
+ .rept 32
+ lvx reg, 0, r4
+ b 2f
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+2: MTMSRD(r6)
+ isync
+ blr
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+/* Get the contents of vsN into vs0; N is in r3. */
+_GLOBAL(get_vsr)
+ mflr r0
+ rlwinm r3,r3,3,0x1f8
+ bcl 20,31,1f
+ blr /* vs0 is already in vs0 */
+ nop
+reg = 1
+ .rept 63
+ XXLOR(0,reg,reg)
+ blr
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+
+/* Put the contents of vs0 into vsN; N is in r3. */
+_GLOBAL(put_vsr)
+ mflr r0
+ rlwinm r3,r3,3,0x1f8
+ bcl 20,31,1f
+	blr		/* vs0 is already in vs0 */
+ nop
+reg = 1
+ .rept 63
+ XXLOR(reg,0,0)
+ blr
+reg = reg + 1
+ .endr
+1: mflr r5
+ add r5,r3,r5
+ mtctr r5
+ mtlr r0
+ bctr
+
+/* Load VSX reg N from vector doubleword *p. N is in r3, p in r4. */
+_GLOBAL(load_vsrn)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ oris r7,r6,MSR_VSX@h
+ cmpwi cr7,r3,0
+ li r8,STKFRM-16
+ MTMSRD(r7)
+ isync
+ beq cr7,1f
+ STXVD2X(0,R1,R8)
+1: LXVD2X(0,R0,R4)
+#ifdef __LITTLE_ENDIAN__
+ XXSWAPD(0,0)
+#endif
+ beq cr7,4f
+ bl put_vsr
+ LXVD2X(0,R1,R8)
+4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ addi r1,r1,STKFRM
+ blr
+
+/* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. */
+_GLOBAL(store_vsrn)
+ PPC_STLU r1,-STKFRM(r1)
+ mflr r0
+ PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mfmsr r6
+ oris r7,r6,MSR_VSX@h
+ li r8,STKFRM-16
+ MTMSRD(r7)
+ isync
+ STXVD2X(0,R1,R8)
+ bl get_vsr
+#ifdef __LITTLE_ENDIAN__
+ XXSWAPD(0,0)
+#endif
+ STXVD2X(0,R0,R4)
+ LXVD2X(0,R1,R8)
+ PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ mtlr r0
+ MTMSRD(r6)
+ isync
+ mr r3,r9
+ addi r1,r1,STKFRM
+ blr
+#endif /* CONFIG_VSX */
+
+/* Convert single-precision to double, without disturbing FPRs. */
+/* conv_sp_to_dp(float *sp, double *dp) */
+_GLOBAL(conv_sp_to_dp)
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
+ stfd fr0, -16(r1)
+ lfs fr0, 0(r3)
+ stfd fr0, 0(r4)
+ lfd fr0, -16(r1)
+ MTMSRD(r6)
+ isync
+ blr
+
+/* Convert double-precision to single, without disturbing FPRs. */
+/* conv_dp_to_sp(double *dp, float *sp) */
+_GLOBAL(conv_dp_to_sp)
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
+ stfd fr0, -16(r1)
+ lfd fr0, 0(r3)
+ stfs fr0, 0(r4)
+ lfd fr0, -16(r1)
+ MTMSRD(r6)
+ isync
+ blr
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
new file mode 100644
index 000000000..04165b7a1
--- /dev/null
+++ b/arch/powerpc/lib/locks.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Spin and read/write lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ * Rework to support virtual processors
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/smp.h>
+
+/* waiting for a spinlock... */
+#if defined(CONFIG_PPC_SPLPAR)
+#include <asm/hvcall.h>
+#include <asm/smp.h>
+
+void splpar_spin_yield(arch_spinlock_t *lock)
+{
+ unsigned int lock_value, holder_cpu, yield_count;
+
+ lock_value = lock->slock;
+ if (lock_value == 0)
+ return;
+ holder_cpu = lock_value & 0xffff;
+ BUG_ON(holder_cpu >= NR_CPUS);
+
+ yield_count = yield_count_of(holder_cpu);
+ if ((yield_count & 1) == 0)
+ return; /* virtual cpu is currently running */
+ rmb();
+ if (lock->slock != lock_value)
+ return; /* something has changed */
+ yield_to_preempted(holder_cpu, yield_count);
+}
+EXPORT_SYMBOL_GPL(splpar_spin_yield);
+
+/*
+ * Waiting for a read lock or a write lock on a rwlock...
+ * This turns out to be the same for read and write locks, since
+ * we only know the holder if it is write-locked.
+ */
+void splpar_rw_yield(arch_rwlock_t *rw)
+{
+ int lock_value;
+ unsigned int holder_cpu, yield_count;
+
+ lock_value = rw->lock;
+ if (lock_value >= 0)
+ return; /* no write lock at present */
+ holder_cpu = lock_value & 0xffff;
+ BUG_ON(holder_cpu >= NR_CPUS);
+
+ yield_count = yield_count_of(holder_cpu);
+ if ((yield_count & 1) == 0)
+ return; /* virtual cpu is currently running */
+ rmb();
+ if (rw->lock != lock_value)
+ return; /* something has changed */
+ yield_to_preempted(holder_cpu, yield_count);
+}
+#endif
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
new file mode 100644
index 000000000..6fd06cd20
--- /dev/null
+++ b/arch/powerpc/lib/mem_64.S
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/errno.h>
+#include <asm/ppc_asm.h>
+#include <asm/kasan.h>
+
+#ifndef CONFIG_KASAN
+_GLOBAL(__memset16)
+ rlwimi r4,r4,16,0,15
+ /* fall through */
+
+_GLOBAL(__memset32)
+ rldimi r4,r4,32,0
+ /* fall through */
+
+_GLOBAL(__memset64)
+ neg r0,r3
+ andi. r0,r0,7
+ cmplw cr1,r5,r0
+ b .Lms
+EXPORT_SYMBOL(__memset16)
+EXPORT_SYMBOL(__memset32)
+EXPORT_SYMBOL(__memset64)
+#endif
+
+_GLOBAL_KASAN(memset)
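+	/*
+	 * The interleaved rlwimi/rlwimi/rldimi below replicate the fill byte
+	 * in r4 across all 8 bytes of the register, while r0 and cr1 work out
+	 * how many bytes are needed to reach 8-byte alignment of the
+	 * destination and whether the length reaches that far.
+	 */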
+ neg r0,r3
+ rlwimi r4,r4,8,16,23
+ andi. r0,r0,7 /* # bytes to be 8-byte aligned */
+ rlwimi r4,r4,16,0,15
+ cmplw cr1,r5,r0 /* do we get that far? */
+ rldimi r4,r4,32,0
+.Lms: PPC_MTOCRF(1,r0)
+ mr r6,r3
+ blt cr1,8f
+ beq 3f /* if already 8-byte aligned */
+ subf r5,r0,r5
+ bf 31,1f
+ stb r4,0(r6)
+ addi r6,r6,1
+1: bf 30,2f
+ sth r4,0(r6)
+ addi r6,r6,2
+2: bf 29,3f
+ stw r4,0(r6)
+ addi r6,r6,4
+3: srdi. r0,r5,6
+ clrldi r5,r5,58
+ mtctr r0
+ beq 5f
+ .balign 16
+4: std r4,0(r6)
+ std r4,8(r6)
+ std r4,16(r6)
+ std r4,24(r6)
+ std r4,32(r6)
+ std r4,40(r6)
+ std r4,48(r6)
+ std r4,56(r6)
+ addi r6,r6,64
+ bdnz 4b
+5: srwi. r0,r5,3
+ clrlwi r5,r5,29
+ PPC_MTOCRF(1,r0)
+ beq 8f
+ bf 29,6f
+ std r4,0(r6)
+ std r4,8(r6)
+ std r4,16(r6)
+ std r4,24(r6)
+ addi r6,r6,32
+6: bf 30,7f
+ std r4,0(r6)
+ std r4,8(r6)
+ addi r6,r6,16
+7: bf 31,8f
+ std r4,0(r6)
+ addi r6,r6,8
+8: cmpwi r5,0
+ PPC_MTOCRF(1,r5)
+ beqlr
+ bf 29,9f
+ stw r4,0(r6)
+ addi r6,r6,4
+9: bf 30,10f
+ sth r4,0(r6)
+ addi r6,r6,2
+10: bflr 31
+ stb r4,0(r6)
+ blr
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
+
+_GLOBAL_TOC_KASAN(memmove)
+ cmplw 0,r3,r4
+ bgt backwards_memcpy
+ b memcpy
+
+_GLOBAL(backwards_memcpy)
+	rlwinm.	r7,r5,32-3,3,31		/* r7 = r5 >> 3 */
+ add r6,r3,r5
+ add r4,r4,r5
+ beq 2f
+ andi. r0,r6,3
+ mtctr r7
+ bne 5f
+ .balign 16
+1: lwz r7,-4(r4)
+ lwzu r8,-8(r4)
+ stw r7,-4(r6)
+ stwu r8,-8(r6)
+ bdnz 1b
+ andi. r5,r5,7
+2: cmplwi 0,r5,4
+ blt 3f
+ lwzu r0,-4(r4)
+ subi r5,r5,4
+ stwu r0,-4(r6)
+3: cmpwi 0,r5,0
+ beqlr
+ mtctr r5
+4: lbzu r0,-1(r4)
+ stbu r0,-1(r6)
+ bdnz 4b
+ blr
+5: mtctr r0
+6: lbzu r7,-1(r4)
+ stbu r7,-1(r6)
+ bdnz 6b
+ subf r5,r0,r5
+ rlwinm. r7,r5,32-3,3,31
+ beq 2b
+ mtctr r7
+ b 1b
+EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memmove)
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
new file mode 100644
index 000000000..f6fca5664
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * memcmp for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+
+ .text
+
+_GLOBAL(memcmp)
+ srawi. r7, r5, 2 /* Divide len by 4 */
+ mr r6, r3
+ beq- 3f
+ mtctr r7
+ li r7, 0
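+	/*
+	 * Word-compare loop: bdnzt decrements CTR and loops only while the
+	 * words compare equal; it falls through on a mismatch (handled at 5:)
+	 * or once all full words have been compared.
+	 */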
+1: lwzx r3, r6, r7
+ lwzx r0, r4, r7
+ addi r7, r7, 4
+ cmplw cr0, r3, r0
+ bdnzt eq, 1b
+ bne 5f
+3: andi. r3, r5, 3
+ beqlr
+ cmplwi cr1, r3, 2
+ blt- cr1, 4f
+ lhzx r3, r6, r7
+ lhzx r0, r4, r7
+ addi r7, r7, 2
+ subf. r3, r0, r3
+ beqlr cr1
+ bnelr
+4: lbzx r3, r6, r7
+ lbzx r0, r4, r7
+ subf. r3, r0, r3
+ blr
+5: li r3, 1
+ bgtlr
+ li r3, -1
+ blr
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
new file mode 100644
index 000000000..142c666d3
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -0,0 +1,638 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ * Copyright 2015 IBM Corporation.
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+#define off8 r6
+#define off16 r7
+#define off24 r8
+
+#define rA r9
+#define rB r10
+#define rC r11
+#define rD r27
+#define rE r28
+#define rF r29
+#define rG r30
+#define rH r31
+
+#ifdef __LITTLE_ENDIAN__
+#define LH lhbrx
+#define LW lwbrx
+#define LD ldbrx
+#define LVS lvsr
+#define VPERM(_VRT,_VRA,_VRB,_VRC) \
+ vperm _VRT,_VRB,_VRA,_VRC
+#else
+#define LH lhzx
+#define LW lwzx
+#define LD ldx
+#define LVS lvsl
+#define VPERM(_VRT,_VRA,_VRB,_VRC) \
+ vperm _VRT,_VRA,_VRB,_VRC
+#endif
+
+#define VMX_THRESH 4096
+#define ENTER_VMX_OPS \
+ mflr r0; \
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
+ std r0,16(r1); \
+ stdu r1,-STACKFRAMESIZE(r1); \
+ bl CFUNC(enter_vmx_ops); \
+ cmpwi cr1,r3,0; \
+ ld r0,STACKFRAMESIZE+16(r1); \
+ ld r3,STK_REG(R31)(r1); \
+ ld r4,STK_REG(R30)(r1); \
+ ld r5,STK_REG(R29)(r1); \
+ addi r1,r1,STACKFRAMESIZE; \
+ mtlr r0
+
+#define EXIT_VMX_OPS \
+ mflr r0; \
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
+ std r0,16(r1); \
+ stdu r1,-STACKFRAMESIZE(r1); \
+ bl CFUNC(exit_vmx_ops); \
+ ld r0,STACKFRAMESIZE+16(r1); \
+ ld r3,STK_REG(R31)(r1); \
+ ld r4,STK_REG(R30)(r1); \
+ ld r5,STK_REG(R29)(r1); \
+ addi r1,r1,STACKFRAMESIZE; \
+ mtlr r0
+
+/*
+ * LD_VSR_CROSS16B loads the 2nd 16 bytes for _vaddr, which is not aligned to
+ * a 16-byte boundary, and permutes the result with the 1st 16 bytes.
+ *
+ * | y y y y y y y y y y y y y 0 1 2 | 3 4 5 6 7 8 9 a b c d e f z z z |
+ * ^ ^ ^
+ * 0xbbbb10 0xbbbb20 0xbbb30
+ * ^
+ * _vaddr
+ *
+ *
+ * _vmask is the mask generated by LVS
+ * _v1st_qw is the 1st aligned QW of current addr which is already loaded.
+ * for example: 0xyyyyyyyyyyyyy012 for big endian
+ * _v2nd_qw is the 2nd aligned QW of the current _vaddr, to be loaded.
+ * for example: 0x3456789abcdefzzz for big endian
+ * The permute result is saved in _v_res.
+ * for example: 0x0123456789abcdef for big endian.
+ */
+#define LD_VSR_CROSS16B(_vaddr,_vmask,_v1st_qw,_v2nd_qw,_v_res) \
+ lvx _v2nd_qw,_vaddr,off16; \
+ VPERM(_v_res,_v1st_qw,_v2nd_qw,_vmask)
+
+/*
+ * There are 2 categories for memcmp:
+ * 1) src/dst have the same offset from the 8-byte boundary. The handlers
+ *    are named like .Lsameoffset_xxxx
+ * 2) src/dst have different offsets from the 8-byte boundary. The handlers
+ *    are named like .Ldiffoffset_xxxx
+ */
+_GLOBAL_TOC(memcmp)
+ cmpdi cr1,r5,0
+
+	/* Use the short loop if the src/dst addresses do not have the
+	 * same offset from an 8-byte alignment boundary.
+ */
+ xor r6,r3,r4
+ andi. r6,r6,7
+
+	/* Fall back to the short loop if comparing fewer than 8 bytes
+	 * at aligned addresses.
+ */
+ cmpdi cr6,r5,7
+
+ beq cr1,.Lzero
+ bgt cr6,.Lno_short
+
+.Lshort:
+ mtctr r5
+1: lbz rA,0(r3)
+ lbz rB,0(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,1(r3)
+ lbz rB,1(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,2(r3)
+ lbz rB,2(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,3(r3)
+ lbz rB,3(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+
+ addi r3,r3,4
+ addi r4,r4,4
+
+ bdnz 1b
+
+.Lzero:
+ li r3,0
+ blr
+
+.Lno_short:
+ dcbt 0,r3
+ dcbt 0,r4
+ bne .Ldiffoffset_8bytes_make_align_start
+
+
+.Lsameoffset_8bytes_make_align_start:
+	/* Compare the leading bytes that are not 8-byte aligned so that
+	 * the rest of the comparison can run on an 8-byte alignment.
+ */
+ andi. r6,r3,7
+
+	/* Try to compare the first double word which is not 8-byte aligned:
+	 * load the first double word at (src & ~7UL) and shift left the
+	 * appropriate bits before the comparison.
+ */
+ rlwinm r6,r3,3,26,28
+ beq .Lsameoffset_8bytes_aligned
+ clrrdi r3,r3,3
+ clrrdi r4,r4,3
+ LD rA,0,r3
+ LD rB,0,r4
+ sld rA,rA,r6
+ sld rB,rB,r6
+ cmpld cr0,rA,rB
+ srwi r6,r6,3
+ bne cr0,.LcmpAB_lightweight
+ subfic r6,r6,8
+ subf. r5,r6,r5
+ addi r3,r3,8
+ addi r4,r4,8
+ beq .Lzero
+
+.Lsameoffset_8bytes_aligned:
+	/* Now we are 8-byte aligned.
+	 * Use the .Llong loop if 32 or more bytes remain to be compared.
+ */
+ cmpdi cr6,r5,31
+ bgt cr6,.Llong
+
+.Lcmp_lt32bytes:
+ /* compare 1 ~ 31 bytes, at least r3 addr is 8 bytes aligned now */
+ cmpdi cr5,r5,7
+ srdi r0,r5,3
+ ble cr5,.Lcmp_rest_lt8bytes
+
+ /* handle 8 ~ 31 bytes */
+ clrldi r5,r5,61
+ mtctr r0
+2:
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ addi r3,r3,8
+ addi r4,r4,8
+ bne cr0,.LcmpAB_lightweight
+ bdnz 2b
+
+ cmpwi r5,0
+ beq .Lzero
+
+.Lcmp_rest_lt8bytes:
+ /*
+ * Here we have less than 8 bytes to compare. At least s1 is aligned to
+ * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a
+ * page boundary, otherwise we might read past the end of the buffer and
+ * trigger a page fault. We use 4K as the conservative minimum page
+ * size. If we detect that case we go to the byte-by-byte loop.
+ *
+ * Otherwise the next double word is loaded from s1 and s2, and shifted
+ * right to compare the appropriate bits.
+ */
+ clrldi r6,r4,(64-12) // r6 = r4 & 0xfff
+ cmpdi r6,0xff8
+ bgt .Lshort
+
+ subfic r6,r5,8
+ slwi r6,r6,3
+ LD rA,0,r3
+ LD rB,0,r4
+ srd rA,rA,r6
+ srd rB,rB,r6
+ cmpld cr0,rA,rB
+ bne cr0,.LcmpAB_lightweight
+ b .Lzero
+
+.Lnon_zero:
+ mr r3,rC
+ blr
+
+.Llong:
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	/* Try to use the VMX loop if the length is 4K or greater */
+ cmpldi cr6,r5,VMX_THRESH
+ bge cr6,.Lsameoffset_vmx_cmp
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+.Llong_novmx_cmp:
+#endif
+ /* At least s1 addr is aligned with 8 bytes */
+ li off8,8
+ li off16,16
+ li off24,24
+
+ std r31,-8(r1)
+ std r30,-16(r1)
+ std r29,-24(r1)
+ std r28,-32(r1)
+ std r27,-40(r1)
+
+ srdi r0,r5,5
+ mtctr r0
+ andi. r5,r5,31
+
+ LD rA,0,r3
+ LD rB,0,r4
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ cmpld cr0,rA,rB
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ bdz .Lfirst32
+
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr1,rC,rD
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+ cmpld cr6,rE,rF
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ cmpld cr0,rA,rB
+ bne cr1,.LcmpCD
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ bdz .Lsecond32
+
+ .balign 16
+
+1: LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr1,rC,rD
+ bne cr6,.LcmpEF
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+ cmpld cr6,rE,rF
+ bne cr7,.LcmpGH
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ cmpld cr0,rA,rB
+ bne cr1,.LcmpCD
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ bdnz 1b
+
+.Lsecond32:
+ cmpld cr1,rC,rD
+ bne cr6,.LcmpEF
+
+ cmpld cr6,rE,rF
+ bne cr7,.LcmpGH
+
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ bne cr1,.LcmpCD
+ bne cr6,.LcmpEF
+ bne cr7,.LcmpGH
+
+.Ltail:
+ ld r31,-8(r1)
+ ld r30,-16(r1)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+
+ cmpdi r5,0
+ beq .Lzero
+ b .Lshort
+
+.Lfirst32:
+ cmpld cr1,rC,rD
+ cmpld cr6,rE,rF
+ cmpld cr7,rG,rH
+
+ bne cr0,.LcmpAB
+ bne cr1,.LcmpCD
+ bne cr6,.LcmpEF
+ bne cr7,.LcmpGH
+
+ b .Ltail
+
+.LcmpAB:
+ li r3,1
+ bgt cr0,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpCD:
+ li r3,1
+ bgt cr1,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpEF:
+ li r3,1
+ bgt cr6,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpGH:
+ li r3,1
+ bgt cr7,.Lout
+ li r3,-1
+
+.Lout:
+ ld r31,-8(r1)
+ ld r30,-16(r1)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+ blr
+
+.LcmpAB_lightweight: /* skip NV GPRS restore */
+ li r3,1
+ bgtlr
+ li r3,-1
+ blr
+
+#ifdef CONFIG_ALTIVEC
+.Lsameoffset_vmx_cmp:
+	/* Entered with src/dst addresses that have the same offset from an
+	 * 8-byte alignment boundary.
+	 *
+	 * There is an optimization based on the following fact: memcmp()
+	 * tends to fail early, within the first 32 bytes.
+	 * Before applying VMX instructions, which incur the penalty of
+	 * saving/restoring the 32x128-bit VMX registers, we compare the
+	 * first 32 bytes so that we catch the ~80% of cases that fail there.
+ */
+
+ li r0,4
+ mtctr r0
+.Lsameoffset_prechk_32B_loop:
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ addi r3,r3,8
+ addi r4,r4,8
+ bne cr0,.LcmpAB_lightweight
+ addi r5,r5,-8
+ bdnz .Lsameoffset_prechk_32B_loop
+
+ ENTER_VMX_OPS
+ beq cr1,.Llong_novmx_cmp
+
+3:
+	/* Check whether r4 has the same offset as r3 from a
+	 * 16-byte boundary.
+ */
+ xor r0,r3,r4
+ andi. r0,r0,0xf
+ bne .Ldiffoffset_vmx_cmp_start
+
+	/* The length is at least 4KB. Align further to a 16-byte boundary.
+ */
+ andi. rA,r3,8
+ LD rA,0,r3
+ beq 4f
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ addi r3,r3,8
+ addi r4,r4,8
+ addi r5,r5,-8
+
+ beq cr0,4f
+ /* save and restore cr0 */
+ mfocrf r5,128
+ EXIT_VMX_OPS
+ mtocrf 128,r5
+ b .LcmpAB_lightweight
+
+4:
+ /* compare 32 bytes for each loop */
+ srdi r0,r5,5
+ mtctr r0
+ clrldi r5,r5,59
+ li off16,16
+
+.balign 16
+5:
+ lvx v0,0,r3
+ lvx v1,0,r4
+ VCMPEQUD_RC(v0,v0,v1)
+ bnl cr6,7f
+ lvx v0,off16,r3
+ lvx v1,off16,r4
+ VCMPEQUD_RC(v0,v0,v1)
+ bnl cr6,6f
+ addi r3,r3,32
+ addi r4,r4,32
+ bdnz 5b
+
+ EXIT_VMX_OPS
+ cmpdi r5,0
+ beq .Lzero
+ b .Lcmp_lt32bytes
+
+6:
+ addi r3,r3,16
+ addi r4,r4,16
+
+7:
+ /* diff the last 16 bytes */
+ EXIT_VMX_OPS
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ li off8,8
+ bne cr0,.LcmpAB_lightweight
+
+ LD rA,off8,r3
+ LD rB,off8,r4
+ cmpld cr0,rA,rB
+ bne cr0,.LcmpAB_lightweight
+ b .Lzero
+#endif
+
+.Ldiffoffset_8bytes_make_align_start:
+ /* now try to align s1 with 8 bytes */
+ rlwinm r6,r3,3,26,28
+ beq .Ldiffoffset_align_s1_8bytes
+
+ clrrdi r3,r3,3
+ LD rA,0,r3
+ LD rB,0,r4 /* unaligned load */
+ sld rA,rA,r6
+ srd rA,rA,r6
+ srd rB,rB,r6
+ cmpld cr0,rA,rB
+ srwi r6,r6,3
+ bne cr0,.LcmpAB_lightweight
+
+ subfic r6,r6,8
+ subf. r5,r6,r5
+ addi r3,r3,8
+ add r4,r4,r6
+
+ beq .Lzero
+
+.Ldiffoffset_align_s1_8bytes:
+ /* now s1 is aligned with 8 bytes. */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+	/* only do VMX ops when the size is 4K bytes or greater */
+ cmpdi cr5,r5,VMX_THRESH
+ bge cr5,.Ldiffoffset_vmx_cmp
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+.Ldiffoffset_novmx_cmp:
+#endif
+
+
+ cmpdi cr5,r5,31
+ ble cr5,.Lcmp_lt32bytes
+
+#ifdef CONFIG_ALTIVEC
+ b .Llong_novmx_cmp
+#else
+ b .Llong
+#endif
+
+#ifdef CONFIG_ALTIVEC
+.Ldiffoffset_vmx_cmp:
+	/* Perform a 32-byte pre-check before
+	 * enabling VMX operations.
+ */
+ li r0,4
+ mtctr r0
+.Ldiffoffset_prechk_32B_loop:
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ addi r3,r3,8
+ addi r4,r4,8
+ bne cr0,.LcmpAB_lightweight
+ addi r5,r5,-8
+ bdnz .Ldiffoffset_prechk_32B_loop
+
+ ENTER_VMX_OPS
+ beq cr1,.Ldiffoffset_novmx_cmp
+
+.Ldiffoffset_vmx_cmp_start:
+	/* First, try to align r3 to 16 bytes */
+ andi. r6,r3,0xf
+ li off16,16
+ beq .Ldiffoffset_vmx_s1_16bytes_align
+
+ LVS v3,0,r3
+ LVS v4,0,r4
+
+ lvx v5,0,r3
+ lvx v6,0,r4
+ LD_VSR_CROSS16B(r3,v3,v5,v7,v9)
+ LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+
+ VCMPEQUB_RC(v7,v9,v10)
+ bnl cr6,.Ldiffoffset_vmx_diff_found
+
+ subfic r6,r6,16
+ subf r5,r6,r5
+ add r3,r3,r6
+ add r4,r4,r6
+
+.Ldiffoffset_vmx_s1_16bytes_align:
+ /* now s1 is aligned with 16 bytes */
+ lvx v6,0,r4
+ LVS v4,0,r4
+ srdi r6,r5,5 /* loop for 32 bytes each */
+ clrldi r5,r5,59
+ mtctr r6
+
+.balign 16
+.Ldiffoffset_vmx_32bytesloop:
+ /* the first qw of r4 was saved in v6 */
+ lvx v9,0,r3
+ LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+ VCMPEQUB_RC(v7,v9,v10)
+ vor v6,v8,v8
+ bnl cr6,.Ldiffoffset_vmx_diff_found
+
+ addi r3,r3,16
+ addi r4,r4,16
+
+ lvx v9,0,r3
+ LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+ VCMPEQUB_RC(v7,v9,v10)
+ vor v6,v8,v8
+ bnl cr6,.Ldiffoffset_vmx_diff_found
+
+ addi r3,r3,16
+ addi r4,r4,16
+
+ bdnz .Ldiffoffset_vmx_32bytesloop
+
+ EXIT_VMX_OPS
+
+ cmpdi r5,0
+ beq .Lzero
+ b .Lcmp_lt32bytes
+
+.Ldiffoffset_vmx_diff_found:
+ EXIT_VMX_OPS
+ /* anyway, the diff will appear in next 16 bytes */
+ li r5,16
+ b .Lcmp_lt32bytes
+
+#endif
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
new file mode 100644
index 000000000..b5a67e201
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2002 Paul Mackerras, IBM Corp.
+ */
+#include <linux/export.h>
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+#include <asm/kasan.h>
+
+#ifndef SELFTEST_CASE
+/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
+#define SELFTEST_CASE 0
+#endif
+
+ .align 7
+_GLOBAL_TOC_KASAN(memcpy)
+BEGIN_FTR_SECTION
+#ifdef __LITTLE_ENDIAN__
+ cmpdi cr7,r5,0
+#else
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
+#endif
+FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
+ b memcpy_power7
+#endif
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#ifdef __LITTLE_ENDIAN__
+ /* dumb little-endian memcpy that will get replaced at runtime */
+ addi r9,r3,-1
+ addi r4,r4,-1
+ beqlr cr7
+ mtctr r5
+1: lbzu r10,1(r4)
+ stbu r10,1(r9)
+ bdnz 1b
+ blr
+#else
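+	/*
+	 * PPC_MTOCRF(0x01,r5) copies the low four bits of the length into cr7,
+	 * so the tail code can test the 8/4/2/1-byte remainder bits
+	 * individually with bf/bt on cr7.
+	 */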
+ PPC_MTOCRF(0x01,r5)
+ cmpldi cr1,r5,16
+ neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
+ andi. r6,r6,7
+ dcbt 0,r4
+ blt cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+ CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+ cleared.
+ At the time of writing the only CPU that has this combination of bits
+ set is Power6. */
+test_feature = (SELFTEST_CASE == 1)
+BEGIN_FTR_SECTION
+ nop
+FTR_SECTION_ELSE
+ bne .Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+ CPU_FTR_UNALIGNED_LD_STD)
+.Ldst_aligned:
+ addi r3,r3,-16
+test_feature = (SELFTEST_CASE == 0)
+BEGIN_FTR_SECTION
+ andi. r0,r4,7
+ bne .Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
+ srdi r7,r5,4
+ ld r9,0(r4)
+ addi r4,r4,-8
+ mtctr r7
+ andi. r5,r5,7
+ bf cr7*4+0,2f
+ addi r3,r3,8
+ addi r4,r4,8
+ mr r8,r9
+ blt cr1,3f
+1: ld r9,8(r4)
+ std r8,8(r3)
+2: ldu r8,16(r4)
+ stdu r9,16(r3)
+ bdnz 1b
+3: std r8,8(r3)
+ beq 3f
+ addi r3,r3,16
+.Ldo_tail:
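+	/* Copy the final 0-7 bytes; cr7 bits select 4-, 2- and 1-byte moves */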
+ bf cr7*4+1,1f
+ lwz r9,8(r4)
+ addi r4,r4,4
+ stw r9,0(r3)
+ addi r3,r3,4
+1: bf cr7*4+2,2f
+ lhz r9,8(r4)
+ addi r4,r4,2
+ sth r9,0(r3)
+ addi r3,r3,2
+2: bf cr7*4+3,3f
+ lbz r9,8(r4)
+ stb r9,0(r3)
+3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
+ blr
+
+.Lsrc_unaligned:
+ srdi r6,r5,3
+ addi r5,r5,-16
+ subf r4,r0,r4
+ srdi r7,r5,4
+ sldi r10,r0,3
+ cmpdi cr6,r6,3
+ andi. r5,r5,7
+ mtctr r7
+ subfic r11,r10,64
+ add r5,r5,r0
+
+ bt cr7*4+0,0f
+
+ ld r9,0(r4) # 3+2n loads, 2+2n stores
+ ld r0,8(r4)
+ sld r6,r9,r10
+ ldu r9,16(r4)
+ srd r7,r0,r11
+ sld r8,r0,r10
+ or r7,r7,r6
+ blt cr6,4f
+ ld r0,8(r4)
+ # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
+ b 2f
+
+0: ld r0,0(r4) # 4+2n loads, 3+2n stores
+ ldu r9,8(r4)
+ sld r8,r0,r10
+ addi r3,r3,-8
+ blt cr6,5f
+ ld r0,8(r4)
+ srd r12,r9,r11
+ sld r6,r9,r10
+ ldu r9,16(r4)
+ or r12,r8,r12
+ srd r7,r0,r11
+ sld r8,r0,r10
+ addi r3,r3,16
+ beq cr6,3f
+
+ # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
+1: or r7,r7,r6
+ ld r0,8(r4)
+ std r12,8(r3)
+2: srd r12,r9,r11
+ sld r6,r9,r10
+ ldu r9,16(r4)
+ or r12,r8,r12
+ stdu r7,16(r3)
+ srd r7,r0,r11
+ sld r8,r0,r10
+ bdnz 1b
+
+3: std r12,8(r3)
+ or r7,r7,r6
+4: std r7,16(r3)
+5: srd r12,r9,r11
+ or r12,r8,r12
+ std r12,24(r3)
+ beq 4f
+ cmpwi cr1,r5,8
+ addi r3,r3,32
+ sld r9,r9,r10
+ ble cr1,6f
+ ld r0,8(r4)
+ srd r7,r0,r11
+ or r9,r7,r9
+6:
+ bf cr7*4+1,1f
+ rotldi r9,r9,32
+ stw r9,0(r3)
+ addi r3,r3,4
+1: bf cr7*4+2,2f
+ rotldi r9,r9,16
+ sth r9,0(r3)
+ addi r3,r3,2
+2: bf cr7*4+3,3f
+ rotldi r9,r9,8
+ stb r9,0(r3)
+3: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
+ blr
+
+.Ldst_unaligned:
+ PPC_MTOCRF(0x01,r6) # put #bytes to 8B bdry into cr7
+ subf r5,r6,r5
+ li r7,0
+ cmpldi cr1,r5,16
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+ addi r7,r7,1
+1: bf cr7*4+2,2f
+ lhzx r0,r7,r4
+ sthx r0,r7,r3
+ addi r7,r7,2
+2: bf cr7*4+1,3f
+ lwzx r0,r7,r4
+ stwx r0,r7,r3
+3: PPC_MTOCRF(0x01,r5)
+ add r4,r6,r4
+ add r3,r6,r3
+ b .Ldst_aligned
+
+.Lshort_copy:
+ bf cr7*4+0,1f
+ lwz r0,0(r4)
+ lwz r9,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r9,4(r3)
+ addi r3,r3,8
+1: bf cr7*4+1,2f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+2: bf cr7*4+2,3f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+3: bf cr7*4+3,4f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
+ blr
+#endif
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL_KASAN(memcpy)
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
new file mode 100644
index 000000000..9398b2b74
--- /dev/null
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -0,0 +1,641 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+
+#ifndef SELFTEST_CASE
+/* 0 == don't use VMX, 1 == use VMX */
+#define SELFTEST_CASE 0
+#endif
+
+#ifdef __BIG_ENDIAN__
+#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
+#else
+#define LVS(VRT,RA,RB) lvsr VRT,RA,RB
+#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
+#endif
+
+_GLOBAL(memcpy_power7)
+ cmpldi r5,16
+ cmpldi cr1,r5,4096
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ blt .Lshort_copy
+
+#ifdef CONFIG_ALTIVEC
+test_feature = SELFTEST_CASE
+BEGIN_FTR_SECTION
+ bgt cr1, .Lvmx_copy
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+#endif
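+
+/*
+ * Only copies larger than 4kB take the VMX path above, and only when
+ * CPU_FTR_ALTIVEC is available; everything else uses the integer copy below.
+ */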
+
+.Lnonvmx_copy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+ blt 5f
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ ld r9,32(r4)
+ ld r10,40(r4)
+ ld r11,48(r4)
+ ld r12,56(r4)
+ ld r14,64(r4)
+ ld r15,72(r4)
+ ld r16,80(r4)
+ ld r17,88(r4)
+ ld r18,96(r4)
+ ld r19,104(r4)
+ ld r20,112(r4)
+ ld r21,120(r4)
+ addi r4,r4,128
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ std r9,32(r3)
+ std r10,40(r3)
+ std r11,48(r3)
+ std r12,56(r3)
+ std r14,64(r3)
+ std r15,72(r3)
+ std r16,80(r3)
+ std r17,88(r3)
+ std r18,96(r3)
+ std r19,104(r3)
+ std r20,112(r3)
+ std r21,120(r3)
+ addi r3,r3,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ ld r9,32(r4)
+ ld r10,40(r4)
+ ld r11,48(r4)
+ ld r12,56(r4)
+ addi r4,r4,64
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ std r9,32(r3)
+ std r10,40(r3)
+ std r11,48(r3)
+ std r12,56(r3)
+ addi r3,r3,64
+
+ /* Up to 63B to go */
+7: bf cr7*4+2,8f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ ld r7,16(r4)
+ ld r8,24(r4)
+ addi r4,r4,32
+ std r0,0(r3)
+ std r6,8(r3)
+ std r7,16(r3)
+ std r8,24(r3)
+ addi r3,r3,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+ ld r0,0(r4)
+ ld r6,8(r4)
+ addi r4,r4,16
+ std r0,0(r3)
+ std r6,8(r3)
+ addi r3,r3,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r6,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ blr
+
+.Lunwind_stack_nonvmx_copy:
+ addi r1,r1,STACKFRAMESIZE
+ b .Lnonvmx_copy
+
+.Lvmx_copy:
+#ifdef CONFIG_ALTIVEC
+ mflr r0
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
+ std r0,16(r1)
+ stdu r1,-STACKFRAMESIZE(r1)
+ bl CFUNC(enter_vmx_ops)
+ cmpwi cr1,r3,0
+ ld r0,STACKFRAMESIZE+16(r1)
+ ld r3,STK_REG(R31)(r1)
+ ld r4,STK_REG(R30)(r1)
+ ld r5,STK_REG(R29)(r1)
+ mtlr r0
+
+ /*
+ * We prefetch both the source and destination using enhanced touch
+ * instructions. We use a stream ID of 0 for the load side and
+ * 1 for the store side.
+ */
+ clrrdi r6,r4,7
+ clrrdi r9,r3,7
+ ori r9,r9,1 /* stream=1 */
+
+ srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
+ cmpldi r7,0x3FF
+ ble 1f
+ li r7,0x3FF
+1: lis r0,0x0E00 /* depth=7 */
+ sldi r7,r7,7
+ or r7,r7,r0
+ ori r10,r7,1 /* stream=1 */
+
+ lis r8,0x8000 /* GO=1 */
+ clrldi r8,r8,32
+
+ dcbt 0,r6,0b01000
+ dcbt 0,r7,0b01010
+ dcbtst 0,r9,0b01000
+ dcbtst 0,r10,0b01010
+ eieio
+ dcbt 0,r8,0b01010 /* GO */
+
+ beq cr1,.Lunwind_stack_nonvmx_copy
+
+ /*
+ * If source and destination are not relatively aligned we use a
+ * slower permute loop.
+ */
+ xor r6,r4,r3
+ rldicl. r6,r6,0,(64-4)
+ bne .Lvmx_unaligned_copy
+
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+ ld r0,0(r4)
+ addi r4,r4,8
+ std r0,0(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+	/* Get the destination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ bf cr7*4+3,5f
+ lvx v1,0,r4
+ addi r4,r4,16
+ stvx v1,0,r3
+ addi r3,r3,16
+
+5: bf cr7*4+2,6f
+ lvx v1,0,r4
+ lvx v0,r4,r9
+ addi r4,r4,32
+ stvx v1,0,r3
+ stvx v0,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+ lvx v3,0,r4
+ lvx v2,r4,r9
+ lvx v1,r4,r10
+ lvx v0,r4,r11
+ addi r4,r4,64
+ stvx v3,0,r3
+ stvx v2,r3,r9
+ stvx v1,r3,r10
+ stvx v0,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+ lvx v7,0,r4
+ lvx v6,r4,r9
+ lvx v5,r4,r10
+ lvx v4,r4,r11
+ lvx v3,r4,r12
+ lvx v2,r4,r14
+ lvx v1,r4,r15
+ lvx v0,r4,r16
+ addi r4,r4,128
+ stvx v7,0,r3
+ stvx v6,r3,r9
+ stvx v5,r3,r10
+ stvx v4,r3,r11
+ stvx v3,r3,r12
+ stvx v2,r3,r14
+ stvx v1,r3,r15
+ stvx v0,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+ lvx v3,0,r4
+ lvx v2,r4,r9
+ lvx v1,r4,r10
+ lvx v0,r4,r11
+ addi r4,r4,64
+ stvx v3,0,r3
+ stvx v2,r3,r9
+ stvx v1,r3,r10
+ stvx v0,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+ lvx v1,0,r4
+ lvx v0,r4,r9
+ addi r4,r4,32
+ stvx v1,0,r3
+ stvx v0,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+ lvx v1,0,r4
+ addi r4,r4,16
+ stvx v1,0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ ld r0,0(r4)
+ addi r4,r4,8
+ std r0,0(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
+
+.Lvmx_unaligned_copy:
+ /* Get the destination 16B aligned */
+ neg r6,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-4)
+
+ bf cr7*4+3,1f
+ lbz r0,0(r4)
+ addi r4,r4,1
+ stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+3: bf cr7*4+0,4f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r7,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r7,4(r3)
+ addi r3,r3,8
+
+4: sub r5,r5,r6
+
+	/* Get the destination 128B aligned */
+ neg r6,r3
+ srdi r7,r6,4
+ mtocrf 0x01,r7
+ clrldi r6,r6,(64-7)
+
+ li r9,16
+ li r10,32
+ li r11,48
+
+ LVS(v16,0,r4) /* Setup permute control vector */
+ lvx v0,0,r4
+ addi r4,r4,16
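+	/* v16 holds the alignment permute control; v0 carries the previous
+	 * source vector between steps of the realignment pipeline. */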
+
+ bf cr7*4+3,5f
+ lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+ addi r4,r4,16
+ stvx v8,0,r3
+ addi r3,r3,16
+ vor v0,v1,v1
+
+5: bf cr7*4+2,6f
+ lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+ lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
+ addi r4,r4,32
+ stvx v8,0,r3
+ stvx v9,r3,r9
+ addi r3,r3,32
+
+6: bf cr7*4+1,7f
+ lvx v3,0,r4
+ VPERM(v8,v0,v3,v16)
+ lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+ lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+ lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
+ addi r4,r4,64
+ stvx v8,0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
+ addi r3,r3,64
+
+7: sub r5,r5,r6
+ srdi r6,r5,7
+
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+
+ li r12,64
+ li r14,80
+ li r15,96
+ li r16,112
+
+ mtctr r6
+
+ /*
+ * Now do cacheline sized loads and stores. By this stage the
+ * cacheline stores are also cacheline aligned.
+ */
+ .align 5
+8:
+ lvx v7,0,r4
+ VPERM(v8,v0,v7,v16)
+ lvx v6,r4,r9
+ VPERM(v9,v7,v6,v16)
+ lvx v5,r4,r10
+ VPERM(v10,v6,v5,v16)
+ lvx v4,r4,r11
+ VPERM(v11,v5,v4,v16)
+ lvx v3,r4,r12
+ VPERM(v12,v4,v3,v16)
+ lvx v2,r4,r14
+ VPERM(v13,v3,v2,v16)
+ lvx v1,r4,r15
+ VPERM(v14,v2,v1,v16)
+ lvx v0,r4,r16
+ VPERM(v15,v1,v0,v16)
+ addi r4,r4,128
+ stvx v8,0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
+ stvx v12,r3,r12
+ stvx v13,r3,r14
+ stvx v14,r3,r15
+ stvx v15,r3,r16
+ addi r3,r3,128
+ bdnz 8b
+
+ ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+
+ /* Up to 127B to go */
+ clrldi r5,r5,(64-7)
+ srdi r6,r5,4
+ mtocrf 0x01,r6
+
+ bf cr7*4+1,9f
+ lvx v3,0,r4
+ VPERM(v8,v0,v3,v16)
+ lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+ lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+ lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
+ addi r4,r4,64
+ stvx v8,0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
+ addi r3,r3,64
+
+9: bf cr7*4+2,10f
+ lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+ lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
+ addi r4,r4,32
+ stvx v8,0,r3
+ stvx v9,r3,r9
+ addi r3,r3,32
+
+10: bf cr7*4+3,11f
+ lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+ addi r4,r4,16
+ stvx v8,0,r3
+ addi r3,r3,16
+
+ /* Up to 15B to go */
+11: clrldi r5,r5,(64-4)
+ addi r4,r4,-16 /* Unwind the +16 load offset */
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+ lwz r0,0(r4) /* Less chance of a reject with word ops */
+ lwz r6,4(r4)
+ addi r4,r4,8
+ stw r0,0(r3)
+ stw r6,4(r3)
+ addi r3,r3,8
+
+12: bf cr7*4+1,13f
+ lwz r0,0(r4)
+ addi r4,r4,4
+ stw r0,0(r3)
+ addi r3,r3,4
+
+13: bf cr7*4+2,14f
+ lhz r0,0(r4)
+ addi r4,r4,2
+ sth r0,0(r3)
+ addi r3,r3,2
+
+14: bf cr7*4+3,15f
+ lbz r0,0(r4)
+ stb r0,0(r3)
+
+15: addi r1,r1,STACKFRAMESIZE
+ ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
new file mode 100644
index 000000000..4e724c4c0
--- /dev/null
+++ b/arch/powerpc/lib/pmem.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright(c) 2017 IBM Corporation. All rights reserved.
+ */
+
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/libnvdimm.h>
+
+#include <asm/cacheflush.h>
+
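+/*
+ * Write back (dcbstps) or flush (dcbfps) every cache line overlapping
+ * [start, stop) so the data reaches the persistence domain.
+ */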
+static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
+}
+
+static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
+}
+
+static inline void clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return __clean_pmem_range(start, stop);
+}
+
+static inline void flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return __flush_pmem_range(start, stop);
+}
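+
+/*
+ * On CPUs without CPU_FTR_ARCH_207S the two helpers above are no-ops, so
+ * the persistent-storage cache instructions are only issued where supported.
+ */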
+
+/*
+ * CONFIG_ARCH_HAS_PMEM_API symbols
+ */
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+ unsigned long start = (unsigned long) addr;
+ clean_pmem_range(start, start + size);
+}
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
+
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+ unsigned long start = (unsigned long) addr;
+ flush_pmem_range(start, start + size);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+/*
+ * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
+ */
+long __copy_from_user_flushcache(void *dest, const void __user *src,
+ unsigned size)
+{
+ unsigned long copied, start = (unsigned long) dest;
+
+ copied = __copy_from_user(dest, src, size);
+ clean_pmem_range(start, start + size);
+
+ return copied;
+}
+
+void memcpy_flushcache(void *dest, const void *src, size_t size)
+{
+ unsigned long start = (unsigned long) dest;
+
+ memcpy(dest, src, size);
+ clean_pmem_range(start, start + size);
+}
+EXPORT_SYMBOL(memcpy_flushcache);
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
new file mode 100644
index 000000000..6dd2f46bd
--- /dev/null
+++ b/arch/powerpc/lib/qspinlock.c
@@ -0,0 +1,1007 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/percpu.h>
+#include <linux/processor.h>
+#include <linux/smp.h>
+#include <linux/topology.h>
+#include <linux/sched/clock.h>
+#include <asm/qspinlock.h>
+#include <asm/paravirt.h>
+
+#define MAX_NODES 4
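+/*
+ * Per-CPU queue nodes; four covers the contexts a CPU can take a spinlock
+ * from (roughly task, soft IRQ, hard IRQ and NMI).
+ */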
+
+struct qnode {
+	struct qnode *next;		/* CPU queued behind us */
+	struct qspinlock *lock;		/* lock this node is queued on */
+	int cpu;			/* this CPU's number, encoded into the lock tail */
+	int yield_cpu;			/* preempted owner CPU, propagated from the waiter ahead */
+	u8 locked;			/* 1 if lock acquired */
+};
+
+struct qnodes {
+ int count;
+ struct qnode nodes[MAX_NODES];
+};
+
+/* Tuning parameters */
+static int steal_spins __read_mostly = (1 << 5);
+static int remote_steal_spins __read_mostly = (1 << 2);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
+static bool maybe_stealers __read_mostly = true;
+#endif
+static int head_spins __read_mostly = (1 << 8);
+
+static bool pv_yield_owner __read_mostly = true;
+static bool pv_yield_allow_steal __read_mostly = false;
+static bool pv_spin_on_preempted_owner __read_mostly = false;
+static bool pv_sleepy_lock __read_mostly = true;
+static bool pv_sleepy_lock_sticky __read_mostly = false;
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
+static int pv_sleepy_lock_factor __read_mostly = 256;
+static bool pv_yield_prev __read_mostly = true;
+static bool pv_yield_propagate_owner __read_mostly = true;
+static bool pv_prod_head __read_mostly = false;
+
+static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
+
+#if _Q_SPIN_SPEC_BARRIER == 1
+#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
+#else
+#define spec_barrier() do { } while (0)
+#endif
+
+static __always_inline bool recently_sleepy(void)
+{
+ /* pv_sleepy_lock is true when this is called */
+ if (pv_sleepy_lock_interval_ns) {
+ u64 seen = this_cpu_read(sleepy_lock_seen_clock);
+
+ if (seen) {
+ u64 delta = sched_clock() - seen;
+ if (delta < pv_sleepy_lock_interval_ns)
+ return true;
+ this_cpu_write(sleepy_lock_seen_clock, 0);
+ }
+ }
+
+ return false;
+}
+
+static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return steal_spins * pv_sleepy_lock_factor;
+ else
+ return steal_spins;
+}
+
+static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return remote_steal_spins * pv_sleepy_lock_factor;
+ else
+ return remote_steal_spins;
+}
+
+static __always_inline int get_head_spins(bool paravirt, bool sleepy)
+{
+ if (paravirt && sleepy)
+ return head_spins * pv_sleepy_lock_factor;
+ else
+ return head_spins;
+}
+
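+/* The tail CPU is stored +1 so that a zero tail field means "no waiters". */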
+static inline u32 encode_tail_cpu(int cpu)
+{
+ return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+}
+
+static inline int decode_tail_cpu(u32 val)
+{
+ return (val >> _Q_TAIL_CPU_OFFSET) - 1;
+}
+
+static inline int get_owner_cpu(u32 val)
+{
+ return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
+}
+
+/*
+ * Try to acquire the lock if it was not already locked. If the tail matches
+ * mytail then clear it, otherwise leave it unchanged. Return the previous value.
+ *
+ * This is used by the head of the queue to acquire the lock and clean up
+ * its tail if it was the last one queued.
+ */
+static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
+{
+ u32 newval = queued_spin_encode_locked_val();
+ u32 prev, tmp;
+
+ asm volatile(
+"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n"
+ /* This test is necessary if there could be stealers */
+" andi. %1,%0,%5 \n"
+" bne 3f \n"
+ /* Test whether the lock tail == mytail */
+" and %1,%0,%6 \n"
+" cmpw 0,%1,%3 \n"
+ /* Merge the new locked value */
+" or %1,%1,%4 \n"
+" bne 2f \n"
+ /* If the lock tail matched, then clear it, otherwise leave it. */
+" andc %1,%1,%6 \n"
+"2: stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"3: \n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (&lock->val), "r"(tail), "r" (newval),
+ "i" (_Q_LOCKED_VAL),
+ "r" (_Q_TAIL_CPU_MASK),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+/*
+ * Publish our tail, replacing previous tail. Return previous value.
+ *
+ * This provides a release barrier for publishing the node; it pairs with the
+ * acquire barrier in get_tail_qnode() when the next CPU finds this tail
+ * value.
+ */
+static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
+{
+ u32 prev, tmp;
+
+ kcsan_release();
+
+ asm volatile(
+"\t" PPC_RELEASE_BARRIER " \n"
+"1: lwarx %0,0,%2 # publish_tail_cpu \n"
+" andc %1,%0,%4 \n"
+" or %1,%1,%3 \n"
+" stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+ : "=&r" (prev), "=&r"(tmp)
+ : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline u32 set_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # set_mustq \n"
+" or %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline u32 clear_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # clear_mustq \n"
+" andc %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
+{
+ u32 prev;
+ u32 new = old | _Q_SLEEPY_VAL;
+
+ BUG_ON(!(old & _Q_LOCKED_VAL));
+ BUG_ON(old & _Q_SLEEPY_VAL);
+
+ asm volatile(
+"1: lwarx %0,0,%1 # try_set_sleepy \n"
+" cmpw 0,%0,%2 \n"
+" bne- 2f \n"
+" stwcx. %3,0,%1 \n"
+" bne- 1b \n"
+"2: \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r"(old), "r" (new)
+ : "cr0", "memory");
+
+ return likely(prev == old);
+}
+
+static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
+{
+ if (pv_sleepy_lock) {
+ if (pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+ if (!(val & _Q_SLEEPY_VAL))
+ try_set_sleepy(lock, val);
+ }
+}
+
+static __always_inline void seen_sleepy_lock(void)
+{
+ if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
+{
+ if (pv_sleepy_lock) {
+ if (pv_sleepy_lock_interval_ns)
+ this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+ if (val & _Q_LOCKED_VAL) {
+ if (!(val & _Q_SLEEPY_VAL))
+ try_set_sleepy(lock, val);
+ }
+ }
+}
+
+static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
+{
+ int cpu = decode_tail_cpu(val);
+ struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
+ int idx;
+
+ /*
+ * After publishing the new tail and finding a previous tail in the
+ * previous val (which is the control dependency), this barrier
+ * orders the release barrier in publish_tail_cpu performed by the
+ * last CPU, with subsequently looking at its qnode structures
+ * after the barrier.
+ */
+ smp_acquire__after_ctrl_dep();
+
+ for (idx = 0; idx < MAX_NODES; idx++) {
+ struct qnode *qnode = &qnodesp->nodes[idx];
+ if (qnode->lock == lock)
+ return qnode;
+ }
+
+ BUG();
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
+{
+ int owner;
+ u32 yield_count;
+ bool preempted = false;
+
+ BUG_ON(!(val & _Q_LOCKED_VAL));
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_owner)
+ goto relax;
+
+ owner = get_owner_cpu(val);
+ yield_count = yield_count_of(owner);
+
+ if ((yield_count & 1) == 0)
+ goto relax; /* owner vcpu is running */
+
+ spin_end();
+
+ seen_sleepy_owner(lock, val);
+ preempted = true;
+
+ /*
+ * Read the lock word after sampling the yield count. On the other side
+	 * there may be a wmb because the yield count update is done by the
+ * hypervisor preemption and the value update by the OS, however this
+ * ordering might reduce the chance of out of order accesses and
+ * improve the heuristic.
+ */
+ smp_rmb();
+
+ if (READ_ONCE(lock->val) == val) {
+ if (mustq)
+ clear_mustq(lock);
+ yield_to_preempted(owner, yield_count);
+ if (mustq)
+ set_mustq(lock);
+ spin_begin();
+
+ /* Don't relax if we yielded. Maybe we should? */
+ return preempted;
+ }
+ spin_begin();
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ return __yield_to_locked_owner(lock, val, paravirt, false);
+}
+
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+ bool mustq = false;
+
+ if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
+ mustq = true;
+
+ return __yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
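+/*
+ * Pass the lock owner's CPU down to the next queued waiter so it can
+ * direct its yield at the (possibly preempted) owner rather than only
+ * at the waiter ahead of it.
+ */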
+static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+{
+ struct qnode *next;
+ int owner;
+
+ if (!paravirt)
+ return;
+ if (!pv_yield_propagate_owner)
+ return;
+
+ owner = get_owner_cpu(val);
+ if (*set_yield_cpu == owner)
+ return;
+
+ next = READ_ONCE(node->next);
+ if (!next)
+ return;
+
+ if (vcpu_is_preempted(owner)) {
+ next->yield_cpu = owner;
+ *set_yield_cpu = owner;
+ } else if (*set_yield_cpu != -1) {
+ next->yield_cpu = owner;
+ *set_yield_cpu = owner;
+ }
+}
+
+/* Called inside spin_begin() */
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
+{
+ int prev_cpu = decode_tail_cpu(val);
+ u32 yield_count;
+ int yield_cpu;
+ bool preempted = false;
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_propagate_owner)
+ goto yield_prev;
+
+ yield_cpu = READ_ONCE(node->yield_cpu);
+ if (yield_cpu == -1) {
+ /* Propagate back the -1 CPU */
+ if (node->next && node->next->yield_cpu != -1)
+ node->next->yield_cpu = yield_cpu;
+ goto yield_prev;
+ }
+
+ yield_count = yield_count_of(yield_cpu);
+ if ((yield_count & 1) == 0)
+ goto yield_prev; /* owner vcpu is running */
+
+ if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu)
+ goto yield_prev; /* re-sample lock owner */
+
+ spin_end();
+
+ preempted = true;
+ seen_sleepy_node(lock, val);
+
+ smp_rmb();
+
+ if (yield_cpu == node->yield_cpu) {
+ if (node->next && node->next->yield_cpu != yield_cpu)
+ node->next->yield_cpu = yield_cpu;
+ yield_to_preempted(yield_cpu, yield_count);
+ spin_begin();
+ return preempted;
+ }
+ spin_begin();
+
+yield_prev:
+ if (!pv_yield_prev)
+ goto relax;
+
+ yield_count = yield_count_of(prev_cpu);
+ if ((yield_count & 1) == 0)
+ goto relax; /* owner vcpu is running */
+
+ spin_end();
+
+ preempted = true;
+ seen_sleepy_node(lock, val);
+
+ smp_rmb(); /* See __yield_to_locked_owner comment */
+
+ if (!READ_ONCE(node->locked)) {
+ yield_to_preempted(prev_cpu, yield_count);
+ spin_begin();
+ return preempted;
+ }
+ spin_begin();
+
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
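+/*
+ * Decide when to give up on stealing: after steal_spins iterations, or
+ * earlier (remote_steal_spins) when the lock owner is on a different
+ * NUMA node.
+ */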
+static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
+{
+ if (iters >= get_steal_spins(paravirt, sleepy))
+ return true;
+
+ if (IS_ENABLED(CONFIG_NUMA) &&
+ (iters >= get_remote_steal_spins(paravirt, sleepy))) {
+ int cpu = get_owner_cpu(val);
+ if (numa_node_id() != cpu_to_node(cpu))
+ return true;
+ }
+ return false;
+}
+
+static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
+{
+ bool seen_preempted = false;
+ bool sleepy = false;
+ int iters = 0;
+ u32 val;
+
+ if (!steal_spins) {
+ /* XXX: should spin_on_preempted_owner do anything here? */
+ return false;
+ }
+
+ /* Attempt to steal the lock */
+ spin_begin();
+ do {
+ bool preempted = false;
+
+ val = READ_ONCE(lock->val);
+ if (val & _Q_MUST_Q_VAL)
+ break;
+ spec_barrier();
+
+ if (unlikely(!(val & _Q_LOCKED_VAL))) {
+ spin_end();
+ if (__queued_spin_trylock_steal(lock))
+ return true;
+ spin_begin();
+ } else {
+ preempted = yield_to_locked_owner(lock, val, paravirt);
+ }
+
+ if (paravirt && pv_sleepy_lock) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ if (preempted) {
+ seen_preempted = true;
+ sleepy = true;
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ /*
+			 * With pv_spin_on_preempted_owner, don't increase iters
+			 * while the owner is preempted -- we won't interfere
+			 * with it by definition. This could introduce some
+			 * latency if we continually observe preempted
+ * owners, but hopefully that's a rare corner case of
+ * a badly oversubscribed system.
+ */
+ } else {
+ iters++;
+ }
+ } while (!steal_break(val, iters, paravirt, sleepy));
+
+ spin_end();
+
+ return false;
+}
+
+static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
+{
+ struct qnodes *qnodesp;
+ struct qnode *next, *node;
+ u32 val, old, tail;
+ bool seen_preempted = false;
+ bool sleepy = false;
+ bool mustq = false;
+ int idx;
+ int set_yield_cpu = -1;
+ int iters = 0;
+
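+	/* Every CPU number, encoded +1 by encode_tail_cpu(), must fit in the tail field */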
+ BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+ qnodesp = this_cpu_ptr(&qnodes);
+ if (unlikely(qnodesp->count >= MAX_NODES)) {
+ spec_barrier();
+ while (!queued_spin_trylock(lock))
+ cpu_relax();
+ return;
+ }
+
+ idx = qnodesp->count++;
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+ node = &qnodesp->nodes[idx];
+ node->next = NULL;
+ node->lock = lock;
+ node->cpu = smp_processor_id();
+ node->yield_cpu = -1;
+ node->locked = 0;
+
+ tail = encode_tail_cpu(node->cpu);
+
+ /*
+ * Assign all attributes of a node before it can be published.
+ * Issues an lwsync, serving as a release barrier, as well as a
+ * compiler barrier.
+ */
+ old = publish_tail_cpu(lock, tail);
+
+ /*
+ * If there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_CPU_MASK) {
+ struct qnode *prev = get_tail_qnode(lock, old);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ /* Wait for mcs node lock to be released */
+ spin_begin();
+ while (!READ_ONCE(node->locked)) {
+ spec_barrier();
+
+ if (yield_to_prev(lock, node, old, paravirt))
+ seen_preempted = true;
+ }
+ spec_barrier();
+ spin_end();
+
+ /* Clear out stale propagated yield_cpu */
+ if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
+ node->yield_cpu = -1;
+
+ smp_rmb(); /* acquire barrier for the mcs lock */
+
+ /*
+ * Generic qspinlocks have this prefetch here, but it seems
+ * like it could cause additional line transitions because
+ * the waiter will keep loading from it.
+ */
+ if (_Q_SPIN_PREFETCH_NEXT) {
+ next = READ_ONCE(node->next);
+ if (next)
+ prefetchw(next);
+ }
+ }
+
+ /* We're at the head of the waitqueue, wait for the lock. */
+again:
+ spin_begin();
+ for (;;) {
+ bool preempted;
+
+ val = READ_ONCE(lock->val);
+ if (!(val & _Q_LOCKED_VAL))
+ break;
+ spec_barrier();
+
+ if (paravirt && pv_sleepy_lock && maybe_stealers) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
+ preempted = yield_head_to_locked_owner(lock, val, paravirt);
+ if (!maybe_stealers)
+ continue;
+
+ if (preempted)
+ seen_preempted = true;
+
+ if (paravirt && preempted) {
+ sleepy = true;
+
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ } else {
+ iters++;
+ }
+
+ if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
+ mustq = true;
+ set_mustq(lock);
+ val |= _Q_MUST_Q_VAL;
+ }
+ }
+ spec_barrier();
+ spin_end();
+
+ /* If we're the last queued, must clean up the tail. */
+ old = trylock_clean_tail(lock, tail);
+ if (unlikely(old & _Q_LOCKED_VAL)) {
+ BUG_ON(!maybe_stealers);
+ goto again; /* Can only be true if maybe_stealers. */
+ }
+
+ if ((old & _Q_TAIL_CPU_MASK) == tail)
+ goto release; /* We were the tail, no next. */
+
+ /* There is a next, must wait for node->next != NULL (MCS protocol) */
+ next = READ_ONCE(node->next);
+ if (!next) {
+ spin_begin();
+ while (!(next = READ_ONCE(node->next)))
+ cpu_relax();
+ spin_end();
+ }
+ spec_barrier();
+
+ /*
+ * Unlock the next mcs waiter node. Release barrier is not required
+ * here because the acquirer is only accessing the lock word, and
+ * the acquire barrier we took the lock with orders that update vs
+ * this store to locked. The corresponding barrier is the smp_rmb()
+ * acquire barrier for mcs lock, above.
+ */
+ if (paravirt && pv_prod_head) {
+ int next_cpu = next->cpu;
+ WRITE_ONCE(next->locked, 1);
+ if (_Q_SPIN_MISO)
+ asm volatile("miso" ::: "memory");
+ if (vcpu_is_preempted(next_cpu))
+ prod_cpu(next_cpu);
+ } else {
+ WRITE_ONCE(next->locked, 1);
+ if (_Q_SPIN_MISO)
+ asm volatile("miso" ::: "memory");
+ }
+
+release:
+ qnodesp->count--; /* release the node */
+}
+
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+ /*
+ * This looks funny, but it induces the compiler to inline both
+	 * sides of the branch rather than sharing code, as it would if the
+	 * condition were passed as the paravirt argument to the functions.
+ */
+ if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
+ if (try_to_steal_lock(lock, true)) {
+ spec_barrier();
+ return;
+ }
+ queued_spin_lock_mcs_queue(lock, true);
+ } else {
+ if (try_to_steal_lock(lock, false)) {
+ spec_barrier();
+ return;
+ }
+ queued_spin_lock_mcs_queue(lock, false);
+ }
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void)
+{
+}
+#endif
+
+#include <linux/debugfs.h>
+static int steal_spins_set(void *data, u64 val)
+{
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+ /* MAYBE_STEAL remains true */
+ steal_spins = val;
+#else
+ static DEFINE_MUTEX(lock);
+
+ /*
+ * The lock slow path has a !maybe_stealers case that can assume
+	 * the head of the queue will not see concurrent waiters. That waiter
+	 * is unsafe in the presence of stealers, so we must keep the two
+	 * away from one another.
+ */
+
+ mutex_lock(&lock);
+ if (val && !steal_spins) {
+ maybe_stealers = true;
+ /* wait for queue head waiter to go away */
+ synchronize_rcu();
+ steal_spins = val;
+ } else if (!val && steal_spins) {
+ steal_spins = val;
+ /* wait for all possible stealers to go away */
+ synchronize_rcu();
+ maybe_stealers = false;
+ } else {
+ steal_spins = val;
+ }
+ mutex_unlock(&lock);
+#endif
+
+ return 0;
+}
+
+static int steal_spins_get(void *data, u64 *val)
+{
+ *val = steal_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
+
+static int remote_steal_spins_set(void *data, u64 val)
+{
+ remote_steal_spins = val;
+
+ return 0;
+}
+
+static int remote_steal_spins_get(void *data, u64 *val)
+{
+ *val = remote_steal_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
+
+static int head_spins_set(void *data, u64 val)
+{
+ head_spins = val;
+
+ return 0;
+}
+
+static int head_spins_get(void *data, u64 *val)
+{
+ *val = head_spins;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
+
+static int pv_yield_owner_set(void *data, u64 val)
+{
+ pv_yield_owner = !!val;
+
+ return 0;
+}
+
+static int pv_yield_owner_get(void *data, u64 *val)
+{
+ *val = pv_yield_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
+
+static int pv_yield_allow_steal_set(void *data, u64 val)
+{
+ pv_yield_allow_steal = !!val;
+
+ return 0;
+}
+
+static int pv_yield_allow_steal_get(void *data, u64 *val)
+{
+ *val = pv_yield_allow_steal;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
+
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+ pv_spin_on_preempted_owner = !!val;
+
+ return 0;
+}
+
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+ *val = pv_spin_on_preempted_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
+
+static int pv_sleepy_lock_set(void *data, u64 val)
+{
+ pv_sleepy_lock = !!val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
+
+static int pv_sleepy_lock_sticky_set(void *data, u64 val)
+{
+ pv_sleepy_lock_sticky = !!val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_sticky;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
+
+static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
+{
+ pv_sleepy_lock_interval_ns = val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_interval_ns;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
+
+static int pv_sleepy_lock_factor_set(void *data, u64 val)
+{
+ pv_sleepy_lock_factor = val;
+
+ return 0;
+}
+
+static int pv_sleepy_lock_factor_get(void *data, u64 *val)
+{
+ *val = pv_sleepy_lock_factor;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
+
+static int pv_yield_prev_set(void *data, u64 val)
+{
+ pv_yield_prev = !!val;
+
+ return 0;
+}
+
+static int pv_yield_prev_get(void *data, u64 *val)
+{
+ *val = pv_yield_prev;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
+
+static int pv_yield_propagate_owner_set(void *data, u64 val)
+{
+ pv_yield_propagate_owner = !!val;
+
+ return 0;
+}
+
+static int pv_yield_propagate_owner_get(void *data, u64 *val)
+{
+ *val = pv_yield_propagate_owner;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+
+static int pv_prod_head_set(void *data, u64 val)
+{
+ pv_prod_head = !!val;
+
+ return 0;
+}
+
+static int pv_prod_head_get(void *data, u64 *val)
+{
+ *val = pv_prod_head;
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
+
+static __init int spinlock_debugfs_init(void)
+{
+ debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
+ debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
+ debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
+ if (is_shared_processor()) {
+ debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+ debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
+ debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
+ debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
+ debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
+ debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
+ debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
+ debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+ debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
+ debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
+ }
+
+ return 0;
+}
+device_initcall(spinlock_debugfs_init);
diff --git a/arch/powerpc/lib/quad.S b/arch/powerpc/lib/quad.S
new file mode 100644
index 000000000..da71760e5
--- /dev/null
+++ b/arch/powerpc/lib/quad.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Quadword loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/errno.h>
+
+/* do_lq(unsigned long ea, unsigned long *regs) */
+_GLOBAL(do_lq)
+1: lq r6, 0(r3)
+ std r6, 0(r4)
+ std r7, 8(r4)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_stq(unsigned long ea, unsigned long val0, unsigned long val1) */
+_GLOBAL(do_stq)
+1: stq r4, 0(r3)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_lqarx(unsigned long ea, unsigned long *regs) */
+_GLOBAL(do_lqarx)
+1: PPC_LQARX(6, 0, 3, 0)
+ std r6, 0(r4)
+ std r7, 8(r4)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+ unsigned int *crp) */
+
+_GLOBAL(do_stqcx)
+1: PPC_STQCX(4, 0, 3)
+ mfcr r5
+ stw r5, 0(r6)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c
new file mode 100644
index 000000000..bccb662c1
--- /dev/null
+++ b/arch/powerpc/lib/restart_table.c
@@ -0,0 +1,56 @@
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+
+struct soft_mask_table_entry {
+ unsigned long start;
+ unsigned long end;
+};
+
+struct restart_table_entry {
+ unsigned long start;
+ unsigned long end;
+ unsigned long fixup;
+};
+
+extern struct soft_mask_table_entry __start___soft_mask_table[];
+extern struct soft_mask_table_entry __stop___soft_mask_table[];
+
+extern struct restart_table_entry __start___restart_table[];
+extern struct restart_table_entry __stop___restart_table[];
+
+/* Given an address, look for it in the soft mask table */
+bool search_kernel_soft_mask_table(unsigned long addr)
+{
+ struct soft_mask_table_entry *smte = __start___soft_mask_table;
+
+ while (smte < __stop___soft_mask_table) {
+ unsigned long start = smte->start;
+ unsigned long end = smte->end;
+
+ if (addr >= start && addr < end)
+ return true;
+
+ smte++;
+ }
+ return false;
+}
+NOKPROBE_SYMBOL(search_kernel_soft_mask_table);
+
+/* Given an address, look for it in the kernel restart table */
+unsigned long search_kernel_restart_table(unsigned long addr)
+{
+ struct restart_table_entry *rte = __start___restart_table;
+
+ while (rte < __stop___restart_table) {
+ unsigned long start = rte->start;
+ unsigned long end = rte->end;
+ unsigned long fixup = rte->fixup;
+
+ if (addr >= start && addr < end)
+ return fixup;
+
+ rte++;
+ }
+ return 0;
+}
+NOKPROBE_SYMBOL(search_kernel_restart_table);
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
new file mode 100644
index 000000000..6aa774aa5
--- /dev/null
+++ b/arch/powerpc/lib/rheap.c
@@ -0,0 +1,747 @@
+/*
+ * A Remote Heap. Remote means that we don't touch the memory that the
+ * heap points to. Normal heap implementations use the memory they manage
+ * to place their list. We cannot do that because the memory we manage may
+ * have special properties, for example it may be uncacheable or of a
+ * different endianness.
+ *
+ * Author: Pantelis Antoniou <panto@intracom.gr>
+ *
+ * 2004 (c) INTRACOM S.A. Greece. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
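+/*
+ * Typical use (sketch; the region size, length and owner string below are
+ * illustrative only):
+ *
+ *	rh_info_t *rh = rh_create(16);
+ *	rh_attach_region(rh, 0, region_size);
+ *	offset = rh_alloc(rh, len, "driver");
+ *	...
+ *	rh_free(rh, offset);
+ */
+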
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <asm/rheap.h>
+
+/*
+ * Fixup a list_head, needed when copying lists. If the pointers fall
+ * between s and e, apply the delta. This assumes that
+ * sizeof(struct list_head *) == sizeof(unsigned long *).
+ */
+static inline void fixup(unsigned long s, unsigned long e, int d,
+ struct list_head *l)
+{
+ unsigned long *pp;
+
+ pp = (unsigned long *)&l->next;
+ if (*pp >= s && *pp < e)
+ *pp += d;
+
+ pp = (unsigned long *)&l->prev;
+ if (*pp >= s && *pp < e)
+ *pp += d;
+}
+
+/* Grow the allocated blocks */
+static int grow(rh_info_t * info, int max_blocks)
+{
+ rh_block_t *block, *blk;
+ int i, new_blocks;
+ int delta;
+ unsigned long blks, blke;
+
+ if (max_blocks <= info->max_blocks)
+ return -EINVAL;
+
+ new_blocks = max_blocks - info->max_blocks;
+
+ block = kmalloc_array(max_blocks, sizeof(rh_block_t), GFP_ATOMIC);
+ if (block == NULL)
+ return -ENOMEM;
+
+ if (info->max_blocks > 0) {
+
+ /* copy old block area */
+ memcpy(block, info->block,
+ sizeof(rh_block_t) * info->max_blocks);
+
+ delta = (char *)block - (char *)info->block;
+
+ /* and fixup list pointers */
+ blks = (unsigned long)info->block;
+ blke = (unsigned long)(info->block + info->max_blocks);
+
+ for (i = 0, blk = block; i < info->max_blocks; i++, blk++)
+ fixup(blks, blke, delta, &blk->list);
+
+ fixup(blks, blke, delta, &info->empty_list);
+ fixup(blks, blke, delta, &info->free_list);
+ fixup(blks, blke, delta, &info->taken_list);
+
+ /* free the old allocated memory */
+ if ((info->flags & RHIF_STATIC_BLOCK) == 0)
+ kfree(info->block);
+ }
+
+ info->block = block;
+ info->empty_slots += new_blocks;
+ info->max_blocks = max_blocks;
+ info->flags &= ~RHIF_STATIC_BLOCK;
+
+ /* add all new blocks to the free list */
+ blk = block + info->max_blocks - new_blocks;
+ for (i = 0; i < new_blocks; i++, blk++)
+ list_add(&blk->list, &info->empty_list);
+
+ return 0;
+}
+
+/*
+ * Ensure at least the required number of empty slots. If this function
+ * causes the block area to grow then all pointers kept into the block
+ * area become invalid!
+ */
+static int assure_empty(rh_info_t * info, int slots)
+{
+ int max_blocks;
+
+ /* This function is not meant to be used to grow uncontrollably */
+ if (slots >= 4)
+ return -EINVAL;
+
+ /* Enough space */
+ if (info->empty_slots >= slots)
+ return 0;
+
+ /* Next 16 sized block */
+ max_blocks = ((info->max_blocks + slots) + 15) & ~15;
+
+ return grow(info, max_blocks);
+}
+
+static rh_block_t *get_slot(rh_info_t * info)
+{
+ rh_block_t *blk;
+
+ /* If no more free slots, and failure to extend. */
+ /* XXX: You should have called assure_empty before */
+ if (info->empty_slots == 0) {
+ printk(KERN_ERR "rh: out of slots; crash is imminent.\n");
+ return NULL;
+ }
+
+ /* Get empty slot to use */
+ blk = list_entry(info->empty_list.next, rh_block_t, list);
+ list_del_init(&blk->list);
+ info->empty_slots--;
+
+ /* Initialize */
+ blk->start = 0;
+ blk->size = 0;
+ blk->owner = NULL;
+
+ return blk;
+}
+
+static inline void release_slot(rh_info_t * info, rh_block_t * blk)
+{
+ list_add(&blk->list, &info->empty_list);
+ info->empty_slots++;
+}
+
+static void attach_free_block(rh_info_t * info, rh_block_t * blkn)
+{
+ rh_block_t *blk;
+ rh_block_t *before;
+ rh_block_t *after;
+ rh_block_t *next;
+ int size;
+ unsigned long s, e, bs, be;
+ struct list_head *l;
+
+ /* We assume that they are aligned properly */
+ size = blkn->size;
+ s = blkn->start;
+ e = s + size;
+
+ /* Find the blocks immediately before and after the given one
+ * (if any) */
+ before = NULL;
+ after = NULL;
+ next = NULL;
+
+ list_for_each(l, &info->free_list) {
+ blk = list_entry(l, rh_block_t, list);
+
+ bs = blk->start;
+ be = bs + blk->size;
+
+ if (next == NULL && s >= bs)
+ next = blk;
+
+ if (be == s)
+ before = blk;
+
+ if (e == bs)
+ after = blk;
+
+ /* If both are not null, break now */
+ if (before != NULL && after != NULL)
+ break;
+ }
+
+ /* Now check if they are really adjacent */
+ if (before && s != (before->start + before->size))
+ before = NULL;
+
+ if (after && e != after->start)
+ after = NULL;
+
+ /* No coalescing; list insert and return */
+ if (before == NULL && after == NULL) {
+
+ if (next != NULL)
+ list_add(&blkn->list, &next->list);
+ else
+ list_add(&blkn->list, &info->free_list);
+
+ return;
+ }
+
+ /* We don't need it anymore */
+ release_slot(info, blkn);
+
+ /* Grow the before block */
+ if (before != NULL && after == NULL) {
+ before->size += size;
+ return;
+ }
+
+ /* Grow the after block backwards */
+ if (before == NULL && after != NULL) {
+ after->start -= size;
+ after->size += size;
+ return;
+ }
+
+ /* Grow the before block, and release the after block */
+ before->size += size + after->size;
+ list_del(&after->list);
+ release_slot(info, after);
+}
+
+static void attach_taken_block(rh_info_t * info, rh_block_t * blkn)
+{
+ rh_block_t *blk;
+ struct list_head *l;
+
+ /* Find the block immediately before the given one (if any) */
+ list_for_each(l, &info->taken_list) {
+ blk = list_entry(l, rh_block_t, list);
+ if (blk->start > blkn->start) {
+ list_add_tail(&blkn->list, &blk->list);
+ return;
+ }
+ }
+
+ list_add_tail(&blkn->list, &info->taken_list);
+}
+
+/*
+ * Create a remote heap dynamically. Note that no memory for the blocks is
+ * allocated here; it is grown on demand by the first operation that needs it.
+ */
+rh_info_t *rh_create(unsigned int alignment)
+{
+ rh_info_t *info;
+
+ /* Alignment must be a power of two */
+ if ((alignment & (alignment - 1)) != 0)
+ return ERR_PTR(-EINVAL);
+
+ info = kmalloc(sizeof(*info), GFP_ATOMIC);
+ if (info == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ info->alignment = alignment;
+
+ /* Initially everything as empty */
+ info->block = NULL;
+ info->max_blocks = 0;
+ info->empty_slots = 0;
+ info->flags = 0;
+
+ INIT_LIST_HEAD(&info->empty_list);
+ INIT_LIST_HEAD(&info->free_list);
+ INIT_LIST_HEAD(&info->taken_list);
+
+ return info;
+}
+EXPORT_SYMBOL_GPL(rh_create);
+
+/*
+ * Destroy a dynamically created remote heap. Deallocate only if the areas
+ * are not static
+ */
+void rh_destroy(rh_info_t * info)
+{
+ if ((info->flags & RHIF_STATIC_BLOCK) == 0)
+ kfree(info->block);
+
+ if ((info->flags & RHIF_STATIC_INFO) == 0)
+ kfree(info);
+}
+EXPORT_SYMBOL_GPL(rh_destroy);
+
+/*
+ * Initialize in place a remote heap info block. This is needed to support
+ * operation very early in the startup of the kernel, when it is not yet safe
+ * to call kmalloc.
+ */
+void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
+ rh_block_t * block)
+{
+ int i;
+ rh_block_t *blk;
+
+ /* Alignment must be a power of two */
+ if ((alignment & (alignment - 1)) != 0)
+ return;
+
+ info->alignment = alignment;
+
+ /* Initially everything as empty */
+ info->block = block;
+ info->max_blocks = max_blocks;
+ info->empty_slots = max_blocks;
+ info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK;
+
+ INIT_LIST_HEAD(&info->empty_list);
+ INIT_LIST_HEAD(&info->free_list);
+ INIT_LIST_HEAD(&info->taken_list);
+
+ /* Add all new blocks to the free list */
+ for (i = 0, blk = block; i < max_blocks; i++, blk++)
+ list_add(&blk->list, &info->empty_list);
+}
+EXPORT_SYMBOL_GPL(rh_init);
+
+/* Attach a free memory region; coalesces with adjacent regions */
+int rh_attach_region(rh_info_t * info, unsigned long start, int size)
+{
+ rh_block_t *blk;
+ unsigned long s, e, m;
+ int r;
+
+ /* The region must be aligned */
+ s = start;
+ e = s + size;
+ m = info->alignment - 1;
+
+ /* Round start up */
+ s = (s + m) & ~m;
+
+ /* Round end down */
+ e = e & ~m;
+
+ if (IS_ERR_VALUE(e) || (e < s))
+ return -ERANGE;
+
+ /* Take final values */
+ start = s;
+ size = e - s;
+
+ /* Grow the blocks, if needed */
+ r = assure_empty(info, 1);
+ if (r < 0)
+ return r;
+
+ blk = get_slot(info);
+ blk->start = start;
+ blk->size = size;
+ blk->owner = NULL;
+
+ attach_free_block(info, blk);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(rh_attach_region);
+
+/* Detach the given address range, splitting a free block if needed. */
+unsigned long rh_detach_region(rh_info_t * info, unsigned long start, int size)
+{
+ struct list_head *l;
+ rh_block_t *blk, *newblk;
+ unsigned long s, e, m, bs, be;
+
+ /* Validate size */
+ if (size <= 0)
+ return (unsigned long) -EINVAL;
+
+ /* The region must be aligned */
+ s = start;
+ e = s + size;
+ m = info->alignment - 1;
+
+ /* Round start up */
+ s = (s + m) & ~m;
+
+ /* Round end down */
+ e = e & ~m;
+
+ if (assure_empty(info, 1) < 0)
+ return (unsigned long) -ENOMEM;
+
+ blk = NULL;
+ list_for_each(l, &info->free_list) {
+ blk = list_entry(l, rh_block_t, list);
+ /* The range must lie entirely inside one free block */
+ bs = blk->start;
+ be = blk->start + blk->size;
+ if (s >= bs && e <= be)
+ break;
+ blk = NULL;
+ }
+
+ if (blk == NULL)
+ return (unsigned long) -ENOMEM;
+
+ /* Perfect fit */
+ if (bs == s && be == e) {
+ /* Delete from free list, release slot */
+ list_del(&blk->list);
+ release_slot(info, blk);
+ return s;
+ }
+
+ /* blk still in free list, with updated start and/or size */
+ if (bs == s || be == e) {
+ if (bs == s)
+ blk->start += size;
+ blk->size -= size;
+
+ } else {
+ /* The front free fragment */
+ blk->size = s - bs;
+
+ /* the back free fragment */
+ newblk = get_slot(info);
+ newblk->start = e;
+ newblk->size = be - e;
+
+ list_add(&newblk->list, &blk->list);
+ }
+
+ return s;
+}
+EXPORT_SYMBOL_GPL(rh_detach_region);
+
+/* Allocate a block of memory at the specified alignment. The value returned
+ * is an offset into the buffer initialized by rh_init(), or a negative number
+ * if there is an error.
+ */
+unsigned long rh_alloc_align(rh_info_t * info, int size, int alignment, const char *owner)
+{
+ struct list_head *l;
+ rh_block_t *blk;
+ rh_block_t *newblk;
+ unsigned long start, sp_size;
+
+ /* Validate size, and alignment must be power of two */
+ if (size <= 0 || (alignment & (alignment - 1)) != 0)
+ return (unsigned long) -EINVAL;
+
+ /* Align to configured alignment */
+ size = (size + (info->alignment - 1)) & ~(info->alignment - 1);
+
+ if (assure_empty(info, 2) < 0)
+ return (unsigned long) -ENOMEM;
+
+ blk = NULL;
+ list_for_each(l, &info->free_list) {
+ blk = list_entry(l, rh_block_t, list);
+ if (size <= blk->size) {
+ start = (blk->start + alignment - 1) & ~(alignment - 1);
+ if (start + size <= blk->start + blk->size)
+ break;
+ }
+ blk = NULL;
+ }
+
+ if (blk == NULL)
+ return (unsigned long) -ENOMEM;
+
+ /* Just fits */
+ if (blk->size == size) {
+ /* Move from free list to taken list */
+ list_del(&blk->list);
+ newblk = blk;
+ } else {
+ /* Fragment caused, split if needed */
+ /* Create block for fragment in the beginning */
+ sp_size = start - blk->start;
+ if (sp_size) {
+ rh_block_t *spblk;
+
+ spblk = get_slot(info);
+ spblk->start = blk->start;
+ spblk->size = sp_size;
+ /* add before the blk */
+ list_add(&spblk->list, blk->list.prev);
+ }
+ newblk = get_slot(info);
+ newblk->start = start;
+ newblk->size = size;
+
+ /* blk still in free list, with updated start and size
+ * for fragment in the end */
+ blk->start = start + size;
+ blk->size -= sp_size + size;
+ /* No fragment in the end, remove blk */
+ if (blk->size == 0) {
+ list_del(&blk->list);
+ release_slot(info, blk);
+ }
+ }
+
+ newblk->owner = owner;
+ attach_taken_block(info, newblk);
+
+ return start;
+}
+EXPORT_SYMBOL_GPL(rh_alloc_align);
+
+/* Allocate a block of memory at the default alignment. The value returned is
+ * an offset into the buffer initialized by rh_init(), or a negative number if
+ * there is an error.
+ */
+unsigned long rh_alloc(rh_info_t * info, int size, const char *owner)
+{
+ return rh_alloc_align(info, size, info->alignment, owner);
+}
+EXPORT_SYMBOL_GPL(rh_alloc);
+
+/* Allocate a block of memory at the given offset, rounded up to the default
+ * alignment. The value returned is an offset into the buffer initialized by
+ * rh_init(), or a negative number if there is an error.
+ */
+unsigned long rh_alloc_fixed(rh_info_t * info, unsigned long start, int size, const char *owner)
+{
+ struct list_head *l;
+ rh_block_t *blk, *newblk1, *newblk2;
+ unsigned long s, e, m, bs = 0, be = 0;
+
+ /* Validate size */
+ if (size <= 0)
+ return (unsigned long) -EINVAL;
+
+ /* The region must be aligned */
+ s = start;
+ e = s + size;
+ m = info->alignment - 1;
+
+ /* Round start up */
+ s = (s + m) & ~m;
+
+ /* Round end down */
+ e = e & ~m;
+
+ if (assure_empty(info, 2) < 0)
+ return (unsigned long) -ENOMEM;
+
+ blk = NULL;
+ list_for_each(l, &info->free_list) {
+ blk = list_entry(l, rh_block_t, list);
+ /* The range must lie entirely inside one free block */
+ bs = blk->start;
+ be = blk->start + blk->size;
+ if (s >= bs && e <= be)
+ break;
+ blk = NULL;
+ }
+
+ if (blk == NULL)
+ return (unsigned long) -ENOMEM;
+
+ /* Perfect fit */
+ if (bs == s && be == e) {
+ /* Move from free list to taken list */
+ list_del(&blk->list);
+ blk->owner = owner;
+
+ start = blk->start;
+ attach_taken_block(info, blk);
+
+ return start;
+
+ }
+
+ /* blk still in free list, with updated start and/or size */
+ if (bs == s || be == e) {
+ if (bs == s)
+ blk->start += size;
+ blk->size -= size;
+
+ } else {
+ /* The front free fragment */
+ blk->size = s - bs;
+
+ /* The back free fragment */
+ newblk2 = get_slot(info);
+ newblk2->start = e;
+ newblk2->size = be - e;
+
+ list_add(&newblk2->list, &blk->list);
+ }
+
+ newblk1 = get_slot(info);
+ newblk1->start = s;
+ newblk1->size = e - s;
+ newblk1->owner = owner;
+
+ start = newblk1->start;
+ attach_taken_block(info, newblk1);
+
+ return start;
+}
+EXPORT_SYMBOL_GPL(rh_alloc_fixed);
+
+/* Deallocate the memory previously allocated by one of the rh_alloc functions.
+ * The return value is the size of the deallocated block, or a negative number
+ * if there is an error.
+ */
+int rh_free(rh_info_t * info, unsigned long start)
+{
+ rh_block_t *blk, *blk2;
+ struct list_head *l;
+ int size;
+
+ /* Linear search for block */
+ blk = NULL;
+ list_for_each(l, &info->taken_list) {
+ blk2 = list_entry(l, rh_block_t, list);
+ if (start < blk2->start)
+ break;
+ blk = blk2;
+ }
+
+ if (blk == NULL || start > (blk->start + blk->size))
+ return -EINVAL;
+
+ /* Remove from taken list */
+ list_del(&blk->list);
+
+ /* Get size of freed block */
+ size = blk->size;
+ attach_free_block(info, blk);
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(rh_free);
+
+int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats)
+{
+ rh_block_t *blk;
+ struct list_head *l;
+ struct list_head *h;
+ int nr;
+
+ switch (what) {
+
+ case RHGS_FREE:
+ h = &info->free_list;
+ break;
+
+ case RHGS_TAKEN:
+ h = &info->taken_list;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ /* Linear search for block */
+ nr = 0;
+ list_for_each(l, h) {
+ blk = list_entry(l, rh_block_t, list);
+ if (stats != NULL && nr < max_stats) {
+ stats->start = blk->start;
+ stats->size = blk->size;
+ stats->owner = blk->owner;
+ stats++;
+ }
+ nr++;
+ }
+
+ return nr;
+}
+EXPORT_SYMBOL_GPL(rh_get_stats);
+
+int rh_set_owner(rh_info_t * info, unsigned long start, const char *owner)
+{
+ rh_block_t *blk, *blk2;
+ struct list_head *l;
+ int size;
+
+ /* Linear search for block */
+ blk = NULL;
+ list_for_each(l, &info->taken_list) {
+ blk2 = list_entry(l, rh_block_t, list);
+ if (start < blk2->start)
+ break;
+ blk = blk2;
+ }
+
+ if (blk == NULL || start > (blk->start + blk->size))
+ return -EINVAL;
+
+ blk->owner = owner;
+ size = blk->size;
+
+ return size;
+}
+EXPORT_SYMBOL_GPL(rh_set_owner);
+
+void rh_dump(rh_info_t * info)
+{
+ static rh_stats_t st[32]; /* XXX maximum 32 blocks */
+ int maxnr;
+ int i, nr;
+
+ maxnr = ARRAY_SIZE(st);
+
+ printk(KERN_INFO
+ "info @0x%p (%d slots empty / %d max)\n",
+ info, info->empty_slots, info->max_blocks);
+
+ printk(KERN_INFO " Free:\n");
+ nr = rh_get_stats(info, RHGS_FREE, maxnr, st);
+ if (nr > maxnr)
+ nr = maxnr;
+ for (i = 0; i < nr; i++)
+ printk(KERN_INFO
+ " 0x%lx-0x%lx (%u)\n",
+ st[i].start, st[i].start + st[i].size,
+ st[i].size);
+ printk(KERN_INFO "\n");
+
+ printk(KERN_INFO " Taken:\n");
+ nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st);
+ if (nr > maxnr)
+ nr = maxnr;
+ for (i = 0; i < nr; i++)
+ printk(KERN_INFO
+ " 0x%lx-0x%lx (%u) %s\n",
+ st[i].start, st[i].start + st[i].size,
+ st[i].size, st[i].owner != NULL ? st[i].owner : "");
+ printk(KERN_INFO "\n");
+}
+EXPORT_SYMBOL_GPL(rh_dump);
+
+void rh_dump_blk(rh_info_t * info, rh_block_t * blk)
+{
+ printk(KERN_INFO
+ "blk @0x%p: 0x%lx-0x%lx (%u)\n",
+ blk, blk->start, blk->start + blk->size, blk->size);
+}
+EXPORT_SYMBOL_GPL(rh_dump_blk);
+
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
new file mode 100644
index 000000000..a4ab86250
--- /dev/null
+++ b/arch/powerpc/lib/sstep.c
@@ -0,0 +1,3666 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Single-step support.
+ *
+ * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ */
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/prefetch.h>
+#include <asm/sstep.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/cputable.h>
+#include <asm/disassemble.h>
+
+#ifdef CONFIG_PPC64
+/* Bits in SRR1 that are copied from MSR */
+#define MSR_MASK 0xffffffff87c0ffffUL
+#else
+#define MSR_MASK 0x87c0ffff
+#endif
+
+/* Bits in XER */
+#define XER_SO 0x80000000U
+#define XER_OV 0x40000000U
+#define XER_CA 0x20000000U
+#define XER_OV32 0x00080000U
+#define XER_CA32 0x00040000U
+
+#ifdef CONFIG_VSX
+#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe))
+#endif
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * Functions in ldstfp.S
+ */
+extern void get_fpr(int rn, double *p);
+extern void put_fpr(int rn, const double *p);
+extern void get_vr(int rn, __vector128 *p);
+extern void put_vr(int rn, __vector128 *p);
+extern void load_vsrn(int vsr, const void *p);
+extern void store_vsrn(int vsr, void *p);
+extern void conv_sp_to_dp(const float *sp, double *dp);
+extern void conv_dp_to_sp(const double *dp, float *sp);
+#endif
+
+#ifdef __powerpc64__
+/*
+ * Functions in quad.S
+ */
+extern int do_lq(unsigned long ea, unsigned long *regs);
+extern int do_stq(unsigned long ea, unsigned long val0, unsigned long val1);
+extern int do_lqarx(unsigned long ea, unsigned long *regs);
+extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+ unsigned int *crp);
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define IS_LE 1
+#define IS_BE 0
+#else
+#define IS_LE 0
+#define IS_BE 1
+#endif
+
+/*
+ * Emulate the truncation of 64-bit values in 32-bit mode.
+ */
+static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr,
+ unsigned long val)
+{
+ if ((msr & MSR_64BIT) == 0)
+ val &= 0xffffffffUL;
+ return val;
+}
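+
+/*
+ * For example, with MSR_64BIT clear (a 32-bit mode task on a 64-bit
+ * kernel), truncate_if_32bit(msr, 0x123456789abcUL) returns 0x56789abc,
+ * matching the 32-bit wrap-around of addresses and register results.
+ */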
+
+/*
+ * Determine whether a conditional branch instruction would branch.
+ */
+static nokprobe_inline int branch_taken(unsigned int instr,
+ const struct pt_regs *regs,
+ struct instruction_op *op)
+{
+ unsigned int bo = (instr >> 21) & 0x1f;
+ unsigned int bi;
+
+ if ((bo & 4) == 0) {
+ /* decrement counter */
+ op->type |= DECCTR;
+ if (((bo >> 1) & 1) ^ (regs->ctr == 1))
+ return 0;
+ }
+ if ((bo & 0x10) == 0) {
+ /* check bit from CR */
+ bi = (instr >> 16) & 0x1f;
+ if (((regs->ccr >> (31 - bi)) & 1) != ((bo >> 3) & 1))
+ return 0;
+ }
+ return 1;
+}
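+
+/*
+ * Worked example: "bdnz" has BO = 0b10000, so the CTR is decremented
+ * (bo & 4 == 0) and the branch is taken unless CTR was 1 before the
+ * decrement; bit 0x10 is set, so no CR bit is tested.  "beq cr0" has
+ * BO = 0b01100 and BI = 2, so the CTR is left alone and the branch is
+ * taken only if the EQ bit of CR0 is set.
+ */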
+
+static nokprobe_inline long address_ok(struct pt_regs *regs,
+ unsigned long ea, int nb)
+{
+ if (!user_mode(regs))
+ return 1;
+ if (access_ok((void __user *)ea, nb))
+ return 1;
+ if (access_ok((void __user *)ea, 1))
+ /* Access overlaps the end of the user region */
+ regs->dar = TASK_SIZE_MAX - 1;
+ else
+ regs->dar = ea;
+ return 0;
+}
+
+/*
+ * Calculate effective address for a D-form instruction
+ */
+static nokprobe_inline unsigned long dform_ea(unsigned int instr,
+ const struct pt_regs *regs)
+{
+ int ra;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ ea = (signed short) instr; /* sign-extend */
+ if (ra)
+ ea += regs->gpr[ra];
+
+ return ea;
+}
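+
+/*
+ * For example, "lwz r3,-8(r4)" has RA = 4 and a displacement field of
+ * 0xfff8, which sign-extends to -8, so the EA computed here is
+ * regs->gpr[4] - 8.  With RA = 0 the sign-extended displacement is the
+ * EA by itself.
+ */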
+
+#ifdef __powerpc64__
+/*
+ * Calculate effective address for a DS-form instruction
+ */
+static nokprobe_inline unsigned long dsform_ea(unsigned int instr,
+ const struct pt_regs *regs)
+{
+ int ra;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ ea = (signed short) (instr & ~3); /* sign-extend */
+ if (ra)
+ ea += regs->gpr[ra];
+
+ return ea;
+}
+
+/*
+ * Calculate effective address for a DQ-form instruction
+ */
+static nokprobe_inline unsigned long dqform_ea(unsigned int instr,
+ const struct pt_regs *regs)
+{
+ int ra;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ ea = (signed short) (instr & ~0xf); /* sign-extend */
+ if (ra)
+ ea += regs->gpr[ra];
+
+ return ea;
+}
+#endif /* __powerpc64__ */
+
+/*
+ * Calculate effective address for an X-form instruction
+ */
+static nokprobe_inline unsigned long xform_ea(unsigned int instr,
+ const struct pt_regs *regs)
+{
+ int ra, rb;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ rb = (instr >> 11) & 0x1f;
+ ea = regs->gpr[rb];
+ if (ra)
+ ea += regs->gpr[ra];
+
+ return ea;
+}
+
+/*
+ * Calculate effective address for a MLS:D-form / 8LS:D-form
+ * prefixed instruction
+ */
+static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr,
+ unsigned int suffix,
+ const struct pt_regs *regs)
+{
+ int ra, prefix_r;
+ unsigned int dd;
+ unsigned long ea, d0, d1, d;
+
+ prefix_r = GET_PREFIX_R(instr);
+ ra = GET_PREFIX_RA(suffix);
+
+ d0 = instr & 0x3ffff;
+ d1 = suffix & 0xffff;
+ d = (d0 << 16) | d1;
+
+ /*
+	 * sign-extend the 34-bit displacement
+ */
+ dd = (unsigned int)(d >> 2);
+ ea = (signed int)dd;
+ ea = (ea << 2) | (d & 0x3);
+
+ if (!prefix_r && ra)
+ ea += regs->gpr[ra];
+ else if (!prefix_r && !ra)
+ ; /* Leave ea as is */
+ else if (prefix_r)
+ ea += regs->nip;
+
+ /*
+ * (prefix_r && ra) is an invalid form. Should already be
+ * checked for by caller!
+ */
+
+ return ea;
+}
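+
+/*
+ * The dd/ea shuffle above sign-extends the 34-bit displacement
+ * d = (d0 << 16) | d1; it is equivalent to sign_extend64(d, 33) from
+ * <linux/bitops.h>.  For instance, d = 0x3ffffffff (all 34 bits set)
+ * yields ea = -1 before the base register or NIP is added.
+ */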
+
+/*
+ * Return the largest power of 2, not greater than sizeof(unsigned long),
+ * such that x is a multiple of it.
+ */
+static nokprobe_inline unsigned long max_align(unsigned long x)
+{
+ x |= sizeof(unsigned long);
+ return x & -x; /* isolates rightmost bit */
+}
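+
+/*
+ * Worked example: on a 64-bit build, max_align(0x1006) ORs in
+ * sizeof(unsigned long) and isolates the lowest set bit,
+ * 0x100e & -0x100e == 2, while max_align(0x1000) gives 8.  The copy
+ * loops below therefore step by 2 bytes until the address becomes
+ * 8-byte aligned.
+ */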
+
+static nokprobe_inline unsigned long byterev_2(unsigned long x)
+{
+ return ((x >> 8) & 0xff) | ((x & 0xff) << 8);
+}
+
+static nokprobe_inline unsigned long byterev_4(unsigned long x)
+{
+ return ((x >> 24) & 0xff) | ((x >> 8) & 0xff00) |
+ ((x & 0xff00) << 8) | ((x & 0xff) << 24);
+}
+
+#ifdef __powerpc64__
+static nokprobe_inline unsigned long byterev_8(unsigned long x)
+{
+ return (byterev_4(x) << 32) | byterev_4(x >> 32);
+}
+#endif
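+
+/*
+ * For example, byterev_4(0x11223344) returns 0x44332211; byterev_8()
+ * swaps the bytes within each 32-bit half and then exchanges the
+ * halves, giving a full 64-bit byte reversal.
+ */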
+
+static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
+{
+ switch (nb) {
+ case 2:
+ *(u16 *)ptr = byterev_2(*(u16 *)ptr);
+ break;
+ case 4:
+ *(u32 *)ptr = byterev_4(*(u32 *)ptr);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ *(unsigned long *)ptr = byterev_8(*(unsigned long *)ptr);
+ break;
+ case 16: {
+ unsigned long *up = (unsigned long *)ptr;
+ unsigned long tmp;
+ tmp = byterev_8(up[0]);
+ up[0] = byterev_8(up[1]);
+ up[1] = tmp;
+ break;
+ }
+ case 32: {
+ unsigned long *up = (unsigned long *)ptr;
+ unsigned long tmp;
+
+ tmp = byterev_8(up[0]);
+ up[0] = byterev_8(up[3]);
+ up[3] = tmp;
+ tmp = byterev_8(up[2]);
+ up[2] = byterev_8(up[1]);
+ up[1] = tmp;
+ break;
+ }
+
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
+static __always_inline int
+__read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ unsigned long x = 0;
+
+ switch (nb) {
+ case 1:
+ unsafe_get_user(x, (unsigned char __user *)ea, Efault);
+ break;
+ case 2:
+ unsafe_get_user(x, (unsigned short __user *)ea, Efault);
+ break;
+ case 4:
+ unsafe_get_user(x, (unsigned int __user *)ea, Efault);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ unsafe_get_user(x, (unsigned long __user *)ea, Efault);
+ break;
+#endif
+ }
+ *dest = x;
+ return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+static nokprobe_inline int
+read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ if (is_kernel_addr(ea))
+ return __read_mem_aligned(dest, ea, nb, regs);
+
+ if (user_read_access_begin((void __user *)ea, nb)) {
+ err = __read_mem_aligned(dest, ea, nb, regs);
+ user_read_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
+}
+
+/*
+ * Copy from userspace to a buffer, using the largest possible
+ * aligned accesses, up to sizeof(long).
+ */
+static __always_inline int __copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int c;
+
+ for (; nb > 0; nb -= c) {
+ c = max_align(ea);
+ if (c > nb)
+ c = max_align(nb);
+ switch (c) {
+ case 1:
+ unsafe_get_user(*dest, (u8 __user *)ea, Efault);
+ break;
+ case 2:
+ unsafe_get_user(*(u16 *)dest, (u16 __user *)ea, Efault);
+ break;
+ case 4:
+ unsafe_get_user(*(u32 *)dest, (u32 __user *)ea, Efault);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ unsafe_get_user(*(u64 *)dest, (u64 __user *)ea, Efault);
+ break;
+#endif
+ }
+ dest += c;
+ ea += c;
+ }
+ return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ if (is_kernel_addr(ea))
+ return __copy_mem_in(dest, ea, nb, regs);
+
+ if (user_read_access_begin((void __user *)ea, nb)) {
+ err = __copy_mem_in(dest, ea, nb, regs);
+ user_read_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
+}
+
+static nokprobe_inline int read_mem_unaligned(unsigned long *dest,
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ union {
+ unsigned long ul;
+ u8 b[sizeof(unsigned long)];
+ } u;
+ int i;
+ int err;
+
+ u.ul = 0;
+ i = IS_BE ? sizeof(unsigned long) - nb : 0;
+ err = copy_mem_in(&u.b[i], ea, nb, regs);
+ if (!err)
+ *dest = u.ul;
+ return err;
+}
+
+/*
+ * Read memory at address ea for nb bytes, return 0 for success
+ * or -EFAULT if an error occurred. N.B. nb must be 1, 2, 4 or 8.
+ * If nb < sizeof(long), the result is right-justified on BE systems.
+ */
+static int read_mem(unsigned long *dest, unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ if (!address_ok(regs, ea, nb))
+ return -EFAULT;
+ if ((ea & (nb - 1)) == 0)
+ return read_mem_aligned(dest, ea, nb, regs);
+ return read_mem_unaligned(dest, ea, nb, regs);
+}
+NOKPROBE_SYMBOL(read_mem);
+
+static __always_inline int
+__write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ switch (nb) {
+ case 1:
+ unsafe_put_user(val, (unsigned char __user *)ea, Efault);
+ break;
+ case 2:
+ unsafe_put_user(val, (unsigned short __user *)ea, Efault);
+ break;
+ case 4:
+ unsafe_put_user(val, (unsigned int __user *)ea, Efault);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ unsafe_put_user(val, (unsigned long __user *)ea, Efault);
+ break;
+#endif
+ }
+ return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+static nokprobe_inline int
+write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ if (is_kernel_addr(ea))
+ return __write_mem_aligned(val, ea, nb, regs);
+
+ if (user_write_access_begin((void __user *)ea, nb)) {
+ err = __write_mem_aligned(val, ea, nb, regs);
+ user_write_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
+}
+
+/*
+ * Copy from a buffer to userspace, using the largest possible
+ * aligned accesses, up to sizeof(long).
+ */
+static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int c;
+
+ for (; nb > 0; nb -= c) {
+ c = max_align(ea);
+ if (c > nb)
+ c = max_align(nb);
+ switch (c) {
+ case 1:
+ unsafe_put_user(*dest, (u8 __user *)ea, Efault);
+ break;
+ case 2:
+ unsafe_put_user(*(u16 *)dest, (u16 __user *)ea, Efault);
+ break;
+ case 4:
+ unsafe_put_user(*(u32 *)dest, (u32 __user *)ea, Efault);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ unsafe_put_user(*(u64 *)dest, (u64 __user *)ea, Efault);
+ break;
+#endif
+ }
+ dest += c;
+ ea += c;
+ }
+ return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ if (is_kernel_addr(ea))
+ return __copy_mem_out(dest, ea, nb, regs);
+
+ if (user_write_access_begin((void __user *)ea, nb)) {
+ err = __copy_mem_out(dest, ea, nb, regs);
+ user_write_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
+}
+
+static nokprobe_inline int write_mem_unaligned(unsigned long val,
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ union {
+ unsigned long ul;
+ u8 b[sizeof(unsigned long)];
+ } u;
+ int i;
+
+ u.ul = val;
+ i = IS_BE ? sizeof(unsigned long) - nb : 0;
+ return copy_mem_out(&u.b[i], ea, nb, regs);
+}
+
+/*
+ * Write memory at address ea for nb bytes, return 0 for success
+ * or -EFAULT if an error occurred. N.B. nb must be 1, 2, 4 or 8.
+ */
+static int write_mem(unsigned long val, unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ if (!address_ok(regs, ea, nb))
+ return -EFAULT;
+ if ((ea & (nb - 1)) == 0)
+ return write_mem_aligned(val, ea, nb, regs);
+ return write_mem_unaligned(val, ea, nb, regs);
+}
+NOKPROBE_SYMBOL(write_mem);
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * These access either the real FP register or the image in the
+ * thread_struct, depending on regs->msr & MSR_FP.
+ */
+static int do_fp_load(struct instruction_op *op, unsigned long ea,
+ struct pt_regs *regs, bool cross_endian)
+{
+ int err, rn, nb;
+ union {
+ int i;
+ unsigned int u;
+ float f;
+ double d[2];
+ unsigned long l[2];
+ u8 b[2 * sizeof(double)];
+ } u;
+
+ nb = GETSIZE(op->type);
+ if (!address_ok(regs, ea, nb))
+ return -EFAULT;
+ rn = op->reg;
+ err = copy_mem_in(u.b, ea, nb, regs);
+ if (err)
+ return err;
+ if (unlikely(cross_endian)) {
+ do_byte_reverse(u.b, min(nb, 8));
+ if (nb == 16)
+ do_byte_reverse(&u.b[8], 8);
+ }
+ preempt_disable();
+ if (nb == 4) {
+ if (op->type & FPCONV)
+ conv_sp_to_dp(&u.f, &u.d[0]);
+ else if (op->type & SIGNEXT)
+ u.l[0] = u.i;
+ else
+ u.l[0] = u.u;
+ }
+ if (regs->msr & MSR_FP)
+ put_fpr(rn, &u.d[0]);
+ else
+ current->thread.TS_FPR(rn) = u.l[0];
+ if (nb == 16) {
+ /* lfdp */
+ rn |= 1;
+ if (regs->msr & MSR_FP)
+ put_fpr(rn, &u.d[1]);
+ else
+ current->thread.TS_FPR(rn) = u.l[1];
+ }
+ preempt_enable();
+ return 0;
+}
+NOKPROBE_SYMBOL(do_fp_load);
+
+static int do_fp_store(struct instruction_op *op, unsigned long ea,
+ struct pt_regs *regs, bool cross_endian)
+{
+ int rn, nb;
+ union {
+ unsigned int u;
+ float f;
+ double d[2];
+ unsigned long l[2];
+ u8 b[2 * sizeof(double)];
+ } u;
+
+ nb = GETSIZE(op->type);
+ if (!address_ok(regs, ea, nb))
+ return -EFAULT;
+ rn = op->reg;
+ preempt_disable();
+ if (regs->msr & MSR_FP)
+ get_fpr(rn, &u.d[0]);
+ else
+ u.l[0] = current->thread.TS_FPR(rn);
+ if (nb == 4) {
+ if (op->type & FPCONV)
+ conv_dp_to_sp(&u.d[0], &u.f);
+ else
+ u.u = u.l[0];
+ }
+ if (nb == 16) {
+ rn |= 1;
+ if (regs->msr & MSR_FP)
+ get_fpr(rn, &u.d[1]);
+ else
+ u.l[1] = current->thread.TS_FPR(rn);
+ }
+ preempt_enable();
+ if (unlikely(cross_endian)) {
+ do_byte_reverse(u.b, min(nb, 8));
+ if (nb == 16)
+ do_byte_reverse(&u.b[8], 8);
+ }
+ return copy_mem_out(u.b, ea, nb, regs);
+}
+NOKPROBE_SYMBOL(do_fp_store);
+#endif
+
+#ifdef CONFIG_ALTIVEC
+/* For Altivec/VMX, no need to worry about alignment */
+static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
+ int size, struct pt_regs *regs,
+ bool cross_endian)
+{
+ int err;
+ union {
+ __vector128 v;
+ u8 b[sizeof(__vector128)];
+ } u = {};
+
+ if (!address_ok(regs, ea & ~0xfUL, 16))
+ return -EFAULT;
+ /* align to multiple of size */
+ ea &= ~(size - 1);
+ err = copy_mem_in(&u.b[ea & 0xf], ea, size, regs);
+ if (err)
+ return err;
+ if (unlikely(cross_endian))
+ do_byte_reverse(&u.b[ea & 0xf], size);
+ preempt_disable();
+ if (regs->msr & MSR_VEC)
+ put_vr(rn, &u.v);
+ else
+ current->thread.vr_state.vr[rn] = u.v;
+ preempt_enable();
+ return 0;
+}
+
+static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
+ int size, struct pt_regs *regs,
+ bool cross_endian)
+{
+ union {
+ __vector128 v;
+ u8 b[sizeof(__vector128)];
+ } u;
+
+ if (!address_ok(regs, ea & ~0xfUL, 16))
+ return -EFAULT;
+ /* align to multiple of size */
+ ea &= ~(size - 1);
+
+ preempt_disable();
+ if (regs->msr & MSR_VEC)
+ get_vr(rn, &u.v);
+ else
+ u.v = current->thread.vr_state.vr[rn];
+ preempt_enable();
+ if (unlikely(cross_endian))
+ do_byte_reverse(&u.b[ea & 0xf], size);
+ return copy_mem_out(&u.b[ea & 0xf], ea, size, regs);
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef __powerpc64__
+static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea,
+ int reg, bool cross_endian)
+{
+ int err;
+
+ if (!address_ok(regs, ea, 16))
+ return -EFAULT;
+ /* if aligned, should be atomic */
+ if ((ea & 0xf) == 0) {
+ err = do_lq(ea, &regs->gpr[reg]);
+ } else {
+ err = read_mem(&regs->gpr[reg + IS_LE], ea, 8, regs);
+ if (!err)
+ err = read_mem(&regs->gpr[reg + IS_BE], ea + 8, 8, regs);
+ }
+ if (!err && unlikely(cross_endian))
+ do_byte_reverse(&regs->gpr[reg], 16);
+ return err;
+}
+
+static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea,
+ int reg, bool cross_endian)
+{
+ int err;
+ unsigned long vals[2];
+
+ if (!address_ok(regs, ea, 16))
+ return -EFAULT;
+ vals[0] = regs->gpr[reg];
+ vals[1] = regs->gpr[reg + 1];
+ if (unlikely(cross_endian))
+ do_byte_reverse(vals, 16);
+
+ /* if aligned, should be atomic */
+ if ((ea & 0xf) == 0)
+ return do_stq(ea, vals[0], vals[1]);
+
+ err = write_mem(vals[IS_LE], ea, 8, regs);
+ if (!err)
+ err = write_mem(vals[IS_BE], ea + 8, 8, regs);
+ return err;
+}
+#endif /* __powerpc64__ */
+
+#ifdef CONFIG_VSX
+void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+ const void *mem, bool rev)
+{
+ int size, read_size;
+ int i, j;
+ const unsigned int *wp;
+ const unsigned short *hp;
+ const unsigned char *bp;
+
+ size = GETSIZE(op->type);
+ reg->d[0] = reg->d[1] = 0;
+
+ switch (op->element_size) {
+ case 32:
+ /* [p]lxvp[x] */
+ case 16:
+ /* whole vector; lxv[x] or lxvl[l] */
+ if (size == 0)
+ break;
+ memcpy(reg, mem, size);
+ if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+ rev = !rev;
+ if (rev)
+ do_byte_reverse(reg, size);
+ break;
+ case 8:
+ /* scalar loads, lxvd2x, lxvdsx */
+ read_size = (size >= 8) ? 8 : size;
+ i = IS_LE ? 8 : 8 - read_size;
+ memcpy(&reg->b[i], mem, read_size);
+ if (rev)
+ do_byte_reverse(&reg->b[i], 8);
+ if (size < 8) {
+ if (op->type & SIGNEXT) {
+ /* size == 4 is the only case here */
+ reg->d[IS_LE] = (signed int) reg->d[IS_LE];
+ } else if (op->vsx_flags & VSX_FPCONV) {
+ preempt_disable();
+ conv_sp_to_dp(&reg->fp[1 + IS_LE],
+ &reg->dp[IS_LE]);
+ preempt_enable();
+ }
+ } else {
+ if (size == 16) {
+ unsigned long v = *(unsigned long *)(mem + 8);
+ reg->d[IS_BE] = !rev ? v : byterev_8(v);
+ } else if (op->vsx_flags & VSX_SPLAT)
+ reg->d[IS_BE] = reg->d[IS_LE];
+ }
+ break;
+ case 4:
+ /* lxvw4x, lxvwsx */
+ wp = mem;
+ for (j = 0; j < size / 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ reg->w[i] = !rev ? *wp++ : byterev_4(*wp++);
+ }
+ if (op->vsx_flags & VSX_SPLAT) {
+ u32 val = reg->w[IS_LE ? 3 : 0];
+ for (; j < 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ reg->w[i] = val;
+ }
+ }
+ break;
+ case 2:
+ /* lxvh8x */
+ hp = mem;
+ for (j = 0; j < size / 2; ++j) {
+ i = IS_LE ? 7 - j : j;
+ reg->h[i] = !rev ? *hp++ : byterev_2(*hp++);
+ }
+ break;
+ case 1:
+ /* lxvb16x */
+ bp = mem;
+ for (j = 0; j < size; ++j) {
+ i = IS_LE ? 15 - j : j;
+ reg->b[i] = *bp++;
+ }
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(emulate_vsx_load);
+NOKPROBE_SYMBOL(emulate_vsx_load);
+
+void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
+ void *mem, bool rev)
+{
+ int size, write_size;
+ int i, j;
+ union vsx_reg buf;
+ unsigned int *wp;
+ unsigned short *hp;
+ unsigned char *bp;
+
+ size = GETSIZE(op->type);
+
+ switch (op->element_size) {
+ case 32:
+ /* [p]stxvp[x] */
+ if (size == 0)
+ break;
+ if (rev) {
+ /* reverse 32 bytes */
+ union vsx_reg buf32[2];
+ buf32[0].d[0] = byterev_8(reg[1].d[1]);
+ buf32[0].d[1] = byterev_8(reg[1].d[0]);
+ buf32[1].d[0] = byterev_8(reg[0].d[1]);
+ buf32[1].d[1] = byterev_8(reg[0].d[0]);
+ memcpy(mem, buf32, size);
+ } else {
+ memcpy(mem, reg, size);
+ }
+ break;
+ case 16:
+ /* stxv, stxvx, stxvl, stxvll */
+ if (size == 0)
+ break;
+ if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+ rev = !rev;
+ if (rev) {
+ /* reverse 16 bytes */
+ buf.d[0] = byterev_8(reg->d[1]);
+ buf.d[1] = byterev_8(reg->d[0]);
+ reg = &buf;
+ }
+ memcpy(mem, reg, size);
+ break;
+ case 8:
+ /* scalar stores, stxvd2x */
+ write_size = (size >= 8) ? 8 : size;
+ i = IS_LE ? 8 : 8 - write_size;
+ if (size < 8 && op->vsx_flags & VSX_FPCONV) {
+ buf.d[0] = buf.d[1] = 0;
+ preempt_disable();
+ conv_dp_to_sp(&reg->dp[IS_LE], &buf.fp[1 + IS_LE]);
+ preempt_enable();
+ reg = &buf;
+ }
+ memcpy(mem, &reg->b[i], write_size);
+ if (size == 16)
+ memcpy(mem + 8, &reg->d[IS_BE], 8);
+ if (unlikely(rev)) {
+ do_byte_reverse(mem, write_size);
+ if (size == 16)
+ do_byte_reverse(mem + 8, 8);
+ }
+ break;
+ case 4:
+ /* stxvw4x */
+ wp = mem;
+ for (j = 0; j < size / 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ *wp++ = !rev ? reg->w[i] : byterev_4(reg->w[i]);
+ }
+ break;
+ case 2:
+ /* stxvh8x */
+ hp = mem;
+ for (j = 0; j < size / 2; ++j) {
+ i = IS_LE ? 7 - j : j;
+ *hp++ = !rev ? reg->h[i] : byterev_2(reg->h[i]);
+ }
+ break;
+ case 1:
+		/* stxvb16x */
+ bp = mem;
+ for (j = 0; j < size; ++j) {
+ i = IS_LE ? 15 - j : j;
+ *bp++ = reg->b[i];
+ }
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(emulate_vsx_store);
+NOKPROBE_SYMBOL(emulate_vsx_store);
+
+static nokprobe_inline int do_vsx_load(struct instruction_op *op,
+ unsigned long ea, struct pt_regs *regs,
+ bool cross_endian)
+{
+ int reg = op->reg;
+ int i, j, nr_vsx_regs;
+ u8 mem[32];
+ union vsx_reg buf[2];
+ int size = GETSIZE(op->type);
+
+ if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
+ return -EFAULT;
+
+ nr_vsx_regs = max(1ul, size / sizeof(__vector128));
+ emulate_vsx_load(op, buf, mem, cross_endian);
+ preempt_disable();
+ if (reg < 32) {
+ /* FP regs + extensions */
+ if (regs->msr & MSR_FP) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ load_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0];
+ current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1];
+ }
+ }
+ } else {
+ if (regs->msr & MSR_VEC) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ load_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ current->thread.vr_state.vr[reg - 32 + i] = buf[j].v;
+ }
+ }
+ }
+ preempt_enable();
+ return 0;
+}
+
+static nokprobe_inline int do_vsx_store(struct instruction_op *op,
+ unsigned long ea, struct pt_regs *regs,
+ bool cross_endian)
+{
+ int reg = op->reg;
+ int i, j, nr_vsx_regs;
+ u8 mem[32];
+ union vsx_reg buf[2];
+ int size = GETSIZE(op->type);
+
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+
+ nr_vsx_regs = max(1ul, size / sizeof(__vector128));
+ preempt_disable();
+ if (reg < 32) {
+ /* FP regs + extensions */
+ if (regs->msr & MSR_FP) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ store_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0];
+ buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1];
+ }
+ }
+ } else {
+ if (regs->msr & MSR_VEC) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ store_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ buf[j].v = current->thread.vr_state.vr[reg - 32 + i];
+ }
+ }
+ }
+ preempt_enable();
+ emulate_vsx_store(op, buf, mem, cross_endian);
+ return copy_mem_out(mem, ea, size, regs);
+}
+#endif /* CONFIG_VSX */
+
+static __always_inline int __emulate_dcbz(unsigned long ea)
+{
+ unsigned long i;
+ unsigned long size = l1_dcache_bytes();
+
+ for (i = 0; i < size; i += sizeof(long))
+ unsafe_put_user(0, (unsigned long __user *)(ea + i), Efault);
+
+ return 0;
+
+Efault:
+ return -EFAULT;
+}
+
+int emulate_dcbz(unsigned long ea, struct pt_regs *regs)
+{
+ int err;
+ unsigned long size = l1_dcache_bytes();
+
+ ea = truncate_if_32bit(regs->msr, ea);
+ ea &= ~(size - 1);
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+
+ if (is_kernel_addr(ea)) {
+ err = __emulate_dcbz(ea);
+ } else if (user_write_access_begin((void __user *)ea, size)) {
+ err = __emulate_dcbz(ea);
+ user_write_access_end();
+ } else {
+ err = -EFAULT;
+ }
+
+ if (err)
+ regs->dar = ea;
+
+ return err;
+}
+NOKPROBE_SYMBOL(emulate_dcbz);
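+
+/*
+ * For example, with 128-byte cache lines a dcbz whose EA is 0x12345
+ * clears the whole line 0x12300..0x1237f: the EA is rounded down to a
+ * line boundary above before the zeroing stores are issued.
+ */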
+
+#define __put_user_asmx(x, addr, err, op, cr) \
+ __asm__ __volatile__( \
+ ".machine push\n" \
+ ".machine power8\n" \
+ "1: " op " %2,0,%3\n" \
+ ".machine pop\n" \
+ " mfcr %1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%4\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ EX_TABLE(1b, 3b) \
+ : "=r" (err), "=r" (cr) \
+ : "r" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __get_user_asmx(x, addr, err, op) \
+ __asm__ __volatile__( \
+ ".machine push\n" \
+ ".machine power8\n" \
+ "1: "op" %1,0,%2\n" \
+ ".machine pop\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ EX_TABLE(1b, 3b) \
+ : "=r" (err), "=r" (x) \
+ : "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __cacheop_user_asmx(addr, err, op) \
+ __asm__ __volatile__( \
+ "1: "op" 0,%1\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ EX_TABLE(1b, 3b) \
+ : "=r" (err) \
+ : "r" (addr), "i" (-EFAULT), "0" (err))
+
+static nokprobe_inline void set_cr0(const struct pt_regs *regs,
+ struct instruction_op *op)
+{
+ long val = op->val;
+
+ op->type |= SETCC;
+ op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
+ if (!(regs->msr & MSR_64BIT))
+ val = (int) val;
+ if (val < 0)
+ op->ccval |= 0x80000000;
+ else if (val > 0)
+ op->ccval |= 0x40000000;
+ else
+ op->ccval |= 0x20000000;
+}
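+
+/*
+ * For example, a negative result sets CR0[LT], a positive result sets
+ * CR0[GT] and zero sets CR0[EQ], while CR0[SO] is copied from XER[SO]
+ * by the ccval computation above; in 32-bit mode the sign is judged on
+ * the low 32 bits of the value.
+ */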
+
+static nokprobe_inline void set_ca32(struct instruction_op *op, bool val)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (val)
+ op->xerval |= XER_CA32;
+ else
+ op->xerval &= ~XER_CA32;
+ }
+}
+
+static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
+ struct instruction_op *op, int rd,
+ unsigned long val1, unsigned long val2,
+ unsigned long carry_in)
+{
+ unsigned long val = val1 + val2;
+
+ if (carry_in)
+ ++val;
+ op->type = COMPUTE | SETREG | SETXER;
+ op->reg = rd;
+ op->val = val;
+ val = truncate_if_32bit(regs->msr, val);
+ val1 = truncate_if_32bit(regs->msr, val1);
+ op->xerval = regs->xer;
+ if (val < val1 || (carry_in && val == val1))
+ op->xerval |= XER_CA;
+ else
+ op->xerval &= ~XER_CA;
+
+ set_ca32(op, (unsigned int)val < (unsigned int)val1 ||
+ (carry_in && (unsigned int)val == (unsigned int)val1));
+}
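+
+/*
+ * Carry example: in 32-bit mode, adding 1 to 0xffffffff wraps to 0,
+ * which is below the (truncated) first operand, so XER[CA] and, on
+ * ISA 3.0 CPUs, XER[CA32] are set.  Adding 1 to 0x7fffffff sets
+ * neither, because the carry tracked here is the unsigned carry out,
+ * not signed overflow.
+ */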
+
+static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
+ struct instruction_op *op,
+ long v1, long v2, int crfld)
+{
+ unsigned int crval, shift;
+
+ op->type = COMPUTE | SETCC;
+ crval = (regs->xer >> 31) & 1; /* get SO bit */
+ if (v1 < v2)
+ crval |= 8;
+ else if (v1 > v2)
+ crval |= 4;
+ else
+ crval |= 2;
+ shift = (7 - crfld) * 4;
+ op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1,
+ unsigned long v2, int crfld)
+{
+ unsigned int crval, shift;
+
+ op->type = COMPUTE | SETCC;
+ crval = (regs->xer >> 31) & 1; /* get SO bit */
+ if (v1 < v2)
+ crval |= 8;
+ else if (v1 > v2)
+ crval |= 4;
+ else
+ crval |= 2;
+ shift = (7 - crfld) * 4;
+ op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+static nokprobe_inline void do_cmpb(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, unsigned long v2)
+{
+ unsigned long long out_val, mask;
+ int i;
+
+ out_val = 0;
+ for (i = 0; i < 8; i++) {
+ mask = 0xffUL << (i * 8);
+ if ((v1 & mask) == (v2 & mask))
+ out_val |= mask;
+ }
+ op->val = out_val;
+}
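+
+/*
+ * For example, cmpb on v1 = 0x00000000aabbccdd and
+ * v2 = 0x00000000aa00cc00 yields 0xffffffffff00ff00: each byte that
+ * compares equal (including the four zero bytes at the top) produces
+ * 0xff in the result, each differing byte produces 0x00.
+ */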
+
+/*
+ * The size parameter is used to adjust the equivalent popcnt instruction.
+ * popcntb = 8, popcntw = 32, popcntd = 64
+ */
+static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, int size)
+{
+ unsigned long long out = v1;
+
+ out -= (out >> 1) & 0x5555555555555555ULL;
+ out = (0x3333333333333333ULL & out) +
+ (0x3333333333333333ULL & (out >> 2));
+ out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+
+ if (size == 8) { /* popcntb */
+ op->val = out;
+ return;
+ }
+ out += out >> 8;
+ out += out >> 16;
+ if (size == 32) { /* popcntw */
+ op->val = out & 0x0000003f0000003fULL;
+ return;
+ }
+
+ out = (out + (out >> 32)) & 0x7f;
+ op->val = out; /* popcntd */
+}
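+
+/*
+ * The three masking steps above are the usual SWAR population count:
+ * set bits are summed in 2-bit, then 4-bit, then 8-bit groups.  For
+ * example, a byte of 0xff passes through the intermediate values 0xaa
+ * and 0x44 before ending up as 0x08, i.e. eight set bits for popcntb.
+ */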
+
+#ifdef CONFIG_PPC64
+static nokprobe_inline void do_bpermd(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, unsigned long v2)
+{
+ unsigned char perm, idx;
+ unsigned int i;
+
+ perm = 0;
+ for (i = 0; i < 8; i++) {
+ idx = (v1 >> (i * 8)) & 0xff;
+ if (idx < 64)
+ if (v2 & PPC_BIT(idx))
+ perm |= 1 << i;
+ }
+ op->val = perm;
+}
+#endif /* CONFIG_PPC64 */
+/*
+ * The size parameter adjusts the equivalent prty instruction.
+ * prtyw = 32, prtyd = 64
+ */
+static nokprobe_inline void do_prty(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v, int size)
+{
+ unsigned long long res = v ^ (v >> 8);
+
+ res ^= res >> 16;
+ if (size == 32) { /* prtyw */
+ op->val = res & 0x0000000100000001ULL;
+ return;
+ }
+
+ res ^= res >> 32;
+	op->val = res & 1;	/* prtyd */
+}
+
+static nokprobe_inline int trap_compare(long v1, long v2)
+{
+ int ret = 0;
+
+ if (v1 < v2)
+ ret |= 0x10;
+ else if (v1 > v2)
+ ret |= 0x08;
+ else
+ ret |= 0x04;
+ if ((unsigned long)v1 < (unsigned long)v2)
+ ret |= 0x02;
+ else if ((unsigned long)v1 > (unsigned long)v2)
+ ret |= 0x01;
+ return ret;
+}
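+
+/*
+ * The return value is a TO-field style mask: 0x10 = signed less than,
+ * 0x08 = signed greater than, 0x04 = equal, 0x02/0x01 = unsigned
+ * less/greater than.  For example, "twgti rA,5" has TO = 0x08, so the
+ * caller's "rd & trap_compare(...)" test traps exactly when rA > 5 as
+ * a signed comparison.
+ */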
+
+/*
+ * Elements of 32-bit rotate and mask instructions.
+ */
+#define MASK32(mb, me) ((0xffffffffUL >> (mb)) + \
+ ((signed long)-0x80000000L >> (me)) + ((me) >= (mb)))
+#ifdef __powerpc64__
+#define MASK64_L(mb) (~0UL >> (mb))
+#define MASK64_R(me) ((signed long)-0x8000000000000000L >> (me))
+#define MASK64(mb, me) (MASK64_L(mb) + MASK64_R(me) + ((me) >= (mb)))
+#define DATA32(x) (((x) & 0xffffffffUL) | (((x) & 0xffffffffUL) << 32))
+#else
+#define DATA32(x) (x)
+#endif
+#define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))
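+
+/*
+ * These masks use IBM bit numbering, where bit 0 is the most
+ * significant bit of the 32-bit word: MASK32(0, 0) is 0x80000000 and
+ * MASK32(27, 31) is 0x0000001f.  When me < mb the mask wraps around,
+ * as the rotate-and-mask instructions require.
+ */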
+
+/*
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ * Integer arithmetic and logical instructions, branches, and barrier
+ * instructions can be emulated just using the information in *op.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
+ */
+int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
+ ppc_inst_t instr)
+{
+#ifdef CONFIG_PPC64
+ unsigned int suffixopcode, prefixtype, prefix_r;
+#endif
+ unsigned int opcode, ra, rb, rc, rd, spr, u;
+ unsigned long int imm;
+ unsigned long int val, val2;
+ unsigned int mb, me, sh;
+ unsigned int word, suffix;
+ long ival;
+
+ word = ppc_inst_val(instr);
+ suffix = ppc_inst_suffix(instr);
+
+ op->type = COMPUTE;
+
+ opcode = ppc_inst_primary_opcode(instr);
+ switch (opcode) {
+ case 16: /* bc */
+ op->type = BRANCH;
+ imm = (signed short)(word & 0xfffc);
+ if ((word & 2) == 0)
+ imm += regs->nip;
+ op->val = truncate_if_32bit(regs->msr, imm);
+ if (word & 1)
+ op->type |= SETLK;
+ if (branch_taken(word, regs, op))
+ op->type |= BRTAKEN;
+ return 1;
+ case 17: /* sc */
+ if ((word & 0xfe2) == 2)
+ op->type = SYSCALL;
+ else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+ (word & 0xfe3) == 1) { /* scv */
+ op->type = SYSCALL_VECTORED_0;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ } else
+ op->type = UNKNOWN;
+ return 0;
+ case 18: /* b */
+ op->type = BRANCH | BRTAKEN;
+ imm = word & 0x03fffffc;
+ if (imm & 0x02000000)
+ imm -= 0x04000000;
+ if ((word & 2) == 0)
+ imm += regs->nip;
+ op->val = truncate_if_32bit(regs->msr, imm);
+ if (word & 1)
+ op->type |= SETLK;
+ return 1;
+ case 19:
+ switch ((word >> 1) & 0x3ff) {
+ case 0: /* mcrf */
+ op->type = COMPUTE + SETCC;
+ rd = 7 - ((word >> 23) & 0x7);
+ ra = 7 - ((word >> 18) & 0x7);
+ rd *= 4;
+ ra *= 4;
+ val = (regs->ccr >> ra) & 0xf;
+ op->ccval = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
+ return 1;
+
+ case 16: /* bclr */
+ case 528: /* bcctr */
+ op->type = BRANCH;
+ imm = (word & 0x400)? regs->ctr: regs->link;
+ op->val = truncate_if_32bit(regs->msr, imm);
+ if (word & 1)
+ op->type |= SETLK;
+ if (branch_taken(word, regs, op))
+ op->type |= BRTAKEN;
+ return 1;
+
+ case 18: /* rfid, scary */
+ if (regs->msr & MSR_PR)
+ goto priv;
+ op->type = RFI;
+ return 0;
+
+ case 150: /* isync */
+ op->type = BARRIER | BARRIER_ISYNC;
+ return 1;
+
+ case 33: /* crnor */
+ case 129: /* crandc */
+ case 193: /* crxor */
+ case 225: /* crnand */
+ case 257: /* crand */
+ case 289: /* creqv */
+ case 417: /* crorc */
+ case 449: /* cror */
+ op->type = COMPUTE + SETCC;
+ ra = (word >> 16) & 0x1f;
+ rb = (word >> 11) & 0x1f;
+ rd = (word >> 21) & 0x1f;
+ ra = (regs->ccr >> (31 - ra)) & 1;
+ rb = (regs->ccr >> (31 - rb)) & 1;
+ val = (word >> (6 + ra * 2 + rb)) & 1;
+ op->ccval = (regs->ccr & ~(1UL << (31 - rd))) |
+ (val << (31 - rd));
+ return 1;
+ }
+ break;
+ case 31:
+ switch ((word >> 1) & 0x3ff) {
+ case 598: /* sync */
+ op->type = BARRIER + BARRIER_SYNC;
+#ifdef __powerpc64__
+ switch ((word >> 21) & 3) {
+ case 1: /* lwsync */
+ op->type = BARRIER + BARRIER_LWSYNC;
+ break;
+ case 2: /* ptesync */
+ op->type = BARRIER + BARRIER_PTESYNC;
+ break;
+ }
+#endif
+ return 1;
+
+ case 854: /* eieio */
+ op->type = BARRIER + BARRIER_EIEIO;
+ return 1;
+ }
+ break;
+ }
+
+ rd = (word >> 21) & 0x1f;
+ ra = (word >> 16) & 0x1f;
+ rb = (word >> 11) & 0x1f;
+ rc = (word >> 6) & 0x1f;
+
+ switch (opcode) {
+#ifdef __powerpc64__
+ case 1:
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+
+ prefix_r = GET_PREFIX_R(word);
+ ra = GET_PREFIX_RA(suffix);
+ rd = (suffix >> 21) & 0x1f;
+ op->reg = rd;
+ op->val = regs->gpr[rd];
+ suffixopcode = get_op(suffix);
+ prefixtype = (word >> 24) & 0x3;
+ switch (prefixtype) {
+ case 2:
+ if (prefix_r && ra)
+ return 0;
+ switch (suffixopcode) {
+ case 14: /* paddi */
+ op->type = COMPUTE | PREFIXED;
+ op->val = mlsd_8lsd_ea(word, suffix, regs);
+ goto compute_done;
+ }
+ }
+ break;
+ case 2: /* tdi */
+ if (rd & trap_compare(regs->gpr[ra], (short) word))
+ goto trap;
+ return 1;
+#endif
+ case 3: /* twi */
+ if (rd & trap_compare((int)regs->gpr[ra], (short) word))
+ goto trap;
+ return 1;
+
+#ifdef __powerpc64__
+ case 4:
+ /*
+		 * A great many instructions with this primary opcode have
+		 * existed since as early as ISA v2.03; however, the ones we
+		 * currently emulate were all introduced with ISA 3.0.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+
+ switch (word & 0x3f) {
+ case 48: /* maddhd */
+ asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+
+ case 49: /* maddhdu */
+ asm volatile(PPC_MADDHDU(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+
+ case 51: /* maddld */
+ asm volatile(PPC_MADDLD(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+ }
+
+ /*
+ * There are other instructions from ISA 3.0 with the same
+ * primary opcode which do not have emulation support yet.
+ */
+ goto unknown_opcode;
+#endif
+
+ case 7: /* mulli */
+ op->val = regs->gpr[ra] * (short) word;
+ goto compute_done;
+
+ case 8: /* subfic */
+ imm = (short) word;
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1);
+ return 1;
+
+ case 10: /* cmpli */
+ imm = (unsigned short) word;
+ val = regs->gpr[ra];
+#ifdef __powerpc64__
+ if ((rd & 1) == 0)
+ val = (unsigned int) val;
+#endif
+ do_cmp_unsigned(regs, op, val, imm, rd >> 2);
+ return 1;
+
+ case 11: /* cmpi */
+ imm = (short) word;
+ val = regs->gpr[ra];
+#ifdef __powerpc64__
+ if ((rd & 1) == 0)
+ val = (int) val;
+#endif
+ do_cmp_signed(regs, op, val, imm, rd >> 2);
+ return 1;
+
+ case 12: /* addic */
+ imm = (short) word;
+ add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+ return 1;
+
+ case 13: /* addic. */
+ imm = (short) word;
+ add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+ set_cr0(regs, op);
+ return 1;
+
+ case 14: /* addi */
+ imm = (short) word;
+ if (ra)
+ imm += regs->gpr[ra];
+ op->val = imm;
+ goto compute_done;
+
+ case 15: /* addis */
+ imm = ((short) word) << 16;
+ if (ra)
+ imm += regs->gpr[ra];
+ op->val = imm;
+ goto compute_done;
+
+ case 19:
+ if (((word >> 1) & 0x1f) == 2) {
+ /* addpcis */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ imm = (short) (word & 0xffc1); /* d0 + d2 fields */
+ imm |= (word >> 15) & 0x3e; /* d1 field */
+ op->val = regs->nip + (imm << 16) + 4;
+ goto compute_done;
+ }
+ op->type = UNKNOWN;
+ return 0;
+
+ case 20: /* rlwimi */
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
+ val = DATA32(regs->gpr[rd]);
+ imm = MASK32(mb, me);
+ op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
+ goto logical_done;
+
+ case 21: /* rlwinm */
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
+ val = DATA32(regs->gpr[rd]);
+ op->val = ROTATE(val, rb) & MASK32(mb, me);
+ goto logical_done;
+
+ case 23: /* rlwnm */
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
+ rb = regs->gpr[rb] & 0x1f;
+ val = DATA32(regs->gpr[rd]);
+ op->val = ROTATE(val, rb) & MASK32(mb, me);
+ goto logical_done;
+
+ case 24: /* ori */
+ op->val = regs->gpr[rd] | (unsigned short) word;
+ goto logical_done_nocc;
+
+ case 25: /* oris */
+ imm = (unsigned short) word;
+ op->val = regs->gpr[rd] | (imm << 16);
+ goto logical_done_nocc;
+
+ case 26: /* xori */
+ op->val = regs->gpr[rd] ^ (unsigned short) word;
+ goto logical_done_nocc;
+
+ case 27: /* xoris */
+ imm = (unsigned short) word;
+ op->val = regs->gpr[rd] ^ (imm << 16);
+ goto logical_done_nocc;
+
+ case 28: /* andi. */
+ op->val = regs->gpr[rd] & (unsigned short) word;
+ set_cr0(regs, op);
+ goto logical_done_nocc;
+
+ case 29: /* andis. */
+ imm = (unsigned short) word;
+ op->val = regs->gpr[rd] & (imm << 16);
+ set_cr0(regs, op);
+ goto logical_done_nocc;
+
+#ifdef __powerpc64__
+ case 30: /* rld* */
+ mb = ((word >> 6) & 0x1f) | (word & 0x20);
+ val = regs->gpr[rd];
+ if ((word & 0x10) == 0) {
+ sh = rb | ((word & 2) << 4);
+ val = ROTATE(val, sh);
+ switch ((word >> 2) & 3) {
+ case 0: /* rldicl */
+ val &= MASK64_L(mb);
+ break;
+ case 1: /* rldicr */
+ val &= MASK64_R(mb);
+ break;
+ case 2: /* rldic */
+ val &= MASK64(mb, 63 - sh);
+ break;
+ case 3: /* rldimi */
+ imm = MASK64(mb, 63 - sh);
+ val = (regs->gpr[ra] & ~imm) |
+ (val & imm);
+ }
+ op->val = val;
+ goto logical_done;
+ } else {
+ sh = regs->gpr[rb] & 0x3f;
+ val = ROTATE(val, sh);
+ switch ((word >> 1) & 7) {
+ case 0: /* rldcl */
+ op->val = val & MASK64_L(mb);
+ goto logical_done;
+ case 1: /* rldcr */
+ op->val = val & MASK64_R(mb);
+ goto logical_done;
+ }
+ }
+#endif
+ op->type = UNKNOWN; /* illegal instruction */
+ return 0;
+
+ case 31:
+ /* isel occupies 32 minor opcodes */
+ if (((word >> 1) & 0x1f) == 15) {
+ mb = (word >> 6) & 0x1f; /* bc field */
+ val = (regs->ccr >> (31 - mb)) & 1;
+ val2 = (ra) ? regs->gpr[ra] : 0;
+
+ op->val = (val) ? val2 : regs->gpr[rb];
+ goto compute_done;
+ }
+
+ switch ((word >> 1) & 0x3ff) {
+ case 4: /* tw */
+ if (rd == 0x1f ||
+ (rd & trap_compare((int)regs->gpr[ra],
+ (int)regs->gpr[rb])))
+ goto trap;
+ return 1;
+#ifdef __powerpc64__
+ case 68: /* td */
+ if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb]))
+ goto trap;
+ return 1;
+#endif
+ case 83: /* mfmsr */
+ if (regs->msr & MSR_PR)
+ goto priv;
+ op->type = MFMSR;
+ op->reg = rd;
+ return 0;
+ case 146: /* mtmsr */
+ if (regs->msr & MSR_PR)
+ goto priv;
+ op->type = MTMSR;
+ op->reg = rd;
+ op->val = 0xffffffff & ~(MSR_ME | MSR_LE);
+ return 0;
+#ifdef CONFIG_PPC64
+ case 178: /* mtmsrd */
+ if (regs->msr & MSR_PR)
+ goto priv;
+ op->type = MTMSR;
+ op->reg = rd;
+ /* only MSR_EE and MSR_RI get changed if bit 15 set */
+ /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */
+ imm = (word & 0x10000)? 0x8002: 0xefffffffffffeffeUL;
+ op->val = imm;
+ return 0;
+#endif
+
+ case 19: /* mfcr */
+ imm = 0xffffffffUL;
+ if ((word >> 20) & 1) {
+ imm = 0xf0000000UL;
+ for (sh = 0; sh < 8; ++sh) {
+ if (word & (0x80000 >> sh))
+ break;
+ imm >>= 4;
+ }
+ }
+ op->val = regs->ccr & imm;
+ goto compute_done;
+
+ case 128: /* setb */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ /*
+ * 'ra' encodes the CR field number (bfa) in the top 3 bits.
+			 * Since each CR field is 4 bits wide, masking off the
+			 * bottom two bits of 'ra' leaves bfa * 4, the bit
+			 * offset of the first bit in that CR field.
+ */
+ ra = ra & ~0x3;
+ /* 'val' stores bits of the CR field (bfa) */
+ val = regs->ccr >> (CR0_SHIFT - ra);
+ /* checks if the LT bit of CR field (bfa) is set */
+ if (val & 8)
+ op->val = -1;
+ /* checks if the GT bit of CR field (bfa) is set */
+ else if (val & 4)
+ op->val = 1;
+ else
+ op->val = 0;
+ goto compute_done;
+
+ case 144: /* mtcrf */
+ op->type = COMPUTE + SETCC;
+ imm = 0xf0000000UL;
+ val = regs->gpr[rd];
+ op->ccval = regs->ccr;
+ for (sh = 0; sh < 8; ++sh) {
+ if (word & (0x80000 >> sh))
+ op->ccval = (op->ccval & ~imm) |
+ (val & imm);
+ imm >>= 4;
+ }
+ return 1;
+
+ case 339: /* mfspr */
+ spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0);
+ op->type = MFSPR;
+ op->reg = rd;
+ op->spr = spr;
+ if (spr == SPRN_XER || spr == SPRN_LR ||
+ spr == SPRN_CTR)
+ return 1;
+ return 0;
+
+ case 467: /* mtspr */
+ spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0);
+ op->type = MTSPR;
+ op->val = regs->gpr[rd];
+ op->spr = spr;
+ if (spr == SPRN_XER || spr == SPRN_LR ||
+ spr == SPRN_CTR)
+ return 1;
+ return 0;
+
+/*
+ * Compare instructions
+ */
+ case 0: /* cmp */
+ val = regs->gpr[ra];
+ val2 = regs->gpr[rb];
+#ifdef __powerpc64__
+ if ((rd & 1) == 0) {
+ /* word (32-bit) compare */
+ val = (int) val;
+ val2 = (int) val2;
+ }
+#endif
+ do_cmp_signed(regs, op, val, val2, rd >> 2);
+ return 1;
+
+ case 32: /* cmpl */
+ val = regs->gpr[ra];
+ val2 = regs->gpr[rb];
+#ifdef __powerpc64__
+ if ((rd & 1) == 0) {
+ /* word (32-bit) compare */
+ val = (unsigned int) val;
+ val2 = (unsigned int) val2;
+ }
+#endif
+ do_cmp_unsigned(regs, op, val, val2, rd >> 2);
+ return 1;
+
+ case 508: /* cmpb */
+ do_cmpb(regs, op, regs->gpr[rd], regs->gpr[rb]);
+ goto logical_done_nocc;
+
+/*
+ * Arithmetic instructions
+ */
+ case 8: /* subfc */
+ add_with_carry(regs, op, rd, ~regs->gpr[ra],
+ regs->gpr[rb], 1);
+ goto arith_done;
+#ifdef __powerpc64__
+ case 9: /* mulhdu */
+ asm("mulhdu %0,%1,%2" : "=r" (op->val) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ goto arith_done;
+#endif
+ case 10: /* addc */
+ add_with_carry(regs, op, rd, regs->gpr[ra],
+ regs->gpr[rb], 0);
+ goto arith_done;
+
+ case 11: /* mulhwu */
+ asm("mulhwu %0,%1,%2" : "=r" (op->val) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ goto arith_done;
+
+ case 40: /* subf */
+ op->val = regs->gpr[rb] - regs->gpr[ra];
+ goto arith_done;
+#ifdef __powerpc64__
+ case 73: /* mulhd */
+ asm("mulhd %0,%1,%2" : "=r" (op->val) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ goto arith_done;
+#endif
+ case 75: /* mulhw */
+ asm("mulhw %0,%1,%2" : "=r" (op->val) :
+ "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+ goto arith_done;
+
+ case 104: /* neg */
+ op->val = -regs->gpr[ra];
+ goto arith_done;
+
+ case 136: /* subfe */
+ add_with_carry(regs, op, rd, ~regs->gpr[ra],
+ regs->gpr[rb], regs->xer & XER_CA);
+ goto arith_done;
+
+ case 138: /* adde */
+ add_with_carry(regs, op, rd, regs->gpr[ra],
+ regs->gpr[rb], regs->xer & XER_CA);
+ goto arith_done;
+
+ case 200: /* subfze */
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], 0L,
+ regs->xer & XER_CA);
+ goto arith_done;
+
+ case 202: /* addze */
+ add_with_carry(regs, op, rd, regs->gpr[ra], 0L,
+ regs->xer & XER_CA);
+ goto arith_done;
+
+ case 232: /* subfme */
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], -1L,
+ regs->xer & XER_CA);
+ goto arith_done;
+#ifdef __powerpc64__
+ case 233: /* mulld */
+ op->val = regs->gpr[ra] * regs->gpr[rb];
+ goto arith_done;
+#endif
+ case 234: /* addme */
+ add_with_carry(regs, op, rd, regs->gpr[ra], -1L,
+ regs->xer & XER_CA);
+ goto arith_done;
+
+ case 235: /* mullw */
+ op->val = (long)(int) regs->gpr[ra] *
+ (int) regs->gpr[rb];
+ goto arith_done;
+#ifdef __powerpc64__
+ case 265: /* modud */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->val = regs->gpr[ra] % regs->gpr[rb];
+ goto compute_done;
+#endif
+ case 266: /* add */
+ op->val = regs->gpr[ra] + regs->gpr[rb];
+ goto arith_done;
+
+ case 267: /* moduw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->val = (unsigned int) regs->gpr[ra] %
+ (unsigned int) regs->gpr[rb];
+ goto compute_done;
+#ifdef __powerpc64__
+ case 457: /* divdu */
+ op->val = regs->gpr[ra] / regs->gpr[rb];
+ goto arith_done;
+#endif
+ case 459: /* divwu */
+ op->val = (unsigned int) regs->gpr[ra] /
+ (unsigned int) regs->gpr[rb];
+ goto arith_done;
+#ifdef __powerpc64__
+ case 489: /* divd */
+ op->val = (long int) regs->gpr[ra] /
+ (long int) regs->gpr[rb];
+ goto arith_done;
+#endif
+ case 491: /* divw */
+ op->val = (int) regs->gpr[ra] /
+ (int) regs->gpr[rb];
+ goto arith_done;
+#ifdef __powerpc64__
+ case 425: /* divde[.] */
+ asm volatile(PPC_DIVDE(%0, %1, %2) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]));
+ goto arith_done;
+ case 393: /* divdeu[.] */
+ asm volatile(PPC_DIVDEU(%0, %1, %2) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]));
+ goto arith_done;
+#endif
+ case 755: /* darn */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ switch (ra & 0x3) {
+ case 0:
+ /* 32-bit conditioned */
+ asm volatile(PPC_DARN(%0, 0) : "=r" (op->val));
+ goto compute_done;
+
+ case 1:
+ /* 64-bit conditioned */
+ asm volatile(PPC_DARN(%0, 1) : "=r" (op->val));
+ goto compute_done;
+
+ case 2:
+ /* 64-bit raw */
+ asm volatile(PPC_DARN(%0, 2) : "=r" (op->val));
+ goto compute_done;
+ }
+
+ goto unknown_opcode;
+#ifdef __powerpc64__
+ case 777: /* modsd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->val = (long int) regs->gpr[ra] %
+ (long int) regs->gpr[rb];
+ goto compute_done;
+#endif
+ case 779: /* modsw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->val = (int) regs->gpr[ra] %
+ (int) regs->gpr[rb];
+ goto compute_done;
+
+/*
+ * Logical instructions
+ */
+ case 26: /* cntlzw */
+ val = (unsigned int) regs->gpr[rd];
+ op->val = ( val ? __builtin_clz(val) : 32 );
+ goto logical_done;
+#ifdef __powerpc64__
+ case 58: /* cntlzd */
+ val = regs->gpr[rd];
+ op->val = ( val ? __builtin_clzl(val) : 64 );
+ goto logical_done;
+#endif
+ case 28: /* and */
+ op->val = regs->gpr[rd] & regs->gpr[rb];
+ goto logical_done;
+
+ case 60: /* andc */
+ op->val = regs->gpr[rd] & ~regs->gpr[rb];
+ goto logical_done;
+
+ case 122: /* popcntb */
+ do_popcnt(regs, op, regs->gpr[rd], 8);
+ goto logical_done_nocc;
+
+ case 124: /* nor */
+ op->val = ~(regs->gpr[rd] | regs->gpr[rb]);
+ goto logical_done;
+
+ case 154: /* prtyw */
+ do_prty(regs, op, regs->gpr[rd], 32);
+ goto logical_done_nocc;
+
+ case 186: /* prtyd */
+ do_prty(regs, op, regs->gpr[rd], 64);
+ goto logical_done_nocc;
+#ifdef CONFIG_PPC64
+ case 252: /* bpermd */
+ do_bpermd(regs, op, regs->gpr[rd], regs->gpr[rb]);
+ goto logical_done_nocc;
+#endif
+ case 284: /* xor */
+		case 284:	/* eqv */
+ goto logical_done;
+
+ case 316: /* xor */
+ op->val = regs->gpr[rd] ^ regs->gpr[rb];
+ goto logical_done;
+
+ case 378: /* popcntw */
+ do_popcnt(regs, op, regs->gpr[rd], 32);
+ goto logical_done_nocc;
+
+ case 412: /* orc */
+ op->val = regs->gpr[rd] | ~regs->gpr[rb];
+ goto logical_done;
+
+ case 444: /* or */
+ op->val = regs->gpr[rd] | regs->gpr[rb];
+ goto logical_done;
+
+ case 476: /* nand */
+ op->val = ~(regs->gpr[rd] & regs->gpr[rb]);
+ goto logical_done;
+#ifdef CONFIG_PPC64
+ case 506: /* popcntd */
+ do_popcnt(regs, op, regs->gpr[rd], 64);
+ goto logical_done_nocc;
+#endif
+ case 538: /* cnttzw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ val = (unsigned int) regs->gpr[rd];
+ op->val = (val ? __builtin_ctz(val) : 32);
+ goto logical_done;
+#ifdef __powerpc64__
+ case 570: /* cnttzd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ val = regs->gpr[rd];
+ op->val = (val ? __builtin_ctzl(val) : 64);
+ goto logical_done;
+#endif
+ case 922: /* extsh */
+ op->val = (signed short) regs->gpr[rd];
+ goto logical_done;
+
+ case 954: /* extsb */
+ op->val = (signed char) regs->gpr[rd];
+ goto logical_done;
+#ifdef __powerpc64__
+ case 986: /* extsw */
+ op->val = (signed int) regs->gpr[rd];
+ goto logical_done;
+#endif
+
+/*
+ * Shift instructions
+ */
+ case 24: /* slw */
+ sh = regs->gpr[rb] & 0x3f;
+ if (sh < 32)
+ op->val = (regs->gpr[rd] << sh) & 0xffffffffUL;
+ else
+ op->val = 0;
+ goto logical_done;
+
+ case 536: /* srw */
+ sh = regs->gpr[rb] & 0x3f;
+ if (sh < 32)
+ op->val = (regs->gpr[rd] & 0xffffffffUL) >> sh;
+ else
+ op->val = 0;
+ goto logical_done;
+
+ case 792: /* sraw */
+ op->type = COMPUTE + SETREG + SETXER;
+ sh = regs->gpr[rb] & 0x3f;
+ ival = (signed int) regs->gpr[rd];
+ op->val = ival >> (sh < 32 ? sh : 31);
+ op->xerval = regs->xer;
+ if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0))
+ op->xerval |= XER_CA;
+ else
+ op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
+ goto logical_done;
+
+ case 824: /* srawi */
+ op->type = COMPUTE + SETREG + SETXER;
+ sh = rb;
+ ival = (signed int) regs->gpr[rd];
+ op->val = ival >> sh;
+ op->xerval = regs->xer;
+ if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
+ op->xerval |= XER_CA;
+ else
+ op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
+ goto logical_done;
+
+#ifdef __powerpc64__
+ case 27: /* sld */
+ sh = regs->gpr[rb] & 0x7f;
+ if (sh < 64)
+ op->val = regs->gpr[rd] << sh;
+ else
+ op->val = 0;
+ goto logical_done;
+
+ case 539: /* srd */
+ sh = regs->gpr[rb] & 0x7f;
+ if (sh < 64)
+ op->val = regs->gpr[rd] >> sh;
+ else
+ op->val = 0;
+ goto logical_done;
+
+ case 794: /* srad */
+ op->type = COMPUTE + SETREG + SETXER;
+ sh = regs->gpr[rb] & 0x7f;
+ ival = (signed long int) regs->gpr[rd];
+ op->val = ival >> (sh < 64 ? sh : 63);
+ op->xerval = regs->xer;
+ if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))
+ op->xerval |= XER_CA;
+ else
+ op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
+ goto logical_done;
+
+ case 826: /* sradi with sh_5 = 0 */
+ case 827: /* sradi with sh_5 = 1 */
+ op->type = COMPUTE + SETREG + SETXER;
+ sh = rb | ((word & 2) << 4);
+ ival = (signed long int) regs->gpr[rd];
+ op->val = ival >> sh;
+ op->xerval = regs->xer;
+ if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
+ op->xerval |= XER_CA;
+ else
+ op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
+ goto logical_done;
+
+ case 890: /* extswsli with sh_5 = 0 */
+ case 891: /* extswsli with sh_5 = 1 */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->type = COMPUTE + SETREG;
+ sh = rb | ((word & 2) << 4);
+ val = (signed int) regs->gpr[rd];
+ if (sh)
+ op->val = ROTATE(val, sh) & MASK64(0, 63 - sh);
+ else
+ op->val = val;
+ goto logical_done;
+
+#endif /* __powerpc64__ */
+
+/*
+ * Cache instructions
+ */
+ case 54: /* dcbst */
+ op->type = MKOP(CACHEOP, DCBST, 0);
+ op->ea = xform_ea(word, regs);
+ return 0;
+
+ case 86: /* dcbf */
+ op->type = MKOP(CACHEOP, DCBF, 0);
+ op->ea = xform_ea(word, regs);
+ return 0;
+
+ case 246: /* dcbtst */
+ op->type = MKOP(CACHEOP, DCBTST, 0);
+ op->ea = xform_ea(word, regs);
+ op->reg = rd;
+ return 0;
+
+ case 278: /* dcbt */
+ op->type = MKOP(CACHEOP, DCBTST, 0);
+ op->ea = xform_ea(word, regs);
+ op->reg = rd;
+ return 0;
+
+ case 982: /* icbi */
+ op->type = MKOP(CACHEOP, ICBI, 0);
+ op->ea = xform_ea(word, regs);
+ return 0;
+
+ case 1014: /* dcbz */
+ op->type = MKOP(CACHEOP, DCBZ, 0);
+ op->ea = xform_ea(word, regs);
+ return 0;
+ }
+ break;
+ }
+
+/*
+ * Loads and stores.
+ */
+ op->type = UNKNOWN;
+ op->update_reg = ra;
+ op->reg = rd;
+ op->val = regs->gpr[rd];
+ u = (word >> 20) & UPDATE;
+ op->vsx_flags = 0;
+
+ switch (opcode) {
+ case 31:
+ u = word & UPDATE;
+ op->ea = xform_ea(word, regs);
+ switch ((word >> 1) & 0x3ff) {
+ case 20: /* lwarx */
+ op->type = MKOP(LARX, 0, 4);
+ break;
+
+ case 150: /* stwcx. */
+ op->type = MKOP(STCX, 0, 4);
+ break;
+
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
+ case 52: /* lbarx */
+ op->type = MKOP(LARX, 0, 1);
+ break;
+
+ case 694: /* stbcx. */
+ op->type = MKOP(STCX, 0, 1);
+ break;
+
+ case 116: /* lharx */
+ op->type = MKOP(LARX, 0, 2);
+ break;
+
+ case 726: /* sthcx. */
+ op->type = MKOP(STCX, 0, 2);
+ break;
+#endif
+#ifdef __powerpc64__
+ case 84: /* ldarx */
+ op->type = MKOP(LARX, 0, 8);
+ break;
+
+ case 214: /* stdcx. */
+ op->type = MKOP(STCX, 0, 8);
+ break;
+
+ case 276: /* lqarx */
+ if (!((rd & 1) || rd == ra || rd == rb))
+ op->type = MKOP(LARX, 0, 16);
+ break;
+
+ case 182: /* stqcx. */
+ if (!(rd & 1))
+ op->type = MKOP(STCX, 0, 16);
+ break;
+#endif
+
+ case 23: /* lwzx */
+ case 55: /* lwzux */
+ op->type = MKOP(LOAD, u, 4);
+ break;
+
+ case 87: /* lbzx */
+ case 119: /* lbzux */
+ op->type = MKOP(LOAD, u, 1);
+ break;
+
+#ifdef CONFIG_ALTIVEC
+ /*
+ * Note: for the load/store vector element instructions,
+ * bits of the EA say which field of the VMX register to use.
+ */
+ case 7: /* lvebx */
+ op->type = MKOP(LOAD_VMX, 0, 1);
+ op->element_size = 1;
+ break;
+
+ case 39: /* lvehx */
+ op->type = MKOP(LOAD_VMX, 0, 2);
+ op->element_size = 2;
+ break;
+
+ case 71: /* lvewx */
+ op->type = MKOP(LOAD_VMX, 0, 4);
+ op->element_size = 4;
+ break;
+
+ case 103: /* lvx */
+ case 359: /* lvxl */
+ op->type = MKOP(LOAD_VMX, 0, 16);
+ op->element_size = 16;
+ break;
+
+ case 135: /* stvebx */
+ op->type = MKOP(STORE_VMX, 0, 1);
+ op->element_size = 1;
+ break;
+
+ case 167: /* stvehx */
+ op->type = MKOP(STORE_VMX, 0, 2);
+ op->element_size = 2;
+ break;
+
+ case 199: /* stvewx */
+ op->type = MKOP(STORE_VMX, 0, 4);
+ op->element_size = 4;
+ break;
+
+ case 231: /* stvx */
+ case 487: /* stvxl */
+ op->type = MKOP(STORE_VMX, 0, 16);
+ break;
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef __powerpc64__
+ case 21: /* ldx */
+ case 53: /* ldux */
+ op->type = MKOP(LOAD, u, 8);
+ break;
+
+ case 149: /* stdx */
+ case 181: /* stdux */
+ op->type = MKOP(STORE, u, 8);
+ break;
+#endif
+
+ case 151: /* stwx */
+ case 183: /* stwux */
+ op->type = MKOP(STORE, u, 4);
+ break;
+
+ case 215: /* stbx */
+ case 247: /* stbux */
+ op->type = MKOP(STORE, u, 1);
+ break;
+
+ case 279: /* lhzx */
+ case 311: /* lhzux */
+ op->type = MKOP(LOAD, u, 2);
+ break;
+
+#ifdef __powerpc64__
+ case 341: /* lwax */
+ case 373: /* lwaux */
+ op->type = MKOP(LOAD, SIGNEXT | u, 4);
+ break;
+#endif
+
+ case 343: /* lhax */
+ case 375: /* lhaux */
+ op->type = MKOP(LOAD, SIGNEXT | u, 2);
+ break;
+
+ case 407: /* sthx */
+ case 439: /* sthux */
+ op->type = MKOP(STORE, u, 2);
+ break;
+
+#ifdef __powerpc64__
+ case 532: /* ldbrx */
+ op->type = MKOP(LOAD, BYTEREV, 8);
+ break;
+
+#endif
+ case 533: /* lswx */
+ op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f);
+ break;
+
+ case 534: /* lwbrx */
+ op->type = MKOP(LOAD, BYTEREV, 4);
+ break;
+
+ case 597: /* lswi */
+ if (rb == 0)
+ rb = 32; /* # bytes to load */
+ op->type = MKOP(LOAD_MULTI, 0, rb);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ break;
+
+#ifdef CONFIG_PPC_FPU
+ case 535: /* lfsx */
+ case 567: /* lfsux */
+ op->type = MKOP(LOAD_FP, u | FPCONV, 4);
+ break;
+
+ case 599: /* lfdx */
+ case 631: /* lfdux */
+ op->type = MKOP(LOAD_FP, u, 8);
+ break;
+
+ case 663: /* stfsx */
+ case 695: /* stfsux */
+ op->type = MKOP(STORE_FP, u | FPCONV, 4);
+ break;
+
+ case 727: /* stfdx */
+ case 759: /* stfdux */
+ op->type = MKOP(STORE_FP, u, 8);
+ break;
+
+#ifdef __powerpc64__
+ case 791: /* lfdpx */
+ op->type = MKOP(LOAD_FP, 0, 16);
+ break;
+
+ case 855: /* lfiwax */
+ op->type = MKOP(LOAD_FP, SIGNEXT, 4);
+ break;
+
+ case 887: /* lfiwzx */
+ op->type = MKOP(LOAD_FP, 0, 4);
+ break;
+
+ case 919: /* stfdpx */
+ op->type = MKOP(STORE_FP, 0, 16);
+ break;
+
+ case 983: /* stfiwx */
+ op->type = MKOP(STORE_FP, 0, 4);
+ break;
+#endif /* __powerpc64 */
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef __powerpc64__
+ case 660: /* stdbrx */
+ op->type = MKOP(STORE, BYTEREV, 8);
+ op->val = byterev_8(regs->gpr[rd]);
+ break;
+
+#endif
+ case 661: /* stswx */
+ op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f);
+ break;
+
+ case 662: /* stwbrx */
+ op->type = MKOP(STORE, BYTEREV, 4);
+ op->val = byterev_4(regs->gpr[rd]);
+ break;
+
+ case 725: /* stswi */
+ if (rb == 0)
+ rb = 32; /* # bytes to store */
+ op->type = MKOP(STORE_MULTI, 0, rb);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ break;
+
+ case 790: /* lhbrx */
+ op->type = MKOP(LOAD, BYTEREV, 2);
+ break;
+
+ case 918: /* sthbrx */
+ op->type = MKOP(STORE, BYTEREV, 2);
+ op->val = byterev_2(regs->gpr[rd]);
+ break;
+
+#ifdef CONFIG_VSX
+ case 12: /* lxsiwzx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ break;
+
+ case 76: /* lxsiwax */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, SIGNEXT, 4);
+ op->element_size = 8;
+ break;
+
+ case 140: /* stxsiwx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ break;
+
+ case 268: /* lxvx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 269: /* lxvl */
+ case 301: { /* lxvll */
+ int nb;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ nb = regs->gpr[rb] & 0xff;
+ if (nb > 16)
+ nb = 16;
+ op->type = MKOP(LOAD_VSX, 0, nb);
+ op->element_size = 16;
+ op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) |
+ VSX_CHECK_VEC;
+ break;
+ }
+ case 332: /* lxvdsx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_SPLAT;
+ break;
+
+ case 333: /* lxvpx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(LOAD_VSX, 0, 32);
+ op->element_size = 32;
+ break;
+
+ case 364: /* lxvwsx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 4;
+ op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC;
+ break;
+
+ case 396: /* stxvx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 397: /* stxvl */
+ case 429: { /* stxvll */
+ int nb;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ nb = regs->gpr[rb] & 0xff;
+ if (nb > 16)
+ nb = 16;
+ op->type = MKOP(STORE_VSX, 0, nb);
+ op->element_size = 16;
+ op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) |
+ VSX_CHECK_VEC;
+ break;
+ }
+ case 461: /* stxvpx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(STORE_VSX, 0, 32);
+ op->element_size = 32;
+ break;
+ case 524: /* lxsspx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV;
+ break;
+
+ case 588: /* lxsdx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ break;
+
+ case 652: /* stxsspx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV;
+ break;
+
+ case 716: /* stxsdx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 8);
+ op->element_size = 8;
+ break;
+
+ case 780: /* lxvw4x */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 4;
+ break;
+
+ case 781: /* lxsibzx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 1);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 812: /* lxvh8x */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 2;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 813: /* lxsihzx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 2);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 844: /* lxvd2x */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 8;
+ break;
+
+ case 876: /* lxvb16x */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 1;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 908: /* stxvw4x */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 4;
+ break;
+
+ case 909: /* stxsibx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 1);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 940: /* stxvh8x */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 2;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 941: /* stxsihx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 2);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 972: /* stxvd2x */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 8;
+ break;
+
+ case 1004: /* stxvb16x */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 1;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+#endif /* CONFIG_VSX */
+ }
+ break;
+
+ case 32: /* lwz */
+ case 33: /* lwzu */
+ op->type = MKOP(LOAD, u, 4);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 34: /* lbz */
+ case 35: /* lbzu */
+ op->type = MKOP(LOAD, u, 1);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 36: /* stw */
+ case 37: /* stwu */
+ op->type = MKOP(STORE, u, 4);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 38: /* stb */
+ case 39: /* stbu */
+ op->type = MKOP(STORE, u, 1);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 40: /* lhz */
+ case 41: /* lhzu */
+ op->type = MKOP(LOAD, u, 2);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 42: /* lha */
+ case 43: /* lhau */
+ op->type = MKOP(LOAD, SIGNEXT | u, 2);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 44: /* sth */
+ case 45: /* sthu */
+ op->type = MKOP(STORE, u, 2);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 46: /* lmw */
+ if (ra >= rd)
+ break; /* invalid form, ra in range to load */
+ op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd));
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 47: /* stmw */
+ op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd));
+ op->ea = dform_ea(word, regs);
+ break;
+
+#ifdef CONFIG_PPC_FPU
+ case 48: /* lfs */
+ case 49: /* lfsu */
+ op->type = MKOP(LOAD_FP, u | FPCONV, 4);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 50: /* lfd */
+ case 51: /* lfdu */
+ op->type = MKOP(LOAD_FP, u, 8);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 52: /* stfs */
+ case 53: /* stfsu */
+ op->type = MKOP(STORE_FP, u | FPCONV, 4);
+ op->ea = dform_ea(word, regs);
+ break;
+
+ case 54: /* stfd */
+ case 55: /* stfdu */
+ op->type = MKOP(STORE_FP, u, 8);
+ op->ea = dform_ea(word, regs);
+ break;
+#endif
+
+#ifdef __powerpc64__
+ case 56: /* lq */
+ if (!((rd & 1) || (rd == ra)))
+ op->type = MKOP(LOAD, 0, 16);
+ op->ea = dqform_ea(word, regs);
+ break;
+#endif
+
+#ifdef CONFIG_VSX
+ case 57: /* lfdp, lxsd, lxssp */
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
+ case 0: /* lfdp */
+ if (rd & 1)
+ break; /* reg must be even */
+ op->type = MKOP(LOAD_FP, 0, 16);
+ break;
+ case 2: /* lxsd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 3: /* lxssp */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ }
+ break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
+ case 58: /* ld[u], lwa */
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
+ case 0: /* ld */
+ op->type = MKOP(LOAD, 0, 8);
+ break;
+ case 1: /* ldu */
+ op->type = MKOP(LOAD, UPDATE, 8);
+ break;
+ case 2: /* lwa */
+ op->type = MKOP(LOAD, SIGNEXT, 4);
+ break;
+ }
+ break;
+#endif
+
+#ifdef CONFIG_VSX
+ case 6:
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->element_size = 32;
+ switch (word & 0xf) {
+ case 0: /* lxvp */
+ op->type = MKOP(LOAD_VSX, 0, 32);
+ break;
+ case 1: /* stxvp */
+ op->type = MKOP(STORE_VSX, 0, 32);
+ break;
+ }
+ break;
+
+ case 61: /* stfdp, lxv, stxsd, stxssp, stxv */
+ switch (word & 7) {
+ case 0: /* stfdp with LSB of DS field = 0 */
+ case 4: /* stfdp with LSB of DS field = 1 */
+ op->ea = dsform_ea(word, regs);
+ op->type = MKOP(STORE_FP, 0, 16);
+ break;
+
+ case 1: /* lxv */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ if (word & 8)
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 2: /* stxsd with LSB of DS field = 0 */
+ case 6: /* stxsd with LSB of DS field = 1 */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dsform_ea(word, regs);
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 3: /* stxssp with LSB of DS field = 0 */
+ case 7: /* stxssp with LSB of DS field = 1 */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dsform_ea(word, regs);
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+
+ case 5: /* stxv */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ if (word & 8)
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ }
+ break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
+ case 62: /* std[u] */
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
+ case 0: /* std */
+ op->type = MKOP(STORE, 0, 8);
+ break;
+ case 1: /* stdu */
+ op->type = MKOP(STORE, UPDATE, 8);
+ break;
+ case 2: /* stq */
+ if (!(rd & 1))
+ op->type = MKOP(STORE, 0, 16);
+ break;
+ }
+ break;
+ case 1: /* Prefixed instructions */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+
+ prefix_r = GET_PREFIX_R(word);
+ ra = GET_PREFIX_RA(suffix);
+ op->update_reg = ra;
+ rd = (suffix >> 21) & 0x1f;
+ op->reg = rd;
+ op->val = regs->gpr[rd];
+
+ suffixopcode = get_op(suffix);
+ prefixtype = (word >> 24) & 0x3;
+ switch (prefixtype) {
+ case 0: /* Type 00 Eight-Byte Load/Store */
+ if (prefix_r && ra)
+ break;
+ op->ea = mlsd_8lsd_ea(word, suffix, regs);
+ switch (suffixopcode) {
+ case 41: /* plwa */
+ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4);
+ break;
+#ifdef CONFIG_VSX
+ case 42: /* plxsd */
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, PREFIXED, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 43: /* plxssp */
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, PREFIXED, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ case 46: /* pstxsd */
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, PREFIXED, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 47: /* pstxssp */
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, PREFIXED, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ case 51: /* plxv1 */
+ op->reg += 32;
+ fallthrough;
+ case 50: /* plxv0 */
+ op->type = MKOP(LOAD_VSX, PREFIXED, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 55: /* pstxv1 */
+ op->reg = rd + 32;
+ fallthrough;
+ case 54: /* pstxv0 */
+ op->type = MKOP(STORE_VSX, PREFIXED, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+#endif /* CONFIG_VSX */
+ case 56: /* plq */
+ op->type = MKOP(LOAD, PREFIXED, 16);
+ break;
+ case 57: /* pld */
+ op->type = MKOP(LOAD, PREFIXED, 8);
+ break;
+#ifdef CONFIG_VSX
+ case 58: /* plxvp */
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(LOAD_VSX, PREFIXED, 32);
+ op->element_size = 32;
+ break;
+#endif /* CONFIG_VSX */
+ case 60: /* pstq */
+ op->type = MKOP(STORE, PREFIXED, 16);
+ break;
+ case 61: /* pstd */
+ op->type = MKOP(STORE, PREFIXED, 8);
+ break;
+#ifdef CONFIG_VSX
+ case 62: /* pstxvp */
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(STORE_VSX, PREFIXED, 32);
+ op->element_size = 32;
+ break;
+#endif /* CONFIG_VSX */
+ }
+ break;
+ case 1: /* Type 01 Eight-Byte Register-to-Register */
+ break;
+ case 2: /* Type 10 Modified Load/Store */
+ if (prefix_r && ra)
+ break;
+ op->ea = mlsd_8lsd_ea(word, suffix, regs);
+ switch (suffixopcode) {
+ case 32: /* plwz */
+ op->type = MKOP(LOAD, PREFIXED, 4);
+ break;
+ case 34: /* plbz */
+ op->type = MKOP(LOAD, PREFIXED, 1);
+ break;
+ case 36: /* pstw */
+ op->type = MKOP(STORE, PREFIXED, 4);
+ break;
+ case 38: /* pstb */
+ op->type = MKOP(STORE, PREFIXED, 1);
+ break;
+ case 40: /* plhz */
+ op->type = MKOP(LOAD, PREFIXED, 2);
+ break;
+ case 42: /* plha */
+ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2);
+ break;
+ case 44: /* psth */
+ op->type = MKOP(STORE, PREFIXED, 2);
+ break;
+ case 48: /* plfs */
+ op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4);
+ break;
+ case 50: /* plfd */
+ op->type = MKOP(LOAD_FP, PREFIXED, 8);
+ break;
+ case 52: /* pstfs */
+ op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4);
+ break;
+ case 54: /* pstfd */
+ op->type = MKOP(STORE_FP, PREFIXED, 8);
+ break;
+ }
+ break;
+ case 3: /* Type 11 Modified Register-to-Register */
+ break;
+ }
+#endif /* __powerpc64__ */
+
+ }
+
+ if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) {
+ switch (GETTYPE(op->type)) {
+ case LOAD:
+ if (ra == rd)
+ goto unknown_opcode;
+ fallthrough;
+ case STORE:
+ case LOAD_FP:
+ case STORE_FP:
+ if (ra == 0)
+ goto unknown_opcode;
+ }
+ }
+
+#ifdef CONFIG_VSX
+ if ((GETTYPE(op->type) == LOAD_VSX ||
+ GETTYPE(op->type) == STORE_VSX) &&
+ !cpu_has_feature(CPU_FTR_VSX)) {
+ return -1;
+ }
+#endif /* CONFIG_VSX */
+
+ return 0;
+
+ unknown_opcode:
+ op->type = UNKNOWN;
+ return 0;
+
+ logical_done:
+ if (word & 1)
+ set_cr0(regs, op);
+ logical_done_nocc:
+ op->reg = ra;
+ op->type |= SETREG;
+ return 1;
+
+ arith_done:
+ if (word & 1)
+ set_cr0(regs, op);
+ compute_done:
+ op->reg = rd;
+ op->type |= SETREG;
+ return 1;
+
+ priv:
+ op->type = INTERRUPT | 0x700;
+ op->val = SRR1_PROGPRIV;
+ return 0;
+
+ trap:
+ op->type = INTERRUPT | 0x700;
+ op->val = SRR1_PROGTRAP;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(analyse_instr);
+NOKPROBE_SYMBOL(analyse_instr);
+
+/*
+ * For PPC32 we always use stwu with r1 to change the stack pointer.
+ * Such an emulated store may corrupt the exception frame, so we
+ * provide an exception frame trampoline, which is pushed below the
+ * kprobed function's stack. Here we only update gpr[1] and do not
+ * emulate the actual store; the real store is performed safely in
+ * the exception return code, which checks this flag.
+ */
+static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs)
+{
+ /*
+	 * Check whether the flag is already set, since that would
+	 * mean we are about to lose a previously recorded update.
+ */
+ WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE));
+ set_thread_flag(TIF_EMULATE_STACK_STORE);
+ return 0;
+}
+
+static nokprobe_inline void do_signext(unsigned long *valp, int size)
+{
+ switch (size) {
+ case 2:
+ *valp = (signed short) *valp;
+ break;
+ case 4:
+ *valp = (signed int) *valp;
+ break;
+ }
+}
+
+static nokprobe_inline void do_byterev(unsigned long *valp, int size)
+{
+ switch (size) {
+ case 2:
+ *valp = byterev_2(*valp);
+ break;
+ case 4:
+ *valp = byterev_4(*valp);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ *valp = byterev_8(*valp);
+ break;
+#endif
+ }
+}
+
+/*
+ * Emulate an instruction that can be executed just by updating
+ * fields in *regs.
+ */
+void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
+{
+ unsigned long next_pc;
+
+ next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type));
+ switch (GETTYPE(op->type)) {
+ case COMPUTE:
+ if (op->type & SETREG)
+ regs->gpr[op->reg] = op->val;
+ if (op->type & SETCC)
+ regs->ccr = op->ccval;
+ if (op->type & SETXER)
+ regs->xer = op->xerval;
+ break;
+
+ case BRANCH:
+ if (op->type & SETLK)
+ regs->link = next_pc;
+ if (op->type & BRTAKEN)
+ next_pc = op->val;
+ if (op->type & DECCTR)
+ --regs->ctr;
+ break;
+
+ case BARRIER:
+ switch (op->type & BARRIER_MASK) {
+ case BARRIER_SYNC:
+ mb();
+ break;
+ case BARRIER_ISYNC:
+ isync();
+ break;
+ case BARRIER_EIEIO:
+ eieio();
+ break;
+#ifdef CONFIG_PPC64
+ case BARRIER_LWSYNC:
+ asm volatile("lwsync" : : : "memory");
+ break;
+ case BARRIER_PTESYNC:
+ asm volatile("ptesync" : : : "memory");
+ break;
+#endif
+ }
+ break;
+
+ case MFSPR:
+ switch (op->spr) {
+ case SPRN_XER:
+ regs->gpr[op->reg] = regs->xer & 0xffffffffUL;
+ break;
+ case SPRN_LR:
+ regs->gpr[op->reg] = regs->link;
+ break;
+ case SPRN_CTR:
+ regs->gpr[op->reg] = regs->ctr;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ break;
+
+ case MTSPR:
+ switch (op->spr) {
+ case SPRN_XER:
+ regs->xer = op->val & 0xffffffffUL;
+ break;
+ case SPRN_LR:
+ regs->link = op->val;
+ break;
+ case SPRN_CTR:
+ regs->ctr = op->val;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ }
+ regs_set_return_ip(regs, next_pc);
+}
+NOKPROBE_SYMBOL(emulate_update_regs);
+
+/*
+ * Emulate a previously-analysed load or store instruction.
+ * Return values are:
+ * 0 = instruction emulated successfully
+ * -EFAULT = address out of range or access faulted (regs->dar
+ * contains the faulting address)
+ * -EACCES = misaligned access, instruction requires alignment
+ * -EINVAL = unknown operation in *op
+ */
+int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
+{
+ int err, size, type;
+ int i, rd, nb;
+ unsigned int cr;
+ unsigned long val;
+ unsigned long ea;
+ bool cross_endian;
+
+ err = 0;
+ size = GETSIZE(op->type);
+ type = GETTYPE(op->type);
+ cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+ ea = truncate_if_32bit(regs->msr, op->ea);
+
+ switch (type) {
+ case LARX:
+ if (ea & (size - 1))
+ return -EACCES; /* can't handle misaligned */
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ err = 0;
+ val = 0;
+ switch (size) {
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
+ case 1:
+ __get_user_asmx(val, ea, err, "lbarx");
+ break;
+ case 2:
+ __get_user_asmx(val, ea, err, "lharx");
+ break;
+#endif
+ case 4:
+ __get_user_asmx(val, ea, err, "lwarx");
+ break;
+#ifdef __powerpc64__
+ case 8:
+ __get_user_asmx(val, ea, err, "ldarx");
+ break;
+ case 16:
+ err = do_lqarx(ea, &regs->gpr[op->reg]);
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ if (err) {
+ regs->dar = ea;
+ break;
+ }
+ if (size < 16)
+ regs->gpr[op->reg] = val;
+ break;
+
+ case STCX:
+ if (ea & (size - 1))
+ return -EACCES; /* can't handle misaligned */
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ err = 0;
+ switch (size) {
+#ifdef __powerpc64__
+ case 1:
+ __put_user_asmx(op->val, ea, err, "stbcx.", cr);
+ break;
+ case 2:
+ __put_user_asmx(op->val, ea, err, "sthcx.", cr);
+ break;
+#endif
+ case 4:
+ __put_user_asmx(op->val, ea, err, "stwcx.", cr);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ __put_user_asmx(op->val, ea, err, "stdcx.", cr);
+ break;
+ case 16:
+ err = do_stqcx(ea, regs->gpr[op->reg],
+ regs->gpr[op->reg + 1], &cr);
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ if (!err)
+ regs->ccr = (regs->ccr & 0x0fffffff) |
+ (cr & 0xe0000000) |
+ ((regs->xer >> 3) & 0x10000000);
+ else
+ regs->dar = ea;
+ break;
+
+ case LOAD:
+#ifdef __powerpc64__
+ if (size == 16) {
+ err = emulate_lq(regs, ea, op->reg, cross_endian);
+ break;
+ }
+#endif
+ err = read_mem(&regs->gpr[op->reg], ea, size, regs);
+ if (!err) {
+ if (op->type & SIGNEXT)
+ do_signext(&regs->gpr[op->reg], size);
+ if ((op->type & BYTEREV) == (cross_endian ? 0 : BYTEREV))
+ do_byterev(&regs->gpr[op->reg], size);
+ }
+ break;
+
+#ifdef CONFIG_PPC_FPU
+ case LOAD_FP:
+ /*
+		 * If the instruction is in userspace, we can emulate it even
+		 * if the FP/VMX/VSX state is not live, because we have the
+		 * state stored in the thread_struct. If the instruction is in
+		 * the kernel, we must not touch the state in the thread_struct.
+ */
+ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+ return 0;
+ err = do_fp_load(op, ea, regs, cross_endian);
+ break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case LOAD_VMX:
+ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+ return 0;
+ err = do_vec_load(op->reg, ea, size, regs, cross_endian);
+ break;
+#endif
+#ifdef CONFIG_VSX
+ case LOAD_VSX: {
+ unsigned long msrbit = MSR_VSX;
+
+ /*
+ * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+ * when the target of the instruction is a vector register.
+ */
+ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+ msrbit = MSR_VEC;
+ if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+ return 0;
+ err = do_vsx_load(op, ea, regs, cross_endian);
+ break;
+ }
+#endif
+ case LOAD_MULTI:
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ rd = op->reg;
+ for (i = 0; i < size; i += 4) {
+ unsigned int v32 = 0;
+
+ nb = size - i;
+ if (nb > 4)
+ nb = 4;
+ err = copy_mem_in((u8 *) &v32, ea, nb, regs);
+ if (err)
+ break;
+ if (unlikely(cross_endian))
+ v32 = byterev_4(v32);
+ regs->gpr[rd] = v32;
+ ea += 4;
+ /* reg number wraps from 31 to 0 for lsw[ix] */
+ rd = (rd + 1) & 0x1f;
+ }
+ break;
+
+ case STORE:
+#ifdef __powerpc64__
+ if (size == 16) {
+ err = emulate_stq(regs, ea, op->reg, cross_endian);
+ break;
+ }
+#endif
+ if ((op->type & UPDATE) && size == sizeof(long) &&
+ op->reg == 1 && op->update_reg == 1 &&
+ !(regs->msr & MSR_PR) &&
+ ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
+ err = handle_stack_update(ea, regs);
+ break;
+ }
+ if (unlikely(cross_endian))
+ do_byterev(&op->val, size);
+ err = write_mem(op->val, ea, size, regs);
+ break;
+
+#ifdef CONFIG_PPC_FPU
+ case STORE_FP:
+ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP))
+ return 0;
+ err = do_fp_store(op, ea, regs, cross_endian);
+ break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case STORE_VMX:
+ if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
+ return 0;
+ err = do_vec_store(op->reg, ea, size, regs, cross_endian);
+ break;
+#endif
+#ifdef CONFIG_VSX
+ case STORE_VSX: {
+ unsigned long msrbit = MSR_VSX;
+
+ /*
+ * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+ * when the target of the instruction is a vector register.
+ */
+ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+ msrbit = MSR_VEC;
+ if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit))
+ return 0;
+ err = do_vsx_store(op, ea, regs, cross_endian);
+ break;
+ }
+#endif
+ case STORE_MULTI:
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ rd = op->reg;
+ for (i = 0; i < size; i += 4) {
+ unsigned int v32 = regs->gpr[rd];
+
+ nb = size - i;
+ if (nb > 4)
+ nb = 4;
+ if (unlikely(cross_endian))
+ v32 = byterev_4(v32);
+ err = copy_mem_out((u8 *) &v32, ea, nb, regs);
+ if (err)
+ break;
+ ea += 4;
+ /* reg number wraps from 31 to 0 for stsw[ix] */
+ rd = (rd + 1) & 0x1f;
+ }
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (err)
+ return err;
+
+ if (op->type & UPDATE)
+ regs->gpr[op->update_reg] = op->ea;
+
+ return 0;
+}
+NOKPROBE_SYMBOL(emulate_loadstore);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * loads and stores, and a few other instructions.
+ * Returns 1 if the step was emulated, 0 if not,
+ * or -1 if the instruction is one that should not be stepped,
+ * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ */
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr)
+{
+ struct instruction_op op;
+ int r, err, type;
+ unsigned long val;
+ unsigned long ea;
+
+ r = analyse_instr(&op, regs, instr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ emulate_update_regs(regs, &op);
+ return 1;
+ }
+
+ err = 0;
+ type = GETTYPE(op.type);
+
+ if (OP_IS_LOAD_STORE(type)) {
+ err = emulate_loadstore(regs, &op);
+ if (err)
+ return 0;
+ goto instr_done;
+ }
+
+ switch (type) {
+ case CACHEOP:
+ ea = truncate_if_32bit(regs->msr, op.ea);
+ if (!address_ok(regs, ea, 8))
+ return 0;
+ switch (op.type & CACHEOP_MASK) {
+ case DCBST:
+ __cacheop_user_asmx(ea, err, "dcbst");
+ break;
+ case DCBF:
+ __cacheop_user_asmx(ea, err, "dcbf");
+ break;
+ case DCBTST:
+ if (op.reg == 0)
+ prefetchw((void *) ea);
+ break;
+ case DCBT:
+ if (op.reg == 0)
+ prefetch((void *) ea);
+ break;
+ case ICBI:
+ __cacheop_user_asmx(ea, err, "icbi");
+ break;
+ case DCBZ:
+ err = emulate_dcbz(ea, regs);
+ break;
+ }
+ if (err) {
+ regs->dar = ea;
+ return 0;
+ }
+ goto instr_done;
+
+ case MFMSR:
+ regs->gpr[op.reg] = regs->msr & MSR_MASK;
+ goto instr_done;
+
+ case MTMSR:
+ val = regs->gpr[op.reg];
+ if ((val & MSR_RI) == 0)
+ /* can't step mtmsr[d] that would clear MSR_RI */
+ return -1;
+ /* here op.val is the mask of bits to change */
+ regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val));
+ goto instr_done;
+
+ case SYSCALL: /* sc */
+ /*
+ * Per ISA v3.1, section 7.5.15 'Trace Interrupt', we can't
+ * single step a system call instruction:
+ *
+ * Successful completion for an instruction means that the
+ * instruction caused no other interrupt. Thus a Trace
+ * interrupt never occurs for a System Call or System Call
+ * Vectored instruction, or for a Trap instruction that
+ * traps.
+ */
+ return -1;
+ case SYSCALL_VECTORED_0: /* scv 0 */
+ return -1;
+ case RFI:
+ return -1;
+ }
+ return 0;
+
+ instr_done:
+ regs_set_return_ip(regs,
+ truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)));
+ return 1;
+}
+NOKPROBE_SYMBOL(emulate_step);
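The return convention documented above for emulate_step() is: 1 when the instruction was emulated (regs->nip already advanced), 0 when it was not, and -1 when it must not be stepped. A minimal sketch of a hypothetical caller (the helper name and error codes below are illustrative assumptions, not part of this file):

static int try_emulate(struct pt_regs *regs, ppc_inst_t instr)
{
	int ret = emulate_step(regs, instr);	/* 1, 0 or -1 per the comment above */

	if (ret > 0)
		return 0;	/* emulated; regs->nip has been advanced */
	if (ret < 0)
		return -EPERM;	/* e.g. rfid or an MSR_RI-clearing mtmsrd: execute natively */
	return -EAGAIN;		/* not emulated; fall back to hardware single-step */
}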
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
new file mode 100644
index 000000000..daa72061d
--- /dev/null
+++ b/arch/powerpc/lib/string.S
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+
+ .text
+
+/* This clears out any unused part of the destination buffer,
+ just as the libc version does. -- paulus */
+_GLOBAL(strncpy)
+ PPC_LCMPI 0,r5,0
+ beqlr
+ mtctr r5
+ addi r6,r3,-1
+ addi r4,r4,-1
+ .balign IFETCH_ALIGN_BYTES
+1: lbzu r0,1(r4)
+ cmpwi 0,r0,0
+ stbu r0,1(r6)
+ bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */
+ bnelr /* if we didn't hit a null char, we're done */
+ mfctr r5
+ PPC_LCMPI 0,r5,0 /* any space left in destination buffer? */
+ beqlr /* we know r0 == 0 here */
+2: stbu r0,1(r6) /* clear it out if so */
+ bdnz 2b
+ blr
+EXPORT_SYMBOL(strncpy)
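For readers less familiar with the ctr-driven loops above, a plain C sketch of the semantics strncpy implements here (illustration only, not part of the patch): copy up to n bytes, stop at the terminating NUL, and zero-fill whatever remains of the destination, as the libc version does.

#include <stddef.h>

/* Reference model of the assembly above. */
static char *strncpy_model(char *dest, const char *src, size_t n)
{
	size_t i = 0;

	while (i < n && src[i] != '\0') {	/* copy until NUL or n bytes */
		dest[i] = src[i];
		i++;
	}
	while (i < n)				/* clear out any unused space */
		dest[i++] = '\0';

	return dest;
}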
+
+_GLOBAL(strncmp)
+ PPC_LCMPI 0,r5,0
+ beq- 2f
+ mtctr r5
+ addi r5,r3,-1
+ addi r4,r4,-1
+ .balign IFETCH_ALIGN_BYTES
+1: lbzu r3,1(r5)
+ cmpwi 1,r3,0
+ lbzu r0,1(r4)
+ subf. r3,r0,r3
+ beqlr 1
+ bdnzt eq,1b
+ blr
+2: li r3,0
+ blr
+EXPORT_SYMBOL(strncmp)
+
+_GLOBAL(memchr)
+ PPC_LCMPI 0,r5,0
+ beq- 2f
+ mtctr r5
+ addi r3,r3,-1
+ .balign IFETCH_ALIGN_BYTES
+1: lbzu r0,1(r3)
+ cmpw 0,r0,r4
+ bdnzf 2,1b
+ beqlr
+2: li r3,0
+ blr
+EXPORT_SYMBOL(memchr)
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
new file mode 100644
index 000000000..3ee45619a
--- /dev/null
+++ b/arch/powerpc/lib/string_32.S
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * String handling functions for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+
+ .text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+_GLOBAL(__arch_clear_user)
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero. This requires that the destination
+ * area is cacheable.
+ */
+ cmplwi cr0, r4, 4
+ mr r10, r3
+ li r3, 0
+ blt 7f
+
+11: stw r3, 0(r10)
+ beqlr
+ andi. r0, r10, 3
+ add r11, r0, r4
+ subf r6, r0, r10
+
+ clrlwi r7, r6, 32 - LG_CACHELINE_BYTES
+ add r8, r7, r11
+ srwi r9, r8, LG_CACHELINE_BYTES
+ addic. r9, r9, -1 /* total number of complete cachelines */
+ ble 2f
+ xori r0, r7, CACHELINE_MASK & ~3
+ srwi. r0, r0, 2
+ beq 3f
+ mtctr r0
+4: stwu r3, 4(r6)
+ bdnz 4b
+3: mtctr r9
+ li r7, 4
+10: dcbz r7, r6
+ addi r6, r6, CACHELINE_BYTES
+ bdnz 10b
+ clrlwi r11, r8, 32 - LG_CACHELINE_BYTES
+ addi r11, r11, 4
+
+2: srwi r0 ,r11 ,2
+ mtctr r0
+ bdz 6f
+1: stwu r3, 4(r6)
+ bdnz 1b
+6: andi. r11, r11, 3
+ beqlr
+ mtctr r11
+ addi r6, r6, 3
+8: stbu r3, 1(r6)
+ bdnz 8b
+ blr
+
+7: cmpwi cr0, r4, 0
+ beqlr
+ mtctr r4
+ addi r6, r10, -1
+9: stbu r3, 1(r6)
+ bdnz 9b
+ blr
+
+90: mr r3, r4
+ blr
+91: add r3, r10, r4
+ subf r3, r6, r3
+ blr
+
+ EX_TABLE(11b, 90b)
+ EX_TABLE(4b, 91b)
+ EX_TABLE(10b, 91b)
+ EX_TABLE(1b, 91b)
+ EX_TABLE(8b, 91b)
+ EX_TABLE(9b, 91b)
+
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
new file mode 100644
index 000000000..a25eb8588
--- /dev/null
+++ b/arch/powerpc/lib/string_64.S
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/linkage.h>
+#include <asm/asm-offsets.h>
+
+/**
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking.
+ * @to: Destination address, in user space.
+ * @n: Number of bytes to zero.
+ *
+ * Zero a block of memory in user space. Caller must check
+ * the specified block with access_ok() before calling this function.
+ *
+ * Returns number of bytes that could not be cleared.
+ * On success, this will be zero.
+ */
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300:
+ EX_TABLE(300b,.Ldo_err3)
+ .endm
+
+.Ldo_err1:
+ mr r3,r8
+
+.Ldo_err2:
+ mtctr r4
+1:
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ addi r4,r4,-1
+ bdnz 1b
+
+.Ldo_err3:
+ mr r3,r4
+ blr
+
+_GLOBAL_TOC(__arch_clear_user)
+ cmpdi r4,32
+ neg r6,r3
+ li r0,0
+ blt .Lshort_clear
+ mr r8,r3
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ /* Get the destination 8 byte aligned */
+ bf cr7*4+3,1f
+err1; stb r0,0(r3)
+ addi r3,r3,1
+
+1: bf cr7*4+2,2f
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+2: bf cr7*4+1,3f
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+3: sub r4,r4,r6
+
+ cmpdi r4,32
+ cmpdi cr1,r4,512
+ blt .Lshort_clear
+ bgt cr1,.Llong_clear
+
+.Lmedium_clear:
+ srdi r6,r4,5
+ mtctr r6
+
+ /* Do 32 byte chunks */
+4:
+err2; std r0,0(r3)
+err2; std r0,8(r3)
+err2; std r0,16(r3)
+err2; std r0,24(r3)
+ addi r3,r3,32
+ addi r4,r4,-32
+ bdnz 4b
+
+.Lshort_clear:
+ /* up to 31 bytes to go */
+ cmpdi r4,16
+ blt 6f
+err2; std r0,0(r3)
+err2; std r0,8(r3)
+ addi r3,r3,16
+ addi r4,r4,-16
+
+ /* Up to 15 bytes to go */
+6: mr r8,r3
+ clrldi r4,r4,(64-4)
+ mtocrf 0x01,r4
+ bf cr7*4+0,7f
+err1; std r0,0(r3)
+ addi r3,r3,8
+
+7: bf cr7*4+1,8f
+err1; stw r0,0(r3)
+ addi r3,r3,4
+
+8: bf cr7*4+2,9f
+err1; sth r0,0(r3)
+ addi r3,r3,2
+
+9: bf cr7*4+3,10f
+err1; stb r0,0(r3)
+
+10: li r3,0
+ blr
+
+.Llong_clear:
+ LOAD_REG_ADDR(r5, ppc64_caches)
+
+ bf cr7*4+0,11f
+err2; std r0,0(r3)
+ addi r3,r3,8
+ addi r4,r4,-8
+
+ /* Destination is 16 byte aligned, need to get it cache block aligned */
+11: lwz r7,DCACHEL1LOGBLOCKSIZE(r5)
+ lwz r9,DCACHEL1BLOCKSIZE(r5)
+
+ /*
+ * With worst case alignment the long clear loop takes a minimum
+ * of 1 byte less than 2 cachelines.
+ */
+ sldi r10,r9,2
+ cmpd r4,r10
+ blt .Lmedium_clear
+
+ neg r6,r3
+ addi r10,r9,-1
+ and. r5,r6,r10
+ beq 13f
+
+ srdi r6,r5,4
+ mtctr r6
+ mr r8,r3
+12:
+err1; std r0,0(r3)
+err1; std r0,8(r3)
+ addi r3,r3,16
+ bdnz 12b
+
+ sub r4,r4,r5
+
+13: srd r6,r4,r7
+ mtctr r6
+ mr r8,r3
+14:
+err1; dcbz 0,r3
+ add r3,r3,r9
+ bdnz 14b
+
+ and r4,r4,r10
+
+ cmpdi r4,32
+ blt .Lshort_clear
+ b .Lmedium_clear
+EXPORT_SYMBOL(__arch_clear_user)
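One detail worth noting in the routine above: the prologue computes the number of bytes needed to reach 8-byte alignment with "neg r6,r3" followed by "clrldi r6,r6,(64-3)", then uses the low bits of that count (moved into cr7 with mtocrf) to select the stb/sth/stw fix-up stores. A small userspace sketch of the same arithmetic, for illustration only:

#include <stdint.h>
#include <stdio.h>

/* (-p) & 7 == bytes from p to the next 8-byte boundary, i.e. what
 * "neg r6,r3; clrldi r6,r6,61" leaves in r6 in the code above. */
static unsigned int bytes_to_align8(uintptr_t p)
{
	return (unsigned int)(-p & 7);
}

int main(void)
{
	printf("%u\n", bytes_to_align8(0x1000));	/* 0: already aligned */
	printf("%u\n", bytes_to_align8(0x1003));	/* 5: one stb plus one stw */
	return 0;
}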
diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S
new file mode 100644
index 000000000..bbd24feb2
--- /dev/null
+++ b/arch/powerpc/lib/strlen_32.S
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * strlen() for PPC32
+ *
+ * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
+ *
+ * Inspired from glibc implementation
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+
+ .text
+
+/*
+ * Algorithm:
+ *
+ * 1) Given a word 'x', we can test to see if it contains any 0 bytes
+ * by subtracting 0x01010101, and seeing if any of the high bits of each
+ * byte changed from 0 to 1. This works because the least significant
+ * 0 byte must have had no incoming carry (otherwise it's not the least
+ * significant), so it is 0x00 - 0x01 == 0xff. For all other
+ * byte values, either they have the high bit set initially, or when
+ * 1 is subtracted you get a value in the range 0x00-0x7f, none of which
+ * have their high bit set. The expression here is
+ * ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
+ * there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ * match, but possibly also a false 0x80 match in the byte just above
+ * a true match, caused by carries. For little-endian this is
+ * of no consequence since the least significant match is the one
+ * we're interested in, but big-endian needs method 2 to find which
+ * byte matches.
+ * 2) Given a word 'x', we can test to see _which_ byte was zero by
+ * calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
+ * This produces 0x80 in each byte that was zero, and 0x00 in all
+ * the other bytes. The '| ~0x80808080' clears the low 7 bits in each
+ * byte, and the '| x' part ensures that bytes with the high bit set
+ * produce 0x00. The addition will carry into the high bit of each byte
+ * iff that byte had one of its low 7 bits set. We can then just see
+ * which was the most significant bit set and divide by 8 to find how
+ * many to add to the index.
+ * This is from the book 'The PowerPC Compiler Writer's Guide',
+ * by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
+ */
+
+_GLOBAL(strlen)
+ andi. r0, r3, 3
+ lis r7, 0x0101
+ addi r10, r3, -4
+ addic r7, r7, 0x0101 /* r7 = 0x01010101 (lomagic) & clear XER[CA] */
+ rotlwi r6, r7, 31 /* r6 = 0x80808080 (himagic) */
+ bne- 3f
+ .balign IFETCH_ALIGN_BYTES
+1: lwzu r9, 4(r10)
+2: subf r8, r7, r9
+ and. r8, r8, r6
+ beq+ 1b
+ andc. r8, r8, r9
+ beq+ 1b
+ andc r8, r9, r6
+ orc r9, r9, r6
+ subfe r8, r6, r8
+ nor r8, r8, r9
+ cntlzw r8, r8
+ subf r3, r3, r10
+ srwi r8, r8, 3
+ add r3, r3, r8
+ blr
+
+	/* Misaligned string: make sure the bytes before the string are not seen as 0 */
+3: xor r10, r10, r0
+ orc r8, r8, r8
+ lwzu r9, 4(r10)
+ slwi r0, r0, 3
+ srw r8, r8, r0
+ orc r9, r9, r8
+ b 2b
+EXPORT_SYMBOL(strlen)
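The algorithm comment above describes two word-at-a-time expressions: one that tells whether a word contains a zero byte at all, and one that marks exactly which byte is zero. A self-contained userspace sketch of both (illustration only; the second expression is rewritten using the equivalent 0x7f7f7f7f form):

#include <stdint.h>
#include <stdio.h>

/* Method 1: non-zero iff the word contains at least one 0x00 byte. */
static int word_has_zero_byte(uint32_t x)
{
	return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}

/* Method 2: 0x80 in every byte that held 0x00, 0x00 in all others.
 * (x & ~0x80808080) - 0x80808080 - 1 equals (x & 0x7f7f7f7f) + 0x7f7f7f7f
 * in 32-bit arithmetic, so this matches the formula in the comment. */
static uint32_t zero_byte_mask(uint32_t x)
{
	return ~(((x & 0x7f7f7f7fu) + 0x7f7f7f7fu) | x | 0x7f7f7f7fu);
}

int main(void)
{
	uint32_t w = 0x61620063;	/* bytes 61 62 00 63 */

	printf("has zero byte: %d\n", word_has_zero_byte(w));	/* prints 1 */
	printf("zero byte mask: 0x%08x\n", zero_byte_mask(w));	/* prints 0x00008000 */
	return 0;
}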
diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c
new file mode 100644
index 000000000..c44823292
--- /dev/null
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/code-patching.h>
+
+static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
+{
+ if (instr_is_branch_iform(ppc_inst_read(instr)) ||
+ instr_is_branch_bform(ppc_inst_read(instr)))
+ return branch_target(instr) == addr;
+
+ return 0;
+}
+
+static void __init test_trampoline(void)
+{
+ asm ("nop;nop;\n");
+}
+
+#define check(x) do { \
+ if (!(x)) \
+ pr_err("code-patching: test failed at line %d\n", __LINE__); \
+} while (0)
+
+static void __init test_branch_iform(void)
+{
+ int err;
+ ppc_inst_t instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned long addr = (unsigned long)tmp;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_iform(ppc_inst(0x48000000)));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_iform(ppc_inst(0x4bffffff)));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_iform(ppc_inst(0xcbffffff)));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_iform(ppc_inst(0x7bffffff)));
+
+ /* Simplest case, branch to self with link */
+ check(instr_is_branch_iform(ppc_inst(0x48000001)));
+ /* All bits of targets set */
+ check(instr_is_branch_iform(ppc_inst(0x4bfffffd)));
+ /* Some bits of targets set */
+ check(instr_is_branch_iform(ppc_inst(0x4bff00fd)));
+ /* Must be a valid branch to start with */
+ check(!instr_is_branch_iform(ppc_inst(0x7bfffffd)));
+
+ /* Absolute branch to 0x100 */
+ ppc_inst_write(iptr, ppc_inst(0x48000103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
+ /* Absolute branch to 0x420fc */
+ ppc_inst_write(iptr, ppc_inst(0x480420ff));
+ check(instr_is_branch_to_addr(iptr, 0x420fc));
+ /* Maximum positive relative branch, + 20MB - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x49fffffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC));
+ /* Smallest negative relative branch, - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x4bfffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
+ /* Largest negative relative branch, - 32 MB */
+ ppc_inst_write(iptr, ppc_inst(0x4a000000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+ /* Branch to self, with link */
+ err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+
+ /* Branch to self - 0x100, with link */
+ err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+ /* Branch to self + 0x100, no link */
+ err = create_branch(&instr, iptr, addr + 0x100, 0);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 MB */
+ err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+ /* Out of range relative negative offset, - 32 MB + 4*/
+ err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK);
+ check(err);
+
+ /* Out of range relative positive offset, + 32 MB */
+ err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK);
+ check(err);
+
+ /* Unaligned target */
+ err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK);
+ check(err);
+
+ /* Check flags are masked correctly */
+ err = create_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+ check(ppc_inst_equal(instr, ppc_inst(0x48000000)));
+}
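The hand-encoded words used above (0x48000103, 0x49fffffc, 0x4a000000 and friends) follow the I-form branch layout: a sign-extended 26-bit byte displacement in bits 6-29 (the LI field shifted left by 2), the AA absolute-address bit at 0x2 and the LK link bit at 0x1. A small userspace decoder sketch, for illustration only, that reproduces the targets the test checks:

#include <stdint.h>
#include <stdio.h>

/* Compute the target of a PowerPC I-form branch word fetched from 'addr'. */
static uint64_t i_form_target(uint32_t word, uint64_t addr)
{
	int64_t li = word & 0x03fffffc;		/* displacement, already a byte offset */

	if (li & 0x02000000)			/* sign-extend */
		li -= 0x04000000;

	return (word & 2) ? (uint64_t)li : addr + li;	/* AA bit: absolute vs relative */
}

int main(void)
{
	uint64_t addr = 0x10000000;

	printf("%#llx\n", (unsigned long long)i_form_target(0x48000103, addr));	/* 0x100 */
	printf("%#llx\n", (unsigned long long)i_form_target(0x49fffffc, addr));	/* addr + 0x1fffffc */
	printf("%#llx\n", (unsigned long long)i_form_target(0x4a000000, addr));	/* addr - 0x2000000 */
	return 0;
}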
+
+static void __init test_create_function_call(void)
+{
+ u32 *iptr;
+ unsigned long dest;
+ ppc_inst_t instr;
+
+ /* Check we can create a function call */
+ iptr = (u32 *)ppc_function_entry(test_trampoline);
+ dest = ppc_function_entry(test_create_function_call);
+ create_branch(&instr, iptr, dest, BRANCH_SET_LINK);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, dest));
+}
+
+static void __init test_branch_bform(void)
+{
+ int err;
+ unsigned long addr;
+ ppc_inst_t instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned int flags;
+
+ addr = (unsigned long)iptr;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_bform(ppc_inst(0x40000000)));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_bform(ppc_inst(0x43ffffff)));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_bform(ppc_inst(0xc3ffffff)));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_bform(ppc_inst(0x7bffffff)));
+
+ /* Absolute conditional branch to 0x100 */
+ ppc_inst_write(iptr, ppc_inst(0x43ff0103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
+ /* Absolute conditional branch to 0x20fc */
+ ppc_inst_write(iptr, ppc_inst(0x43ff20ff));
+ check(instr_is_branch_to_addr(iptr, 0x20fc));
+ /* Maximum positive relative conditional branch, + 32 KB - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x43ff7ffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x7FFC));
+ /* Smallest negative relative conditional branch, - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x43fffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
+ /* Largest negative relative conditional branch, - 32 KB */
+ ppc_inst_write(iptr, ppc_inst(0x43ff8000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+ /* All condition code bits set & link */
+ flags = 0x3ff000 | BRANCH_SET_LINK;
+
+ /* Branch to self */
+ err = create_cond_branch(&instr, iptr, addr, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+
+ /* Branch to self - 0x100 */
+ err = create_cond_branch(&instr, iptr, addr - 0x100, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+ /* Branch to self + 0x100 */
+ err = create_cond_branch(&instr, iptr, addr + 0x100, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 KB */
+ err = create_cond_branch(&instr, iptr, addr - 0x8000, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+ /* Out of range relative negative offset, - 32 KB + 4*/
+ err = create_cond_branch(&instr, iptr, addr - 0x8004, flags);
+ check(err);
+
+ /* Out of range relative positive offset, + 32 KB */
+ err = create_cond_branch(&instr, iptr, addr + 0x8000, flags);
+ check(err);
+
+ /* Unaligned target */
+ err = create_cond_branch(&instr, iptr, addr + 3, flags);
+ check(err);
+
+ /* Check flags are masked correctly */
+ err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+ check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));
+}
+
+static void __init test_translate_branch(void)
+{
+ unsigned long addr;
+ void *p, *q;
+ ppc_inst_t instr;
+ void *buf;
+
+ buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
+ check(buf);
+ if (!buf)
+ return;
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ q = p + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 MB */
+ p = buf;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 0x2000000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000)));
+
+ /* Maximum positive case, move x to x - 32 MB + 4 */
+ p = buf + 0x2000000;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc)));
+
+ /* Jump to x + 16 MB moved to x + 20 MB */
+ p = buf;
+ addr = 0x1000000 + (unsigned long)buf;
+ create_branch(&instr, p, addr, BRANCH_SET_LINK);
+ ppc_inst_write(p, instr);
+ q = buf + 0x1400000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 16 MB moved to x - 16 MB + 4 */
+ p = buf + 0x1000000;
+ addr = 0x2000000 + (unsigned long)buf;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+
+ /* Conditional branch tests */
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 KB */
+ p = buf;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+ ppc_inst_write(p, instr);
+ q = buf + 0x8000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000)));
+
+ /* Maximum positive case, move x to x - 32 KB + 4 */
+ p = buf + 0x8000;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc)));
+
+ /* Jump to x + 12 KB moved to x + 20 KB */
+ p = buf;
+ addr = 0x3000 + (unsigned long)buf;
+ create_cond_branch(&instr, p, addr, BRANCH_SET_LINK);
+ ppc_inst_write(p, instr);
+ q = buf + 0x5000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 8 KB moved to x - 8 KB + 4 */
+ p = buf + 0x2000;
+ addr = 0x4000 + (unsigned long)buf;
+ create_cond_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Free the buffer we were using */
+ vfree(buf);
+}
+
+static void __init test_prefixed_patching(void)
+{
+ u32 *iptr = (u32 *)ppc_function_entry(test_trampoline);
+ u32 expected[2] = {OP_PREFIX << 26, 0};
+ ppc_inst_t inst = ppc_inst_prefix(OP_PREFIX << 26, 0);
+
+ if (!IS_ENABLED(CONFIG_PPC64))
+ return;
+
+ patch_instruction(iptr, inst);
+
+ check(!memcmp(iptr, expected, sizeof(expected)));
+}
+
+static int __init test_code_patching(void)
+{
+ pr_info("Running code patching self-tests ...\n");
+
+ test_branch_iform();
+ test_branch_bform();
+ test_create_function_call();
+ test_translate_branch();
+ test_prefixed_patching();
+
+ return 0;
+}
+late_initcall(test_code_patching);
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 000000000..23c7805fb
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,1741 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Simple sanity tests for instruction emulation infrastructure.
+ *
+ * Copyright IBM Corp. 2016
+ */
+
+#define pr_fmt(fmt) "emulate_step_test: " fmt
+
+#include <linux/ptrace.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+#include <asm/code-patching.h>
+#include <asm/inst.h>
+
+#define MAX_SUBTESTS 16
+
+#define IGNORE_GPR(n) (0x1UL << (n))
+#define IGNORE_XER (0x1UL << 32)
+#define IGNORE_CCR (0x1UL << 33)
+#define NEGATIVE_TEST (0x1UL << 63)
+
+#define TEST_PLD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLWZ(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_RAW_LWZ(r, base, i))
+
+#define TEST_PSTD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFS(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFS(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PADDI(t, a, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_RAW_ADDI(t, a, i))
+
+static void __init init_pt_regs(struct pt_regs *regs)
+{
+ static unsigned long msr;
+ static bool msr_cached;
+
+ memset(regs, 0, sizeof(struct pt_regs));
+
+ if (likely(msr_cached)) {
+ regs->msr = msr;
+ return;
+ }
+
+ asm volatile("mfmsr %0" : "=r"(regs->msr));
+
+ regs->msr |= MSR_FP;
+ regs->msr |= MSR_VEC;
+ regs->msr |= MSR_VSX;
+
+ msr = regs->msr;
+ msr_cached = true;
+}
+
+static void __init show_result(char *mnemonic, char *result)
+{
+ pr_info("%-14s : %s\n", mnemonic, result);
+}
+
+static void __init show_result_with_descr(char *mnemonic, char *descr,
+ char *result)
+{
+ pr_info("%-14s : %-50s %s\n", mnemonic, descr, result);
+}
+
+static void __init test_ld(void)
+{
+ struct pt_regs regs;
+ unsigned long a = 0x23;
+ int stepped = -1;
+
+ init_pt_regs(&regs);
+ regs.gpr[3] = (unsigned long) &a;
+
+ /* ld r5, 0(r3) */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LD(5, 3, 0)));
+
+ if (stepped == 1 && regs.gpr[5] == a)
+ show_result("ld", "PASS");
+ else
+ show_result("ld", "FAIL");
+}
+
+static void __init test_pld(void)
+{
+ struct pt_regs regs;
+ unsigned long a = 0x23;
+ int stepped = -1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ show_result("pld", "SKIP (!CPU_FTR_ARCH_31)");
+ return;
+ }
+
+ init_pt_regs(&regs);
+ regs.gpr[3] = (unsigned long)&a;
+
+ /* pld r5, 0(r3), 0 */
+ stepped = emulate_step(&regs, TEST_PLD(5, 3, 0, 0));
+
+ if (stepped == 1 && regs.gpr[5] == a)
+ show_result("pld", "PASS");
+ else
+ show_result("pld", "FAIL");
+}
+
+static void __init test_lwz(void)
+{
+ struct pt_regs regs;
+ unsigned int a = 0x4545;
+ int stepped = -1;
+
+ init_pt_regs(&regs);
+ regs.gpr[3] = (unsigned long) &a;
+
+ /* lwz r5, 0(r3) */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LWZ(5, 3, 0)));
+
+ if (stepped == 1 && regs.gpr[5] == a)
+ show_result("lwz", "PASS");
+ else
+ show_result("lwz", "FAIL");
+}
+
+static void __init test_plwz(void)
+{
+ struct pt_regs regs;
+ unsigned int a = 0x4545;
+ int stepped = -1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)");
+ return;
+ }
+
+ init_pt_regs(&regs);
+ regs.gpr[3] = (unsigned long)&a;
+
+ /* plwz r5, 0(r3), 0 */
+
+ stepped = emulate_step(&regs, TEST_PLWZ(5, 3, 0, 0));
+
+ if (stepped == 1 && regs.gpr[5] == a)
+ show_result("plwz", "PASS");
+ else
+ show_result("plwz", "FAIL");
+}
+
+static void __init test_lwzx(void)
+{
+ struct pt_regs regs;
+ unsigned int a[3] = {0x0, 0x0, 0x1234};
+ int stepped = -1;
+
+ init_pt_regs(&regs);
+ regs.gpr[3] = (unsigned long) a;
+ regs.gpr[4] = 8;
+ regs.gpr[5] = 0x8765;
+
+ /* lwzx r5, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LWZX(5, 3, 4)));
+ if (stepped == 1 && regs.gpr[5] == a[2])
+ show_result("lwzx", "PASS");
+ else
+ show_result("lwzx", "FAIL");
+}
+
+static void __init test_std(void)
+{
+ struct pt_regs regs;
+ unsigned long a = 0x1234;
+ int stepped = -1;
+
+ init_pt_regs(&regs);
+ regs.gpr[3] = (unsigned long) &a;
+ regs.gpr[5] = 0x5678;
+
+ /* std r5, 0(r3) */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STD(5, 3, 0)));
+ if (stepped == 1 && regs.gpr[5] == a)
+ show_result("std", "PASS");
+ else
+ show_result("std", "FAIL");
+}
+
+static void __init test_pstd(void)
+{
+ struct pt_regs regs;
+ unsigned long a = 0x1234;
+ int stepped = -1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)");
+ return;
+ }
+
+ init_pt_regs(&regs);
+ regs.gpr[3] = (unsigned long)&a;
+ regs.gpr[5] = 0x5678;
+
+ /* pstd r5, 0(r3), 0 */
+ stepped = emulate_step(&regs, TEST_PSTD(5, 3, 0, 0));
+	if (stepped == 1 && regs.gpr[5] == a)
+ show_result("pstd", "PASS");
+ else
+ show_result("pstd", "FAIL");
+}
+
+static void __init test_ldarx_stdcx(void)
+{
+ struct pt_regs regs;
+ unsigned long a = 0x1234;
+ int stepped = -1;
+ unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */
+
+ init_pt_regs(&regs);
+ asm volatile("mfcr %0" : "=r"(regs.ccr));
+
+
+ /*** ldarx ***/
+
+ regs.gpr[3] = (unsigned long) &a;
+ regs.gpr[4] = 0;
+ regs.gpr[5] = 0x5678;
+
+ /* ldarx r5, r3, r4, 0 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0)));
+
+	/*
+	 * Don't touch 'a' here. Touching 'a' can cause a load/store
+	 * of 'a', which can make the subsequent stdcx. fail. Compare
+	 * against the hardcoded value instead.
+	 */
+ if (stepped <= 0 || regs.gpr[5] != 0x1234) {
+ show_result("ldarx / stdcx.", "FAIL (ldarx)");
+ return;
+ }
+
+
+ /*** stdcx. ***/
+
+ regs.gpr[5] = 0x9ABC;
+
+ /* stdcx. r5, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STDCX(5, 3, 4)));
+
+	/*
+	 * Two possible scenarios indicate successful emulation
+	 * of stdcx.:
+	 * 1. The reservation is active and the store is performed.
+	 *    In this case the CR0.EQ bit is set to 1.
+	 * 2. The reservation is not active and the store is not
+	 *    performed. In this case the CR0.EQ bit is set to 0.
+	 */
+ if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq))
+ || (regs.gpr[5] != a && !(regs.ccr & cr0_eq))))
+ show_result("ldarx / stdcx.", "PASS");
+ else
+ show_result("ldarx / stdcx.", "FAIL (stdcx.)");
+}
+
+#ifdef CONFIG_PPC_FPU
+static void __init test_lfsx_stfsx(void)
+{
+ struct pt_regs regs;
+ union {
+ float a;
+ int b;
+ } c;
+ int cached_b;
+ int stepped = -1;
+
+ init_pt_regs(&regs);
+
+
+ /*** lfsx ***/
+
+ c.a = 123.45;
+ cached_b = c.b;
+
+ regs.gpr[3] = (unsigned long) &c.a;
+ regs.gpr[4] = 0;
+
+ /* lfsx frt10, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LFSX(10, 3, 4)));
+
+ if (stepped == 1)
+ show_result("lfsx", "PASS");
+ else
+ show_result("lfsx", "FAIL");
+
+
+ /*** stfsx ***/
+
+ c.a = 678.91;
+
+ /* stfsx frs10, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STFSX(10, 3, 4)));
+
+ if (stepped == 1 && c.b == cached_b)
+ show_result("stfsx", "PASS");
+ else
+ show_result("stfsx", "FAIL");
+}
+
+static void __init test_plfs_pstfs(void)
+{
+ struct pt_regs regs;
+ union {
+ float a;
+ int b;
+ } c;
+ int cached_b;
+ int stepped = -1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plfs", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstfs", "SKIP (!CPU_FTR_ARCH_31)");
+ return;
+ }
+
+ init_pt_regs(&regs);
+
+
+ /*** plfs ***/
+
+ c.a = 123.45;
+ cached_b = c.b;
+
+ regs.gpr[3] = (unsigned long)&c.a;
+
+ /* plfs frt10, 0(r3), 0 */
+ stepped = emulate_step(&regs, TEST_PLFS(10, 3, 0, 0));
+
+ if (stepped == 1)
+ show_result("plfs", "PASS");
+ else
+ show_result("plfs", "FAIL");
+
+
+ /*** pstfs ***/
+
+ c.a = 678.91;
+
+ /* pstfs frs10, 0(r3), 0 */
+ stepped = emulate_step(&regs, TEST_PSTFS(10, 3, 0, 0));
+
+ if (stepped == 1 && c.b == cached_b)
+ show_result("pstfs", "PASS");
+ else
+ show_result("pstfs", "FAIL");
+}
+
+static void __init test_lfdx_stfdx(void)
+{
+ struct pt_regs regs;
+ union {
+ double a;
+ long b;
+ } c;
+ long cached_b;
+ int stepped = -1;
+
+ init_pt_regs(&regs);
+
+
+ /*** lfdx ***/
+
+ c.a = 123456.78;
+ cached_b = c.b;
+
+ regs.gpr[3] = (unsigned long) &c.a;
+ regs.gpr[4] = 0;
+
+ /* lfdx frt10, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LFDX(10, 3, 4)));
+
+ if (stepped == 1)
+ show_result("lfdx", "PASS");
+ else
+ show_result("lfdx", "FAIL");
+
+
+ /*** stfdx ***/
+
+ c.a = 987654.32;
+
+ /* stfdx frs10, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STFDX(10, 3, 4)));
+
+ if (stepped == 1 && c.b == cached_b)
+ show_result("stfdx", "PASS");
+ else
+ show_result("stfdx", "FAIL");
+}
+
+static void __init test_plfd_pstfd(void)
+{
+ struct pt_regs regs;
+ union {
+ double a;
+ long b;
+ } c;
+ long cached_b;
+ int stepped = -1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plfd", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstfd", "SKIP (!CPU_FTR_ARCH_31)");
+ return;
+ }
+
+ init_pt_regs(&regs);
+
+
+ /*** plfd ***/
+
+ c.a = 123456.78;
+ cached_b = c.b;
+
+ regs.gpr[3] = (unsigned long)&c.a;
+
+ /* plfd frt10, 0(r3), 0 */
+ stepped = emulate_step(&regs, TEST_PLFD(10, 3, 0, 0));
+
+ if (stepped == 1)
+ show_result("plfd", "PASS");
+ else
+ show_result("plfd", "FAIL");
+
+
+ /*** pstfd ***/
+
+ c.a = 987654.32;
+
+ /* pstfd frs10, 0(r3), 0 */
+ stepped = emulate_step(&regs, TEST_PSTFD(10, 3, 0, 0));
+
+ if (stepped == 1 && c.b == cached_b)
+ show_result("pstfd", "PASS");
+ else
+ show_result("pstfd", "FAIL");
+}
+#else
+static void __init test_lfsx_stfsx(void)
+{
+ show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+ show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_plfs_pstfs(void)
+{
+ show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)");
+ show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_lfdx_stfdx(void)
+{
+ show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+ show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+static void __init test_plfd_pstfd(void)
+{
+ show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)");
+ show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+static void __init test_lvx_stvx(void)
+{
+ struct pt_regs regs;
+ union {
+ vector128 a;
+ u32 b[4];
+ } c;
+ u32 cached_b[4];
+ int stepped = -1;
+
+ init_pt_regs(&regs);
+
+
+ /*** lvx ***/
+
+ cached_b[0] = c.b[0] = 923745;
+ cached_b[1] = c.b[1] = 2139478;
+ cached_b[2] = c.b[2] = 9012;
+ cached_b[3] = c.b[3] = 982134;
+
+ regs.gpr[3] = (unsigned long) &c.a;
+ regs.gpr[4] = 0;
+
+ /* lvx vrt10, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LVX(10, 3, 4)));
+
+ if (stepped == 1)
+ show_result("lvx", "PASS");
+ else
+ show_result("lvx", "FAIL");
+
+
+ /*** stvx ***/
+
+ c.b[0] = 4987513;
+ c.b[1] = 84313948;
+ c.b[2] = 71;
+ c.b[3] = 498532;
+
+ /* stvx vrs10, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STVX(10, 3, 4)));
+
+ if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+ cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
+ show_result("stvx", "PASS");
+ else
+ show_result("stvx", "FAIL");
+}
+#else
+static void __init test_lvx_stvx(void)
+{
+ show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)");
+ show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)");
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvd2x_stxvd2x(void)
+{
+ struct pt_regs regs;
+ union {
+ vector128 a;
+ u32 b[4];
+ } c;
+ u32 cached_b[4];
+ int stepped = -1;
+
+ init_pt_regs(&regs);
+
+
+ /*** lxvd2x ***/
+
+ cached_b[0] = c.b[0] = 18233;
+ cached_b[1] = c.b[1] = 34863571;
+ cached_b[2] = c.b[2] = 834;
+ cached_b[3] = c.b[3] = 6138911;
+
+ regs.gpr[3] = (unsigned long) &c.a;
+ regs.gpr[4] = 0;
+
+ /* lxvd2x vsr39, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4)));
+
+ if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+ show_result("lxvd2x", "PASS");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("lxvd2x", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("lxvd2x", "FAIL");
+ }
+
+
+ /*** stxvd2x ***/
+
+ c.b[0] = 21379463;
+ c.b[1] = 87;
+ c.b[2] = 374234;
+ c.b[3] = 4;
+
+ /* stxvd2x vsr39, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4)));
+
+ if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
+ cached_b[2] == c.b[2] && cached_b[3] == c.b[3] &&
+ cpu_has_feature(CPU_FTR_VSX)) {
+ show_result("stxvd2x", "PASS");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("stxvd2x", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("stxvd2x", "FAIL");
+ }
+}
+#else
+static void __init test_lxvd2x_stxvd2x(void)
+{
+ show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)");
+ show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvp_stxvp(void)
+{
+ struct pt_regs regs;
+ union {
+ vector128 a;
+ u32 b[4];
+ } c[2];
+ u32 cached_b[8];
+ int stepped = -1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)");
+ show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)");
+ return;
+ }
+
+ init_pt_regs(&regs);
+
+ /*** lxvp ***/
+
+ cached_b[0] = c[0].b[0] = 18233;
+ cached_b[1] = c[0].b[1] = 34863571;
+ cached_b[2] = c[0].b[2] = 834;
+ cached_b[3] = c[0].b[3] = 6138911;
+ cached_b[4] = c[1].b[0] = 1234;
+ cached_b[5] = c[1].b[1] = 5678;
+ cached_b[6] = c[1].b[2] = 91011;
+ cached_b[7] = c[1].b[3] = 121314;
+
+ regs.gpr[4] = (unsigned long)&c[0].a;
+
+ /*
+ * lxvp XTp,DQ(RA)
+ * XTp = 32xTX + 2xTp
+ * let TX=1 Tp=1 RA=4 DQ=0
+ */
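+	/* With TX=1, Tp=1: XTp = 32*1 + 2*1 = 34, i.e. the VSR34/VSR35 pair */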
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVP(34, 4, 0)));
+
+ if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+ show_result("lxvp", "PASS");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("lxvp", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("lxvp", "FAIL");
+ }
+
+ /*** stxvp ***/
+
+ c[0].b[0] = 21379463;
+ c[0].b[1] = 87;
+ c[0].b[2] = 374234;
+ c[0].b[3] = 4;
+ c[1].b[0] = 90;
+ c[1].b[1] = 122;
+ c[1].b[2] = 555;
+ c[1].b[3] = 32144;
+
+ /*
+ * stxvp XSp,DQ(RA)
+ * XSp = 32xSX + 2xSp
+ * let SX=1 Sp=1 RA=4 DQ=0
+ */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVP(34, 4, 0)));
+
+ if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] &&
+ cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] &&
+ cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] &&
+ cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] &&
+ cpu_has_feature(CPU_FTR_VSX)) {
+ show_result("stxvp", "PASS");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("stxvp", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("stxvp", "FAIL");
+ }
+}
+#else
+static void __init test_lxvp_stxvp(void)
+{
+ show_result("lxvp", "SKIP (CONFIG_VSX is not set)");
+ show_result("stxvp", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+static void __init test_lxvpx_stxvpx(void)
+{
+ struct pt_regs regs;
+ union {
+ vector128 a;
+ u32 b[4];
+ } c[2];
+ u32 cached_b[8];
+ int stepped = -1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)");
+ show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)");
+ return;
+ }
+
+ init_pt_regs(&regs);
+
+ /*** lxvpx ***/
+
+ cached_b[0] = c[0].b[0] = 18233;
+ cached_b[1] = c[0].b[1] = 34863571;
+ cached_b[2] = c[0].b[2] = 834;
+ cached_b[3] = c[0].b[3] = 6138911;
+ cached_b[4] = c[1].b[0] = 1234;
+ cached_b[5] = c[1].b[1] = 5678;
+ cached_b[6] = c[1].b[2] = 91011;
+ cached_b[7] = c[1].b[3] = 121314;
+
+ regs.gpr[3] = (unsigned long)&c[0].a;
+ regs.gpr[4] = 0;
+
+ /*
+ * lxvpx XTp,RA,RB
+ * XTp = 32xTX + 2xTp
+ * let TX=1 Tp=1 RA=3 RB=4
+ */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVPX(34, 3, 4)));
+
+ if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+ show_result("lxvpx", "PASS");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("lxvpx", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("lxvpx", "FAIL");
+ }
+
+ /*** stxvpx ***/
+
+ c[0].b[0] = 21379463;
+ c[0].b[1] = 87;
+ c[0].b[2] = 374234;
+ c[0].b[3] = 4;
+ c[1].b[0] = 90;
+ c[1].b[1] = 122;
+ c[1].b[2] = 555;
+ c[1].b[3] = 32144;
+
+ /*
+ * stxvpx XSp,RA,RB
+ * XSp = 32xSX + 2xSp
+ * let SX=1 Sp=1 RA=3 RB=4
+ */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVPX(34, 3, 4)));
+
+ if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] &&
+ cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] &&
+ cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] &&
+ cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] &&
+ cpu_has_feature(CPU_FTR_VSX)) {
+ show_result("stxvpx", "PASS");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("stxvpx", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("stxvpx", "FAIL");
+ }
+}
+#else
+static void __init test_lxvpx_stxvpx(void)
+{
+ show_result("lxvpx", "SKIP (CONFIG_VSX is not set)");
+ show_result("stxvpx", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+static void __init test_plxvp_pstxvp(void)
+{
+ ppc_inst_t instr;
+ struct pt_regs regs;
+ union {
+ vector128 a;
+ u32 b[4];
+ } c[2];
+ u32 cached_b[8];
+ int stepped = -1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)");
+ show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)");
+ return;
+ }
+
+ /*** plxvp ***/
+
+ cached_b[0] = c[0].b[0] = 18233;
+ cached_b[1] = c[0].b[1] = 34863571;
+ cached_b[2] = c[0].b[2] = 834;
+ cached_b[3] = c[0].b[3] = 6138911;
+ cached_b[4] = c[1].b[0] = 1234;
+ cached_b[5] = c[1].b[1] = 5678;
+ cached_b[6] = c[1].b[2] = 91011;
+ cached_b[7] = c[1].b[3] = 121314;
+
+ init_pt_regs(&regs);
+ regs.gpr[3] = (unsigned long)&c[0].a;
+
+ /*
+ * plxvp XTp,D(RA),R
+ * XTp = 32xTX + 2xTp
+	 * let RA=3 D=d0||d1=0 R=0 Tp=1 TX=1
+ */
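+	/*
+	 * PPC_RAW_PLXVP_P() builds the prefix word and PPC_RAW_PLXVP_S() the
+	 * suffix word of the 8-byte prefixed instruction.
+	 */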
+ instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0));
+
+ stepped = emulate_step(&regs, instr);
+ if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
+ show_result("plxvp", "PASS");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("plxvp", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("plxvp", "FAIL");
+ }
+
+ /*** pstxvp ***/
+
+ c[0].b[0] = 21379463;
+ c[0].b[1] = 87;
+ c[0].b[2] = 374234;
+ c[0].b[3] = 4;
+ c[1].b[0] = 90;
+ c[1].b[1] = 122;
+ c[1].b[2] = 555;
+ c[1].b[3] = 32144;
+
+ /*
+ * pstxvp XSp,D(RA),R
+ * XSp = 32xSX + 2xSp
+ * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1
+ */
+ instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0));
+
+ stepped = emulate_step(&regs, instr);
+
+ if (stepped == 1 && cached_b[0] == c[0].b[0] && cached_b[1] == c[0].b[1] &&
+ cached_b[2] == c[0].b[2] && cached_b[3] == c[0].b[3] &&
+ cached_b[4] == c[1].b[0] && cached_b[5] == c[1].b[1] &&
+ cached_b[6] == c[1].b[2] && cached_b[7] == c[1].b[3] &&
+ cpu_has_feature(CPU_FTR_VSX)) {
+ show_result("pstxvp", "PASS");
+ } else {
+ if (!cpu_has_feature(CPU_FTR_VSX))
+ show_result("pstxvp", "PASS (!CPU_FTR_VSX)");
+ else
+ show_result("pstxvp", "FAIL");
+ }
+}
+#else
+static void __init test_plxvp_pstxvp(void)
+{
+ show_result("plxvp", "SKIP (CONFIG_VSX is not set)");
+ show_result("pstxvp", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+static void __init run_tests_load_store(void)
+{
+ test_ld();
+ test_pld();
+ test_lwz();
+ test_plwz();
+ test_lwzx();
+ test_std();
+ test_pstd();
+ test_ldarx_stdcx();
+ test_lfsx_stfsx();
+ test_plfs_pstfs();
+ test_lfdx_stfdx();
+ test_plfd_pstfd();
+ test_lvx_stvx();
+ test_lxvd2x_stxvd2x();
+ test_lxvp_stxvp();
+ test_lxvpx_stxvpx();
+ test_plxvp_pstxvp();
+}
+
+struct compute_test {
+ char *mnemonic;
+ unsigned long cpu_feature;
+ struct {
+ char *descr;
+ unsigned long flags;
+ ppc_inst_t instr;
+ struct pt_regs regs;
+ } subtests[MAX_SUBTESTS + 1];
+};
+
+/* Extreme values for si0||si1 (the MLS:D-form 34-bit immediate field) */
+#define SI_MIN BIT(33)
+#define SI_MAX (BIT(33) - 1)
+#define SI_UMAX (BIT(34) - 1)
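+/* As 34-bit values: SI_MIN = -2^33 (signed), SI_MAX = 2^33 - 1, SI_UMAX = 2^34 - 1 */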
+
+static struct compute_test compute_tests[] = {
+ {
+ .mnemonic = "nop",
+ .subtests = {
+ {
+ .descr = "R0 = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_NOP()),
+ .regs = {
+ .gpr[0] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "setb",
+ .cpu_feature = CPU_FTR_ARCH_300,
+ .subtests = {
+ {
+ .descr = "BFA = 1, CR = GT",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 1)),
+ .regs = {
+ .ccr = 0x4000000,
+ }
+ },
+ {
+ .descr = "BFA = 4, CR = LT",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 4)),
+ .regs = {
+ .ccr = 0x8000,
+ }
+ },
+ {
+ .descr = "BFA = 5, CR = EQ",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 5)),
+ .regs = {
+ .ccr = 0x200,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "add",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "add.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .flags = IGNORE_CCR,
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .flags = IGNORE_CCR,
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "addc",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN | (uint)INT_MIN,
+ .gpr[22] = LONG_MIN | (uint)INT_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "addc.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .flags = IGNORE_CCR,
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .flags = IGNORE_CCR,
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN | (uint)INT_MIN,
+ .gpr[22] = LONG_MIN | (uint)INT_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divde",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divde.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divdeu",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX - 1,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN + 1,
+ .gpr[22] = LONG_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divdeu.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX - 1,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN + 1,
+ .gpr[22] = LONG_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "paddi",
+ .cpu_feature = CPU_FTR_ARCH_31,
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_UMAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, SI = 0x1, R = 0",
+ .instr = TEST_PADDI(21, 22, 0x1, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, SI = 0x1, R = 0",
+ .instr = TEST_PADDI(21, 22, 0x1, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 0, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0x0,
+ }
+ },
+ {
+ .descr = "RA = 0, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0x0,
+ .gpr[22] = 0x0,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = 0, R = 1",
+ .instr = TEST_PADDI(21, 0, 0, 1),
+ .regs = {
+ .gpr[21] = 0,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = SI_MIN, R = 1",
+ .instr = TEST_PADDI(21, 0, SI_MIN, 1),
+ .regs = {
+ .gpr[21] = 0,
+ }
+ },
+ /* Invalid instruction form with R = 1 and RA != 0 */
+ {
+ .descr = "RA = R22(0), SI = 0, R = 1",
+ .instr = TEST_PADDI(21, 22, 0, 1),
+ .flags = NEGATIVE_TEST,
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = 0,
+ }
+ }
+ }
+ }
+};
+
+static int __init emulate_compute_instr(struct pt_regs *regs,
+ ppc_inst_t instr,
+ bool negative)
+{
+ int analysed;
+ struct instruction_op op;
+
+ if (!regs || !ppc_inst_val(instr))
+ return -EINVAL;
+
+	/* Not an exception frame's regs; set NIP to the test instruction site */
+ regs->nip = patch_site_addr(&patch__exec_instr);
+
+ analysed = analyse_instr(&op, regs, instr);
+ if (analysed != 1 || GETTYPE(op.type) != COMPUTE) {
+ if (negative)
+ return -EFAULT;
+ pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+ return -EFAULT;
+ }
+ if (analysed == 1 && negative)
+ pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+ if (!negative)
+ emulate_update_regs(regs, &op);
+ return 0;
+}
+
+static int __init execute_compute_instr(struct pt_regs *regs,
+ ppc_inst_t instr)
+{
+ extern int exec_instr(struct pt_regs *regs);
+
+ if (!regs || !ppc_inst_val(instr))
+ return -EINVAL;
+
+ /* Patch the NOP with the actual instruction */
+ patch_instruction_site(&patch__exec_instr, instr);
+ if (exec_instr(regs)) {
+ pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+#define gpr_mismatch(gprn, exp, got) \
+ pr_info("GPR%u mismatch, exp = 0x%016lx, got = 0x%016lx\n", \
+ gprn, exp, got)
+
+#define reg_mismatch(name, exp, got) \
+ pr_info("%s mismatch, exp = 0x%016lx, got = 0x%016lx\n", \
+ name, exp, got)
+
+static void __init run_tests_compute(void)
+{
+ unsigned long flags;
+ struct compute_test *test;
+ struct pt_regs *regs, exp, got;
+ unsigned int i, j, k;
+ ppc_inst_t instr;
+ bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative;
+
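+	/*
+	 * For each subtest: emulate the instruction into 'got', execute it
+	 * natively into 'exp' (unless it is a negative test), then compare
+	 * the resulting register state.
+	 */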
+ for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
+ test = &compute_tests[i];
+
+ if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) {
+ show_result(test->mnemonic, "SKIP (!CPU_FTR)");
+ continue;
+ }
+
+ for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) {
+ instr = test->subtests[j].instr;
+ flags = test->subtests[j].flags;
+ regs = &test->subtests[j].regs;
+ negative = flags & NEGATIVE_TEST;
+ ignore_xer = flags & IGNORE_XER;
+ ignore_ccr = flags & IGNORE_CCR;
+ passed = true;
+
+ memcpy(&exp, regs, sizeof(struct pt_regs));
+ memcpy(&got, regs, sizeof(struct pt_regs));
+
+ /*
+ * Set a compatible MSR value explicitly to ensure
+ * that XER and CR bits are updated appropriately
+ */
+ exp.msr = MSR_KERNEL;
+ got.msr = MSR_KERNEL;
+
+ rc = emulate_compute_instr(&got, instr, negative) != 0;
+ if (negative) {
+ /* skip executing instruction */
+ passed = rc;
+ goto print;
+ } else if (rc || execute_compute_instr(&exp, instr)) {
+ passed = false;
+ goto print;
+ }
+
+ /* Verify GPR values */
+ for (k = 0; k < 32; k++) {
+ ignore_gpr = flags & IGNORE_GPR(k);
+ if (!ignore_gpr && exp.gpr[k] != got.gpr[k]) {
+ passed = false;
+ gpr_mismatch(k, exp.gpr[k], got.gpr[k]);
+ }
+ }
+
+ /* Verify LR value */
+ if (exp.link != got.link) {
+ passed = false;
+ reg_mismatch("LR", exp.link, got.link);
+ }
+
+ /* Verify XER value */
+ if (!ignore_xer && exp.xer != got.xer) {
+ passed = false;
+ reg_mismatch("XER", exp.xer, got.xer);
+ }
+
+ /* Verify CR value */
+ if (!ignore_ccr && exp.ccr != got.ccr) {
+ passed = false;
+ reg_mismatch("CR", exp.ccr, got.ccr);
+ }
+
+print:
+ show_result_with_descr(test->mnemonic,
+ test->subtests[j].descr,
+ passed ? "PASS" : "FAIL");
+ }
+ }
+}
+
+static int __init test_emulate_step(void)
+{
+ printk(KERN_INFO "Running instruction emulation self-tests ...\n");
+ run_tests_load_store();
+ run_tests_compute();
+
+ return 0;
+}
+late_initcall(test_emulate_step);
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
new file mode 100644
index 000000000..e2b646a4f
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Non-emulated single-stepping support (currently limited to basic integer
+ * computations) used to validate the instruction emulation infrastructure.
+ *
+ * Copyright (C) 2019 IBM Corporation
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
+#include <linux/errno.h>
+
+/* int exec_instr(struct pt_regs *regs) */
+_GLOBAL(exec_instr)
+
+ /*
+ * Stack frame layout (INT_FRAME_SIZE bytes)
+ * In-memory pt_regs (SP + STACK_INT_FRAME_REGS)
+ * Scratch space (SP + 8)
+ * Back chain (SP + 0)
+ */
+
+ /*
+ * Allocate a new stack frame with enough space to hold the register
+ * states in an in-memory pt_regs and also create the back chain to
+ * the caller's stack frame.
+ */
+ stdu r1, -INT_FRAME_SIZE(r1)
+
+ /*
+ * Save non-volatile GPRs on stack. This includes TOC pointer (GPR2)
+ * and local variables (GPR14 to GPR31). The register for the pt_regs
+ * parameter (GPR3) is saved additionally to ensure that the resulting
+ * register state can still be saved even if GPR3 gets overwritten
+ * when loading the initial register state for the test instruction.
+ * The stack pointer (GPR1) and the thread pointer (GPR13) are not
+ * saved as these should not be modified anyway.
+ */
+ SAVE_GPRS(2, 3, r1)
+ SAVE_NVGPRS(r1)
+
+ /*
+ * Save LR on stack to ensure that the return address is available
+ * even if it gets overwritten by the test instruction.
+ */
+ mflr r0
+ std r0, _LINK(r1)
+
+ /*
+ * Save CR on stack. For simplicity, the entire register is saved
+ * even though only fields 2 to 4 are non-volatile.
+ */
+ mfcr r0
+ std r0, _CCR(r1)
+
+ /*
+ * Load register state for the test instruction without touching the
+ * critical non-volatile registers. The register state is passed as a
+ * pointer to a pt_regs instance.
+ */
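+	/*
+	 * Bias the pt_regs pointer so that the standard stack frame offsets
+	 * (GPR0, _LINK, _CCR, _XER) used below address the fields of this
+	 * pt_regs instance.
+	 */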
+ subi r31, r3, GPR0
+
+ /* Load LR from pt_regs */
+ ld r0, _LINK(r31)
+ mtlr r0
+
+ /* Load CR from pt_regs */
+ ld r0, _CCR(r31)
+ mtcr r0
+
+ /* Load XER from pt_regs */
+ ld r0, _XER(r31)
+ mtxer r0
+
+ /* Load GPRs from pt_regs */
+ REST_GPR(0, r31)
+ REST_GPRS(2, 12, r31)
+ REST_NVGPRS(r31)
+
+ /* Placeholder for the test instruction */
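+	/*
+	 * Two nop slots at a 64-byte alignment leave room to patch in an
+	 * 8-byte prefixed instruction without crossing a 64-byte boundary.
+	 */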
+ .balign 64
+1: nop
+ nop
+ patch_site 1b patch__exec_instr
+
+ /*
+ * Since GPR3 is overwritten, temporarily restore it back to its
+ * original state, i.e. the pointer to pt_regs, to ensure that the
+ * resulting register state can be saved. Before doing this, a copy
+ * of it is created in the scratch space which is used later on to
+ * save it to pt_regs.
+ */
+ std r3, 8(r1)
+ REST_GPR(3, r1)
+
+ /* Save resulting GPR state to pt_regs */
+ subi r3, r3, GPR0
+ SAVE_GPR(0, r3)
+ SAVE_GPR(2, r3)
+ SAVE_GPRS(4, 12, r3)
+ SAVE_NVGPRS(r3)
+
+ /* Save resulting LR to pt_regs */
+ mflr r0
+ std r0, _LINK(r3)
+
+ /* Save resulting CR to pt_regs */
+ mfcr r0
+ std r0, _CCR(r3)
+
+ /* Save resulting XER to pt_regs */
+ mfxer r0
+ std r0, _XER(r3)
+
+ /* Restore resulting GPR3 from scratch space and save it to pt_regs */
+ ld r0, 8(r1)
+ std r0, GPR3(r3)
+
+ /* Set return value to denote execution success */
+ li r3, 0
+
+ /* Continue */
+ b 3f
+
+ /* Set return value to denote execution failure */
+2: li r3, -EFAULT
+
+ /* Restore the non-volatile GPRs from stack */
+3: REST_GPR(2, r1)
+ REST_NVGPRS(r1)
+
+ /* Restore LR from stack to be able to return */
+ ld r0, _LINK(r1)
+ mtlr r0
+
+ /* Restore CR from stack */
+ ld r0, _CCR(r1)
+ mtcr r0
+
+ /* Tear down stack frame */
+ addi r1, r1, INT_FRAME_SIZE
+
+ /* Return */
+ blr
+
+ /* Setup exception table */
+ EX_TABLE(1b, 2b)
+
+_ASM_NOKPROBE_SYMBOL(exec_instr)
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
new file mode 100644
index 000000000..d491da8d1
--- /dev/null
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) IBM Corporation, 2011
+ *
+ * Authors: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
+ * Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/uaccess.h>
+#include <linux/hardirq.h>
+#include <asm/switch_to.h>
+
+int enter_vmx_usercopy(void)
+{
+ if (in_interrupt())
+ return 0;
+
+ preempt_disable();
+	/*
+	 * We need to disable page faults because they can call schedule()
+	 * and thus make us lose the VMX context. So on a page fault we
+	 * simply fail, which causes a fallback to the normal non-VMX copy.
+	 */
+ pagefault_disable();
+
+ enable_kernel_altivec();
+
+ return 1;
+}
+
+/*
+ * This function must return 0 because __copy_tofrom_user_power7 tail-calls
+ * it, so its return value becomes that function's result (0 on success).
+ */
+int exit_vmx_usercopy(void)
+{
+ disable_kernel_altivec();
+ pagefault_enable();
+ preempt_enable_no_resched();
+	/*
+	 * We must never explicitly call schedule() (including via
+	 * preempt_enable()) while in a kuap-unlocked user copy, because the
+	 * AMR register is not saved and restored across a context switch.
+	 * However, preempt kernels need to reschedule as soon as possible if
+	 * need_resched is set and we are in a preemptible context. The hack
+	 * here is to arm the decrementer to fire and reschedule for us if
+	 * necessary.
+	 */
+ if (IS_ENABLED(CONFIG_PREEMPT) && need_resched())
+ set_dec(1);
+ return 0;
+}
+
+int enter_vmx_ops(void)
+{
+ if (in_interrupt())
+ return 0;
+
+ preempt_disable();
+
+ enable_kernel_altivec();
+
+ return 1;
+}
+
+/*
+ * All calls to this function will be optimised into tail calls. We are
+ * passed a pointer to the destination, which we return as required of a
+ * memcpy implementation.
+ */
+void *exit_vmx_ops(void *dest)
+{
+ disable_kernel_altivec();
+ preempt_enable();
+ return dest;
+}
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
new file mode 100644
index 000000000..aab49d056
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+
+/*
+ * Sparse (as at v0.5.0) gets very, very confused by this file.
+ * Make it a bit simpler for it.
+ */
+#if !defined(__CHECKER__)
+#include <altivec.h>
+#else
+#define vec_xor(a, b) a ^ b
+#define vector __attribute__((vector_size(16)))
+#endif
+
+#include "xor_vmx.h"
+
+typedef vector signed char unative_t;
+
+#define DEFINE(V) \
+ unative_t *V = (unative_t *)V##_in; \
+ unative_t V##_0, V##_1, V##_2, V##_3
+
+#define LOAD(V) \
+ do { \
+ V##_0 = V[0]; \
+ V##_1 = V[1]; \
+ V##_2 = V[2]; \
+ V##_3 = V[3]; \
+ } while (0)
+
+#define STORE(V) \
+ do { \
+ V[0] = V##_0; \
+ V[1] = V##_1; \
+ V[2] = V##_2; \
+ V[3] = V##_3; \
+ } while (0)
+
+#define XOR(V1, V2) \
+ do { \
+ V1##_0 = vec_xor(V1##_0, V2##_0); \
+ V1##_1 = vec_xor(V1##_1, V2##_1); \
+ V1##_2 = vec_xor(V1##_2, V2##_2); \
+ V1##_3 = vec_xor(V1##_3, V2##_3); \
+ } while (0)
+
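+/*
+ * Each LOAD/STORE/XOR macro operates on four 16-byte vectors, so every
+ * loop iteration below processes 64 bytes per source buffer.
+ */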
+void __xor_altivec_2(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ XOR(v1, v2);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ } while (--lines > 0);
+}
+
+void __xor_altivec_3(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ XOR(v1, v2);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ } while (--lines > 0);
+}
+
+void __xor_altivec_4(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ DEFINE(v4);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ LOAD(v4);
+ XOR(v1, v2);
+ XOR(v3, v4);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ v4 += 4;
+ } while (--lines > 0);
+}
+
+void __xor_altivec_5(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in,
+ const unsigned long * __restrict v5_in)
+{
+ DEFINE(v1);
+ DEFINE(v2);
+ DEFINE(v3);
+ DEFINE(v4);
+ DEFINE(v5);
+ unsigned long lines = bytes / (sizeof(unative_t)) / 4;
+
+ do {
+ LOAD(v1);
+ LOAD(v2);
+ LOAD(v3);
+ LOAD(v4);
+ LOAD(v5);
+ XOR(v1, v2);
+ XOR(v3, v4);
+ XOR(v1, v5);
+ XOR(v1, v3);
+ STORE(v1);
+
+ v1 += 4;
+ v2 += 4;
+ v3 += 4;
+ v4 += 4;
+ v5 += 4;
+ } while (--lines > 0);
+}
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
new file mode 100644
index 000000000..573c41d90
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Simple interface to link xor_vmx.c and xor_vmx_glue.c
+ *
+ * Separating these files ensures that no altivec instructions are run
+ * outside of the enable/disable altivec block.
+ */
+
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
new file mode 100644
index 000000000..35d917ece
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Altivec XOR operations
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+#include <asm/xor_altivec.h>
+#include "xor_vmx.h"
+
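+/*
+ * Each wrapper brackets the raw Altivec routine with preempt_disable() and
+ * enable_kernel_altivec() so the vector unit can be used safely from
+ * process context.
+ */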
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_2(bytes, p1, p2);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_3(bytes, p1, p2, p3);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_4(bytes, p1, p2, p3, p4);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_5(bytes, p1, p2, p3, p4, p5);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);