1 files changed, 480 insertions, 0 deletions
diff --git a/debian/patches/features/arm64/arm64-compat-Implement-misalignment-fixups-for-multi.patch b/debian/patches/features/arm64/arm64-compat-Implement-misalignment-fixups-for-multi.patch
new file mode 100644
index 000000000..303e69f83
--- /dev/null
+++ b/debian/patches/features/arm64/arm64-compat-Implement-misalignment-fixups-for-multi.patch
@@ -0,0 +1,480 @@
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Fri, 1 Jul 2022 15:53:22 +0200
+Subject: arm64: compat: Implement misalignment fixups for multiword loads
+Origin: https://git.kernel.org/linus/3fc24ef32d3b9368f4c103dcd21d6a3f959b4870
+
+The 32-bit ARM kernel implements fixups on behalf of user space when
+using LDM/STM or LDRD/STRD instructions on addresses that are not 32-bit
+aligned. This is not something that is supported by the architecture,
+but was done anyway to increase compatibility with user space software,
+which mostly targeted x86 at the time and did not care about aligned
+accesses.
+
+This feature is one of the remaining impediments to being able to switch
+to 64-bit kernels on 64-bit capable hardware running 32-bit user space,
+so let's implement it for the arm64 compat layer as well.
+
+Note that the intent is to implement the exact same handling of
+misaligned multi-word loads and stores as the 32-bit kernel does,
+including what appears to be missing support for user space programs
+that rely on SETEND to switch to a different byte order and back. Also,
+like the 32-bit ARM version, we rely on the faulting address reported by
+the CPU to infer the memory address, instead of decoding the instruction
+fully to obtain this information.
+
+This implementation is taken from the 32-bit ARM tree, with all pieces
+removed that deal with instructions other than LDRD/STRD and LDM/STM, or
+that deal with alignment exceptions taken in kernel mode.
+
+Cc: debian-arm@lists.debian.org
+Cc: Vagrant Cascadian <vagrant@debian.org>
+Cc: Riku Voipio <riku.voipio@iki.fi>
+Cc: Steve McIntyre <steve@einval.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Reviewed-by: Arnd Bergmann <arnd@arndb.de>
+Link: https://lore.kernel.org/r/20220701135322.3025321-1-ardb@kernel.org
+[catalin.marinas@arm.com: change the option to 'default n']
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+---
+ arch/arm64/Kconfig                   |   3 +
+ arch/arm64/include/asm/exception.h   |   1 +
+ arch/arm64/kernel/Makefile           |   1 +
+ arch/arm64/kernel/compat_alignment.c | 387 +++++++++++++++++++++++++++
+ arch/arm64/mm/fault.c                |   3 +
+ 5 files changed, 395 insertions(+)
+ create mode 100644 arch/arm64/kernel/compat_alignment.c
+
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -1328,6 +1328,9 @@ config THUMB2_COMPAT_VDSO
+ 	  Compile the compat vDSO with '-mthumb -fomit-frame-pointer' if y,
+ 	  otherwise with '-marm'.
+ 
++config COMPAT_ALIGNMENT_FIXUPS
++	bool "Fix up misaligned multi-word loads and stores in user space"
++
+ menuconfig ARMV8_DEPRECATED
+ 	bool "Emulate deprecated/obsolete ARMv8 instructions"
+ 	depends on SYSCTL
+--- a/arch/arm64/include/asm/exception.h
++++ b/arch/arm64/include/asm/exception.h
+@@ -47,6 +47,7 @@ void do_el0_sys(unsigned long esr, struc
+ void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
+ void do_el0_cp15(unsigned long esr, struct pt_regs *regs);
++int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs);
+ void do_el0_svc(struct pt_regs *regs);
+ void do_el0_svc_compat(struct pt_regs *regs);
+ void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
+--- a/arch/arm64/kernel/Makefile
++++ b/arch/arm64/kernel/Makefile
+@@ -28,6 +28,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
+ obj-$(CONFIG_COMPAT)			+= sys32.o signal32.o			\
+ 					   sys_compat.o
+ obj-$(CONFIG_COMPAT)			+= sigreturn32.o
++obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS)	+= compat_alignment.o
+ obj-$(CONFIG_KUSER_HELPERS)		+= kuser32.o
+ obj-$(CONFIG_FUNCTION_TRACER)		+= ftrace.o entry-ftrace.o
+ obj-$(CONFIG_MODULES)			+= module.o
+--- /dev/null
++++ b/arch/arm64/kernel/compat_alignment.c
+@@ -0,0 +1,387 @@
++// SPDX-License-Identifier: GPL-2.0-only
++// based on arch/arm/mm/alignment.c
++
++#include <linux/compiler.h>
++#include <linux/errno.h>
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/perf_event.h>
++#include <linux/uaccess.h>
++
++#include <asm/exception.h>
++#include <asm/ptrace.h>
++#include <asm/traps.h>
++
++/*
++ * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
++ *
++ * Speed optimisations and better fault handling by Russell King.
++ */
++#define CODING_BITS(i)	((i) & 0x0e000000)	/* instr bits [27:25] */
++
++#define LDST_P_BIT(i)	((i) & (1 << 24))	/* Preindex		*/
++#define LDST_U_BIT(i)	((i) & (1 << 23))	/* Add offset		*/
++#define LDST_W_BIT(i)	((i) & (1 << 21))	/* Writeback		*/
++#define LDST_L_BIT(i)	((i) & (1 << 20))	/* Load			*/
++
++#define LDST_P_EQ_U(i)	((((i) ^ ((i) >> 1)) & (1 << 23)) == 0)
++
++#define LDSTHD_I_BIT(i)	((i) & (1 << 22))	/* double/half-word immed */
++
++#define RN_BITS(i)	(((i) >> 16) & 15)	/* Rn			*/
++#define RD_BITS(i)	(((i) >> 12) & 15)	/* Rd			*/
++#define RM_BITS(i)	((i) & 15)		/* Rm			*/
++
++#define REGMASK_BITS(i)	((i) & 0xffff)		/* LDM/STM register list */
++
++#define BAD_INSTR 	0xdeadc0de	/* thumb2arm() result for an unhandled encoding */
++
++/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3: hi16 starts e800h, f000h or f800h */
++#define IS_T32(hi16) \
++	(((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800))
++
++union offset_union {	/* LDRD/STRD index offset, viewable as unsigned or signed */
++	unsigned long un;
++	  signed long sn;
++};
++
++#define TYPE_ERROR	0	/* handler rejected the instruction */
++#define TYPE_FAULT	1	/* get_user()/put_user() failed */
++#define TYPE_LDST	2	/* done; caller finishes any base writeback */
++#define TYPE_DONE	3	/* done; nothing left for the caller to do */
++
++/* Finish an LDRD/STRD fixup: update the base register Rn if the encoding asks. */
++static void do_alignment_finish_ldst(unsigned long addr, u32 instr,
++		struct pt_regs *regs, union offset_union offset)
++{
++	int postindexed = !LDST_P_BIT(instr);
++	unsigned long delta = LDST_U_BIT(instr) ? offset.un : -offset.un;
++
++	if (postindexed)
++		addr += delta;
++
++	if (postindexed || LDST_W_BIT(instr))
++		regs->regs[RN_BITS(instr)] = addr;
++}
++
++/* Emulate one LDRD/STRD as two 32-bit user accesses at @addr and @addr + 4. */
++static int do_alignment_ldrdstrd(unsigned long addr, u32 instr,
++				 struct pt_regs *regs)
++{
++	u32 __user *uaddr = (u32 __user *)addr;
++	unsigned int rt = RD_BITS(instr), rt2;
++	int is_load;
++
++	if ((instr & 0xfe000000) == 0xe8000000) {
++		/* Thumb-2 encoding: second register and L bit are explicit */
++		rt2 = (instr >> 8) & 0xf;
++		is_load = LDST_L_BIT(instr) ? 1 : 0;
++	} else {
++		/* otherwise Rt must be even and not r14; Rt2 is Rt + 1 */
++		if ((rt & 1) || rt == 14)
++			return TYPE_ERROR;
++		is_load = ((instr & 0xf0) == 0xd0);
++		rt2 = rt + 1;
++	}
++
++	if (!is_load) {
++		if (put_user(regs->regs[rt], uaddr) ||
++		    put_user(regs->regs[rt2], uaddr + 1))
++			return TYPE_FAULT;
++	} else {
++		unsigned int lo, hi;
++		if (get_user(lo, uaddr) || get_user(hi, uaddr + 1))
++			return TYPE_FAULT;
++		regs->regs[rt] = lo;
++		regs->regs[rt2] = hi;
++	}
++	return TYPE_LDST;
++}
++
++/*
++ * LDM/STM alignment handler: moves each register in the mask with its own
++ * 32-bit user access. Returns TYPE_DONE, or TYPE_FAULT as soon as an access
++ * fails (registers already transferred are not rolled back).
++ * NOTE(review): a load into PC is presumably still advanced by the caller's
++ * instruction skip afterwards - confirm that this is intended.
++ *
++ * B = rn pointer before instruction, A = rn pointer after instruction
++ *              ------ increasing address ----->
++ *	        |    | r0 | r1 | ... | rx |    |
++ * PU = 01             B                    A
++ * PU = 11        B                    A
++ * PU = 00        A                    B
++ * PU = 10             A                    B
++ */
++static int
++do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs)
++{
++	unsigned int rd, rn, nr_regs, regbits, val;
++	unsigned long eaddr, newaddr;
++
++	/* size of the transfer in bytes: 4 per register in the mask */
++	nr_regs = hweight16(REGMASK_BITS(instr)) * 4;
++
++	rn = RN_BITS(instr);
++	newaddr = eaddr = regs->regs[rn];
++
++	if (!LDST_U_BIT(instr))
++		nr_regs = -nr_regs;
++	/* keep 32-bit wraparound: a negated nr_regs would carry into bit 32 */
++	newaddr = (u32)(newaddr + nr_regs);
++	if (!LDST_U_BIT(instr))
++		eaddr = newaddr;
++
++	if (LDST_P_EQ_U(instr))	/* U = P */
++		eaddr += 4;
++
++	for (regbits = REGMASK_BITS(instr), rd = 0; regbits;
++	     regbits >>= 1, rd += 1)
++		if (regbits & 1) {
++			if (LDST_L_BIT(instr)) {
++				if (get_user(val, (u32 __user *)eaddr))
++					return TYPE_FAULT;
++				if (rd < 15)
++					regs->regs[rd] = val;
++				else
++					regs->pc = val;
++			} else {
++				/*
++				 * A read of PC is biased by +8 in ARM mode and
++				 * +4 in Thumb mode. Thumb does not permit STM
++				 * of PC, so only the ARM bias is needed here.
++				 */
++				val = (rd < 15) ? regs->regs[rd] : regs->pc + 8;
++				if (put_user(val, (u32 __user *)eaddr))
++					return TYPE_FAULT;
++			}
++			eaddr += 4;
++		}
++
++	if (LDST_W_BIT(instr))
++		regs->regs[rn] = newaddr;
++
++	return TYPE_DONE;
++}
++
++/*
++ * Convert Thumb multi-word load/store instruction forms to equivalent ARM
++ * instructions so we can reuse ARM userland alignment fault fixups for Thumb.
++ *
++ * This implementation was initially based on the algorithm found in
++ * gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same
++ * to convert only Thumb ld/st instruction forms to equivalent ARM forms.
++ *
++ * NOTES:
++ * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections.
++ * 2. If for some reason we're passed a non-ld/st Thumb instruction to
++ *    decode, we return BAD_INSTR (0xdeadc0de). This should never happen
++ *    under normal circumstances but if it does, we've got other problems to
++ *    deal with elsewhere and we obviously can't fix those problems here.
++ */
++
++static unsigned long thumb2arm(u16 tinstr)
++{
++	u32 L = (tinstr & (1<<11)) >> 11;	/* bit 11: set for LDMIA/POP */
++
++	switch ((tinstr & 0xf800) >> 11) {
++	/* 6.6.1 Format 1: */
++	case 0xc000 >> 11:				/* 7.1.51 STMIA */
++	case 0xc800 >> 11:				/* 7.1.25 LDMIA */
++		{
++			u32 Rn = (tinstr & (7<<8)) >> 8;	/* base, bits [10:8] */
++			u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21; /* no writeback if a load lists Rn */
++
++			return 0xe8800000 | W | (L<<20) | (Rn<<16) |
++				(tinstr&255);
++		}
++
++	/* 6.6.1 Format 2: */
++	case 0xb000 >> 11:				/* 7.1.48 PUSH */
++	case 0xb800 >> 11:				/* 7.1.47 POP */
++		if ((tinstr & (3 << 9)) == 0x0400) {
++			static const u32 subset[4] = {
++				0xe92d0000,	/* STMDB sp!,{registers} */
++				0xe92d4000,	/* STMDB sp!,{registers,lr} */
++				0xe8bd0000,	/* LDMIA sp!,{registers} */
++				0xe8bd8000	/* LDMIA sp!,{registers,pc} */
++			};
++			return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] |
++			    (tinstr & 255);		/* register_list */
++		}
++		fallthrough;	/* for illegal instruction case */
++
++	default:
++		return BAD_INSTR;
++	}
++}
++
++/*
++ * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction
++ * handleable by ARM alignment handler, also find the corresponding handler,
++ * so that we can reuse ARM userland alignment fault fixups for Thumb.
++ *
++ * @pinstr: original Thumb-2 instruction; returns new handleable instruction
++ * @regs: register context (not read by this function).
++ * @poffset: return offset from faulted addr for later writeback
++ * Returns the handler to call, or NULL when the encoding is not handled.
++ * NOTES:
++ * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections.
++ * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt)
++ */
++static void *
++do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs,
++			    union offset_union *poffset)
++{
++	u32 instr = *pinstr;
++	u16 tinst1 = (instr >> 16) & 0xffff;
++	u16 tinst2 = instr & 0xffff;
++
++	switch (tinst1 & 0xffe0) {
++	/* A6.3.5 Load/Store multiple */
++	case 0xe880:		/* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */
++	case 0xe8a0:		/* ...above writeback version */
++	case 0xe900:		/* STMDB/STMFD, LDMDB/LDMEA */
++	case 0xe920:		/* ...above writeback version */
++		/* no offset needed: the LDM/STM handler computes writeback itself */
++		return do_alignment_ldmstm;
++
++	case 0xf840:		/* POP/PUSH T3 (single register) */
++		if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) {
++			u32 L = !!(LDST_L_BIT(instr));
++			const u32 subset[2] = {
++				0xe92d0000,	/* STMDB sp!,{registers} */
++				0xe8bd0000,	/* LDMIA sp!,{registers} */
++			};
++			*pinstr = subset[L] | (1<<RD_BITS(instr));
++			return do_alignment_ldmstm;
++		}
++		/* Not a single-register PUSH/POP: no handler, NULL is returned */
++		break;
++
++	/* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */
++	case 0xe860:
++	case 0xe960:
++	case 0xe8e0:
++	case 0xe9e0:
++		poffset->un = (tinst2 & 0xff) << 2;	/* imm8 scaled by 4 */
++		fallthrough;
++
++	case 0xe940:
++	case 0xe9c0:
++		return do_alignment_ldrdstrd;
++
++	/*
++	 * No need to handle load/store instructions up to word size
++	 * since ARMv6 and later CPUs can perform unaligned accesses.
++	 */
++	default:
++		break;
++	}
++	return NULL;
++}
++
++/* Fetch the 32-bit instruction at @ip; 0 on success, else get_user()'s error. */
++static int alignment_get_arm(struct pt_regs *regs, __le32 __user *ip, u32 *inst)
++{
++	__le32 raw = 0;
++	int err = get_user(raw, ip);
++
++	if (err)
++		return err;
++
++	*inst = __le32_to_cpu(raw);	/* little-endian to CPU byte order */
++	return 0;
++}
++
++/* Fetch the 16-bit halfword at @ip; 0 on success, else get_user()'s error. */
++static int alignment_get_thumb(struct pt_regs *regs, __le16 __user *ip, u16 *inst)
++{
++	__le16 raw = 0;
++	int err = get_user(raw, ip);
++
++	if (err)
++		return err;
++
++	*inst = __le16_to_cpu(raw);	/* little-endian to CPU byte order */
++	return 0;
++}
++
++/*
++ * Emulate the misaligned LDRD/STRD or LDM/STM a compat task faulted on at
++ * @addr and step past it. Returns 0 when fixed up, or 1 when not (fetch
++ * failed, unsupported encoding, or the emulated user access faulted).
++ */
++int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs)
++{
++	union offset_union offset;
++	unsigned long instrptr = instruction_pointer(regs);
++	int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs);
++	unsigned int type;
++	u32 instr = 0;
++	u16 tinstr = 0;
++	int isize = 4, thumb2_32b = 0, fault;
++
++	if (compat_thumb_mode(regs)) {
++		__le16 __user *ptr = (__le16 __user *)(instrptr & ~1);
++
++		fault = alignment_get_thumb(regs, ptr, &tinstr);
++		if (!fault) {
++			if (IS_T32(tinstr)) {
++				/* Thumb-2 32-bit */
++				u16 tinst2 = 0;	/* left untouched if the fetch faults */
++				fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
++				instr = ((u32)tinstr << 16) | tinst2;
++				thumb2_32b = 1;
++			} else {
++				isize = 2;
++				instr = thumb2arm(tinstr);
++			}
++		}
++	} else {
++		fault = alignment_get_arm(regs, (__le32 __user *)instrptr, &instr);
++	}
++
++	if (fault)
++		return 1;
++
++	switch (CODING_BITS(instr)) {
++	case 0x00000000:	/* 3.13.4 load/store instruction extensions */
++		if (LDSTHD_I_BIT(instr))
++			offset.un = (instr & 0xf00) >> 4 | (instr & 15);
++		else
++			offset.un = regs->regs[RM_BITS(instr)];
++
++		if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */
++		    (instr & 0x001000f0) == 0x000000f0)   /* STRD */
++			handler = do_alignment_ldrdstrd;
++		else
++			return 1;
++		break;
++
++	case 0x08000000:	/* ldm or stm, or thumb-2 32bit instruction */
++		offset.un = 0;
++		if (thumb2_32b)
++			handler = do_alignment_t32_to_handler(&instr, regs, &offset);
++		else
++			handler = do_alignment_ldmstm;
++		if (!handler)	/* Thumb-2 encoding with no fixup: do not call NULL */
++			return 1;
++		break;
++
++	default:
++		return 1;
++	}
++
++	type = handler(addr, instr, regs);
++	if (type == TYPE_ERROR || type == TYPE_FAULT)
++		return 1;
++
++	if (type == TYPE_LDST)
++		do_alignment_finish_ldst(addr, instr, regs, offset);
++
++	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->pc);
++	arm64_skip_faulting_instruction(regs, isize);
++
++	return 0;
++}
+--- a/arch/arm64/mm/fault.c
++++ b/arch/arm64/mm/fault.c
+@@ -610,6 +610,9 @@ static int __kprobes do_translation_faul
+ static int do_alignment_fault(unsigned long addr, unsigned int esr,
+ 			      struct pt_regs *regs)
+ {
++	if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) &&
++	    compat_user_mode(regs))
++		return do_compat_alignment_fixup(addr, regs);
+ 	do_bad_area(addr, esr, regs);
+ 	return 0;
+ }