Diffstat (limited to 'arch/s390/kernel'): 108 files changed, 34033 insertions, 0 deletions
diff --git a/arch/s390/kernel/.gitignore b/arch/s390/kernel/.gitignore new file mode 100644 index 0000000000..bbb90f92d0 --- /dev/null +++ b/arch/s390/kernel/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vmlinux.lds diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile new file mode 100644 index 0000000000..0df2b88cc0 --- /dev/null +++ b/arch/s390/kernel/Makefile @@ -0,0 +1,85 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux kernel. +# + +ifdef CONFIG_FUNCTION_TRACER + +# Do not trace tracer code +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) + +# Do not trace early setup code +CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) + +endif + +GCOV_PROFILE_early.o := n +KCOV_INSTRUMENT_early.o := n +UBSAN_SANITIZE_early.o := n +KASAN_SANITIZE_ipl.o := n +KASAN_SANITIZE_machine_kexec.o := n + +# +# Passing null pointers is ok for smp code, since we access the lowcore here. +# +CFLAGS_smp.o := -Wno-nonnull + +# +# Disable tailcall optimizations for stack / callchain walking functions +# since this might generate broken code when accessing register 15 and +# passing its content to other functions. +# +CFLAGS_stacktrace.o += -fno-optimize-sibling-calls +CFLAGS_dumpstack.o += -fno-optimize-sibling-calls +CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls + +obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o +obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o +obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o +obj-y += sysinfo.o lgr.o os_info.o +obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o +obj-y += entry.o reipl.o kdebugfs.o alternative.o +obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o + +extra-y += vmlinux.lds + +obj-$(CONFIG_SYSFS) += nospec-sysfs.o +CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) + +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_AUDIT) += audit.o +compat-obj-$(CONFIG_AUDIT) += compat_audit.o +obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o +obj-$(CONFIG_COMPAT) += $(compat-obj-y) +obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_KPROBES) += kprobes_insn_page.o +obj-$(CONFIG_KPROBES) += mcount.o +obj-$(CONFIG_RETHOOK) += rethook.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o + +obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o +obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o +obj-$(CONFIG_CERT_STORE) += cert_store.o +obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o + +obj-$(CONFIG_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o +obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o +obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o + +obj-$(CONFIG_TRACEPOINTS) += trace.o +obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o + +# vdso +obj-y += vdso64/ +obj-$(CONFIG_COMPAT) += vdso32/ diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c new file mode 100644 index 0000000000..f9efc54ec4 --- /dev/null +++ 
b/arch/s390/kernel/abs_lowcore.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/pgtable.h> +#include <asm/abs_lowcore.h> + +unsigned long __bootdata_preserved(__abs_lowcore); + +int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) +{ + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); + unsigned long phys = __pa(lc); + int rc, i; + + for (i = 0; i < LC_PAGES; i++) { + rc = __vmem_map_4k_page(addr, phys, PAGE_KERNEL, alloc); + if (rc) { + /* + * Do not unmap allocated page tables in case the + * allocation was not requested. In such a case the + * request is expected coming from an atomic context, + * while the unmap attempt might sleep. + */ + if (alloc) { + for (--i; i >= 0; i--) { + addr -= PAGE_SIZE; + vmem_unmap_4k_page(addr); + } + } + return rc; + } + addr += PAGE_SIZE; + phys += PAGE_SIZE; + } + return 0; +} + +void abs_lowcore_unmap(int cpu) +{ + unsigned long addr = __abs_lowcore + (cpu * sizeof(struct lowcore)); + int i; + + for (i = 0; i < LC_PAGES; i++) { + vmem_unmap_4k_page(addr); + addr += PAGE_SIZE; + } +} diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c new file mode 100644 index 0000000000..e7bca29f9c --- /dev/null +++ b/arch/s390/kernel/alternative.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <asm/text-patching.h> +#include <asm/alternative.h> +#include <asm/facility.h> +#include <asm/nospec-branch.h> + +static int __initdata_or_module alt_instr_disabled; + +static int __init disable_alternative_instructions(char *str) +{ + alt_instr_disabled = 1; + return 0; +} + +early_param("noaltinstr", disable_alternative_instructions); + +static void __init_or_module __apply_alternatives(struct alt_instr *start, + struct alt_instr *end) +{ + struct alt_instr *a; + u8 *instr, *replacement; + + /* + * The scan order should be from start to end. A later scanned + * alternative code can overwrite previously scanned alternative code. + */ + for (a = start; a < end; a++) { + instr = (u8 *)&a->instr_offset + a->instr_offset; + replacement = (u8 *)&a->repl_offset + a->repl_offset; + + if (!__test_facility(a->facility, alt_stfle_fac_list)) + continue; + + if (unlikely(a->instrlen % 2)) { + WARN_ONCE(1, "cpu alternatives instructions length is " + "odd, skipping patching\n"); + continue; + } + + s390_kernel_write(instr, replacement, a->instrlen); + } +} + +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) +{ + if (!alt_instr_disabled) + __apply_alternatives(start, end); +} + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +void __init apply_alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); +} + +static void do_sync_core(void *info) +{ + sync_core(); +} + +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} + +void text_poke_sync_lock(void) +{ + cpus_read_lock(); + text_poke_sync(); + cpus_read_unlock(); +} diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c new file mode 100644 index 0000000000..fa5f6885c7 --- /dev/null +++ b/arch/s390/kernel/asm-offsets.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. 
+ */ + +#define ASM_OFFSETS_C + +#include <linux/kbuild.h> +#include <linux/kvm_host.h> +#include <linux/sched.h> +#include <linux/purgatory.h> +#include <linux/pgtable.h> +#include <linux/ftrace.h> +#include <asm/idle.h> +#include <asm/gmap.h> +#include <asm/stacktrace.h> + +int main(void) +{ + /* task struct offsets */ + OFFSET(__TASK_stack, task_struct, stack); + OFFSET(__TASK_thread, task_struct, thread); + OFFSET(__TASK_pid, task_struct, pid); + BLANK(); + /* thread struct offsets */ + OFFSET(__THREAD_ksp, thread_struct, ksp); + BLANK(); + /* thread info offsets */ + OFFSET(__TI_flags, task_struct, thread_info.flags); + BLANK(); + /* pt_regs offsets */ + OFFSET(__PT_PSW, pt_regs, psw); + OFFSET(__PT_GPRS, pt_regs, gprs); + OFFSET(__PT_R0, pt_regs, gprs[0]); + OFFSET(__PT_R1, pt_regs, gprs[1]); + OFFSET(__PT_R2, pt_regs, gprs[2]); + OFFSET(__PT_R3, pt_regs, gprs[3]); + OFFSET(__PT_R4, pt_regs, gprs[4]); + OFFSET(__PT_R5, pt_regs, gprs[5]); + OFFSET(__PT_R6, pt_regs, gprs[6]); + OFFSET(__PT_R7, pt_regs, gprs[7]); + OFFSET(__PT_R8, pt_regs, gprs[8]); + OFFSET(__PT_R9, pt_regs, gprs[9]); + OFFSET(__PT_R10, pt_regs, gprs[10]); + OFFSET(__PT_R11, pt_regs, gprs[11]); + OFFSET(__PT_R12, pt_regs, gprs[12]); + OFFSET(__PT_R13, pt_regs, gprs[13]); + OFFSET(__PT_R14, pt_regs, gprs[14]); + OFFSET(__PT_R15, pt_regs, gprs[15]); + OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2); + OFFSET(__PT_FLAGS, pt_regs, flags); + OFFSET(__PT_CR1, pt_regs, cr1); + OFFSET(__PT_LAST_BREAK, pt_regs, last_break); + DEFINE(__PT_SIZE, sizeof(struct pt_regs)); + BLANK(); + /* stack_frame offsets */ + OFFSET(__SF_BACKCHAIN, stack_frame, back_chain); + OFFSET(__SF_GPRS, stack_frame, gprs); + OFFSET(__SF_EMPTY, stack_frame, empty[0]); + OFFSET(__SF_SIE_CONTROL, stack_frame, sie_control_block); + OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea); + OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); + OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); + OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); + DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); + BLANK(); + /* idle data offsets */ + OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); + OFFSET(__TIMER_IDLE_ENTER, s390_idle_data, timer_idle_enter); + OFFSET(__MT_CYCLES_ENTER, s390_idle_data, mt_cycles_enter); + BLANK(); + /* hardware defined lowcore locations 0x000 - 0x1ff */ + OFFSET(__LC_EXT_PARAMS, lowcore, ext_params); + OFFSET(__LC_EXT_CPU_ADDR, lowcore, ext_cpu_addr); + OFFSET(__LC_EXT_INT_CODE, lowcore, ext_int_code); + OFFSET(__LC_PGM_ILC, lowcore, pgm_ilc); + OFFSET(__LC_PGM_INT_CODE, lowcore, pgm_code); + OFFSET(__LC_DATA_EXC_CODE, lowcore, data_exc_code); + OFFSET(__LC_MON_CLASS_NR, lowcore, mon_class_num); + OFFSET(__LC_PER_CODE, lowcore, per_code); + OFFSET(__LC_PER_ATMID, lowcore, per_atmid); + OFFSET(__LC_PER_ADDRESS, lowcore, per_address); + OFFSET(__LC_EXC_ACCESS_ID, lowcore, exc_access_id); + OFFSET(__LC_PER_ACCESS_ID, lowcore, per_access_id); + OFFSET(__LC_OP_ACCESS_ID, lowcore, op_access_id); + OFFSET(__LC_AR_MODE_ID, lowcore, ar_mode_id); + OFFSET(__LC_TRANS_EXC_CODE, lowcore, trans_exc_code); + OFFSET(__LC_MON_CODE, lowcore, monitor_code); + OFFSET(__LC_SUBCHANNEL_ID, lowcore, subchannel_id); + OFFSET(__LC_SUBCHANNEL_NR, lowcore, subchannel_nr); + OFFSET(__LC_IO_INT_PARM, lowcore, io_int_parm); + OFFSET(__LC_IO_INT_WORD, lowcore, io_int_word); + OFFSET(__LC_MCCK_CODE, lowcore, mcck_interruption_code); + OFFSET(__LC_EXT_DAMAGE_CODE, lowcore, external_damage_code); + OFFSET(__LC_MCCK_FAIL_STOR_ADDR, lowcore, 
failing_storage_address); + OFFSET(__LC_PGM_LAST_BREAK, lowcore, pgm_last_break); + OFFSET(__LC_RETURN_LPSWE, lowcore, return_lpswe); + OFFSET(__LC_RETURN_MCCK_LPSWE, lowcore, return_mcck_lpswe); + OFFSET(__LC_RST_OLD_PSW, lowcore, restart_old_psw); + OFFSET(__LC_EXT_OLD_PSW, lowcore, external_old_psw); + OFFSET(__LC_SVC_OLD_PSW, lowcore, svc_old_psw); + OFFSET(__LC_PGM_OLD_PSW, lowcore, program_old_psw); + OFFSET(__LC_MCK_OLD_PSW, lowcore, mcck_old_psw); + OFFSET(__LC_IO_OLD_PSW, lowcore, io_old_psw); + OFFSET(__LC_RST_NEW_PSW, lowcore, restart_psw); + OFFSET(__LC_EXT_NEW_PSW, lowcore, external_new_psw); + OFFSET(__LC_SVC_NEW_PSW, lowcore, svc_new_psw); + OFFSET(__LC_PGM_NEW_PSW, lowcore, program_new_psw); + OFFSET(__LC_MCK_NEW_PSW, lowcore, mcck_new_psw); + OFFSET(__LC_IO_NEW_PSW, lowcore, io_new_psw); + /* software defined lowcore locations 0x200 - 0xdff*/ + OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync); + OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async); + OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart); + OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); + OFFSET(__LC_RETURN_PSW, lowcore, return_psw); + OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); + OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); + OFFSET(__LC_MCCK_ENTER_TIMER, lowcore, mcck_enter_timer); + OFFSET(__LC_EXIT_TIMER, lowcore, exit_timer); + OFFSET(__LC_LAST_UPDATE_TIMER, lowcore, last_update_timer); + OFFSET(__LC_LAST_UPDATE_CLOCK, lowcore, last_update_clock); + OFFSET(__LC_INT_CLOCK, lowcore, int_clock); + OFFSET(__LC_BOOT_CLOCK, lowcore, boot_clock); + OFFSET(__LC_CURRENT, lowcore, current_task); + OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack); + OFFSET(__LC_ASYNC_STACK, lowcore, async_stack); + OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack); + OFFSET(__LC_RESTART_STACK, lowcore, restart_stack); + OFFSET(__LC_MCCK_STACK, lowcore, mcck_stack); + OFFSET(__LC_RESTART_FN, lowcore, restart_fn); + OFFSET(__LC_RESTART_DATA, lowcore, restart_data); + OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); + OFFSET(__LC_RESTART_FLAGS, lowcore, restart_flags); + OFFSET(__LC_KERNEL_ASCE, lowcore, kernel_asce); + OFFSET(__LC_USER_ASCE, lowcore, user_asce); + OFFSET(__LC_LPP, lowcore, lpp); + OFFSET(__LC_CURRENT_PID, lowcore, current_pid); + OFFSET(__LC_GMAP, lowcore, gmap); + OFFSET(__LC_LAST_BREAK, lowcore, last_break); + /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ + OFFSET(__LC_DUMP_REIPL, lowcore, ipib); + OFFSET(__LC_VMCORE_INFO, lowcore, vmcore_info); + OFFSET(__LC_OS_INFO, lowcore, os_info); + /* hardware defined lowcore locations 0x1000 - 0x18ff */ + OFFSET(__LC_MCESAD, lowcore, mcesad); + OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2); + OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area); + OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area); + OFFSET(__LC_PSW_SAVE_AREA, lowcore, psw_save_area); + OFFSET(__LC_PREFIX_SAVE_AREA, lowcore, prefixreg_save_area); + OFFSET(__LC_FP_CREG_SAVE_AREA, lowcore, fpt_creg_save_area); + OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area); + OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area); + OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area); + OFFSET(__LC_LAST_BREAK_SAVE_AREA, lowcore, last_break_save_area); + OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area); + OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area); + OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb); + BLANK(); + /* gmap/sie offsets */ + OFFSET(__GMAP_ASCE, gmap, asce); + OFFSET(__SIE_PROG0C, 
kvm_s390_sie_block, prog0c); + OFFSET(__SIE_PROG20, kvm_s390_sie_block, prog20); + /* kexec_sha_region */ + OFFSET(__KEXEC_SHA_REGION_START, kexec_sha_region, start); + OFFSET(__KEXEC_SHA_REGION_LEN, kexec_sha_region, len); + DEFINE(__KEXEC_SHA_REGION_SIZE, sizeof(struct kexec_sha_region)); + /* sizeof kernel parameter area */ + DEFINE(__PARMAREA_SIZE, sizeof(struct parmarea)); + /* kernel parameter area offsets */ + DEFINE(IPL_DEVICE, PARMAREA + offsetof(struct parmarea, ipl_device)); + DEFINE(INITRD_START, PARMAREA + offsetof(struct parmarea, initrd_start)); + DEFINE(INITRD_SIZE, PARMAREA + offsetof(struct parmarea, initrd_size)); + DEFINE(OLDMEM_BASE, PARMAREA + offsetof(struct parmarea, oldmem_base)); + DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); + DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); + DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* function graph return value tracing */ + OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2); + OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); + DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); +#endif + OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); + DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); + return 0; +} diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c new file mode 100644 index 0000000000..02051a596b --- /dev/null +++ b/arch/s390/kernel/audit.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/init.h> +#include <linux/types.h> +#include <linux/audit.h> +#include <asm/unistd.h> +#include "audit.h" + +static unsigned dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +static unsigned read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +static unsigned write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +static unsigned chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +static unsigned signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int audit_classify_arch(int arch) +{ +#ifdef CONFIG_COMPAT + if (arch == AUDIT_ARCH_S390) + return 1; +#endif + return 0; +} + +int audit_classify_syscall(int abi, unsigned syscall) +{ +#ifdef CONFIG_COMPAT + if (abi == AUDIT_ARCH_S390) + return s390_classify_syscall(syscall); +#endif + switch(syscall) { + case __NR_open: + return AUDITSC_OPEN; + case __NR_openat: + return AUDITSC_OPENAT; + case __NR_socketcall: + return AUDITSC_SOCKETCALL; + case __NR_execve: + return AUDITSC_EXECVE; + case __NR_openat2: + return AUDITSC_OPENAT2; + default: + return AUDITSC_NATIVE; + } +} + +static int __init audit_classes_init(void) +{ +#ifdef CONFIG_COMPAT + audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class); + audit_register_class(AUDIT_CLASS_READ_32, s390_read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class); + audit_register_class(AUDIT_CLASS_CHATTR_32, s390_chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL_32, s390_signal_class); +#endif + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; +} + +__initcall(audit_classes_init); diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h new file mode 100644 index 0000000000..4d4b596412 --- /dev/null +++ 
b/arch/s390/kernel/audit.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_S390_KERNEL_AUDIT_H +#define __ARCH_S390_KERNEL_AUDIT_H + +#include <linux/types.h> + +#ifdef CONFIG_COMPAT +extern int s390_classify_syscall(unsigned); +extern __u32 s390_dir_class[]; +extern __u32 s390_write_class[]; +extern __u32 s390_read_class[]; +extern __u32 s390_chattr_class[]; +extern __u32 s390_signal_class[]; +#endif /* CONFIG_COMPAT */ + +#endif /* __ARCH_S390_KERNEL_AUDIT_H */ diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c new file mode 100644 index 0000000000..56254fa06f --- /dev/null +++ b/arch/s390/kernel/cache.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Extract CPU cache information and expose them via sysfs. + * + * Copyright IBM Corp. 2012 + */ + +#include <linux/seq_file.h> +#include <linux/cpu.h> +#include <linux/cacheinfo.h> +#include <asm/facility.h> + +enum { + CACHE_SCOPE_NOTEXISTS, + CACHE_SCOPE_PRIVATE, + CACHE_SCOPE_SHARED, + CACHE_SCOPE_RESERVED, +}; + +enum { + CTYPE_SEPARATE, + CTYPE_DATA, + CTYPE_INSTRUCTION, + CTYPE_UNIFIED, +}; + +enum { + EXTRACT_TOPOLOGY, + EXTRACT_LINE_SIZE, + EXTRACT_SIZE, + EXTRACT_ASSOCIATIVITY, +}; + +enum { + CACHE_TI_UNIFIED = 0, + CACHE_TI_DATA = 0, + CACHE_TI_INSTRUCTION, +}; + +struct cache_info { + unsigned char : 4; + unsigned char scope : 2; + unsigned char type : 2; +}; + +#define CACHE_MAX_LEVEL 8 +union cache_topology { + struct cache_info ci[CACHE_MAX_LEVEL]; + unsigned long raw; +}; + +static const char * const cache_type_string[] = { + "", + "Instruction", + "Data", + "", + "Unified", +}; + +static const enum cache_type cache_type_map[] = { + [CTYPE_SEPARATE] = CACHE_TYPE_SEPARATE, + [CTYPE_DATA] = CACHE_TYPE_DATA, + [CTYPE_INSTRUCTION] = CACHE_TYPE_INST, + [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, +}; + +void show_cacheinfo(struct seq_file *m) +{ + struct cpu_cacheinfo *this_cpu_ci; + struct cacheinfo *cache; + int idx; + + this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask)); + for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { + cache = this_cpu_ci->info_list + idx; + seq_printf(m, "cache%-11d: ", idx); + seq_printf(m, "level=%d ", cache->level); + seq_printf(m, "type=%s ", cache_type_string[cache->type]); + seq_printf(m, "scope=%s ", + cache->disable_sysfs ? 
"Shared" : "Private"); + seq_printf(m, "size=%dK ", cache->size >> 10); + seq_printf(m, "line_size=%u ", cache->coherency_line_size); + seq_printf(m, "associativity=%d", cache->ways_of_associativity); + seq_puts(m, "\n"); + } +} + +static inline enum cache_type get_cache_type(struct cache_info *ci, int level) +{ + if (level >= CACHE_MAX_LEVEL) + return CACHE_TYPE_NOCACHE; + ci += level; + if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE) + return CACHE_TYPE_NOCACHE; + return cache_type_map[ci->type]; +} + +static inline unsigned long ecag(int ai, int li, int ti) +{ + return __ecag(ECAG_CACHE_ATTRIBUTE, ai << 4 | li << 1 | ti); +} + +static void ci_leaf_init(struct cacheinfo *this_leaf, int private, + enum cache_type type, unsigned int level, int cpu) +{ + int ti, num_sets; + + if (type == CACHE_TYPE_INST) + ti = CACHE_TI_INSTRUCTION; + else + ti = CACHE_TI_UNIFIED; + this_leaf->level = level + 1; + this_leaf->type = type; + this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti); + this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); + this_leaf->size = ecag(EXTRACT_SIZE, level, ti); + num_sets = this_leaf->size / this_leaf->coherency_line_size; + num_sets /= this_leaf->ways_of_associativity; + this_leaf->number_of_sets = num_sets; + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + if (!private) + this_leaf->disable_sysfs = true; +} + +int init_cache_level(unsigned int cpu) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + unsigned int level = 0, leaves = 0; + union cache_topology ct; + enum cache_type ctype; + + if (!this_cpu_ci) + return -EINVAL; + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + do { + ctype = get_cache_type(&ct.ci[0], level); + if (ctype == CACHE_TYPE_NOCACHE) + break; + /* Separate instruction and data caches */ + leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1; + } while (++level < CACHE_MAX_LEVEL); + this_cpu_ci->num_levels = level; + this_cpu_ci->num_leaves = leaves; + return 0; +} + +int populate_cache_leaves(unsigned int cpu) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + unsigned int level, idx, pvt; + union cache_topology ct; + enum cache_type ctype; + + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + for (idx = 0, level = 0; level < this_cpu_ci->num_levels && + idx < this_cpu_ci->num_leaves; idx++, level++) { + if (!this_leaf) + return -EINVAL; + pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0; + ctype = get_cache_type(&ct.ci[0], level); + if (ctype == CACHE_TYPE_SEPARATE) { + ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level, cpu); + ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level, cpu); + } else { + ci_leaf_init(this_leaf++, pvt, ctype, level, cpu); + } + } + return 0; +} diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c new file mode 100644 index 0000000000..554447768b --- /dev/null +++ b/arch/s390/kernel/cert_store.c @@ -0,0 +1,812 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DIAG 0x320 support and certificate store handling + * + * Copyright IBM Corp. 
2023 + * Author(s): Anastasia Eskova <anastasia.eskova@ibm.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/key-type.h> +#include <linux/key.h> +#include <linux/keyctl.h> +#include <linux/kobject.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/sysfs.h> +#include <crypto/sha2.h> +#include <keys/user-type.h> +#include <asm/debug.h> +#include <asm/diag.h> +#include <asm/ebcdic.h> +#include <asm/sclp.h> + +#define DIAG_MAX_RETRIES 10 + +#define VCE_FLAGS_VALID_MASK 0x80 + +#define ISM_LEN_DWORDS 4 +#define VCSSB_LEN_BYTES 128 +#define VCSSB_LEN_NO_CERTS 4 +#define VCB_LEN_NO_CERTS 64 +#define VC_NAME_LEN_BYTES 64 + +#define CERT_STORE_KEY_TYPE_NAME "cert_store_key" +#define CERT_STORE_KEYRING_NAME "cert_store" + +static debug_info_t *cert_store_dbf; +static debug_info_t *cert_store_hexdump; + +#define pr_dbf_msg(fmt, ...) \ + debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__) + +enum diag320_subcode { + DIAG320_SUBCODES = 0, + DIAG320_STORAGE = 1, + DIAG320_CERT_BLOCK = 2, +}; + +enum diag320_rc { + DIAG320_RC_OK = 0x0001, + DIAG320_RC_CS_NOMATCH = 0x0306, +}; + +/* Verification Certificates Store Support Block (VCSSB). */ +struct vcssb { + u32 vcssb_length; + u8 pad_0x04[3]; + u8 version; + u8 pad_0x08[8]; + u32 cs_token; + u8 pad_0x14[12]; + u16 total_vc_index_count; + u16 max_vc_index_count; + u8 pad_0x24[28]; + u32 max_vce_length; + u32 max_vcxe_length; + u8 pad_0x48[8]; + u32 max_single_vcb_length; + u32 total_vcb_length; + u32 max_single_vcxb_length; + u32 total_vcxb_length; + u8 pad_0x60[32]; +} __packed __aligned(8); + +/* Verification Certificate Entry (VCE) Header. */ +struct vce_header { + u32 vce_length; + u8 flags; + u8 key_type; + u16 vc_index; + u8 vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */ + u8 vc_format; + u8 pad_0x49; + u16 key_id_length; + u8 pad_0x4c; + u8 vc_hash_type; + u16 vc_hash_length; + u8 pad_0x50[4]; + u32 vc_length; + u8 pad_0x58[8]; + u16 vc_hash_offset; + u16 vc_offset; + u8 pad_0x64[28]; +} __packed __aligned(4); + +/* Verification Certificate Block (VCB) Header. */ +struct vcb_header { + u32 vcb_input_length; + u8 pad_0x04[4]; + u16 first_vc_index; + u16 last_vc_index; + u32 pad_0x0c; + u32 cs_token; + u8 pad_0x14[12]; + u32 vcb_output_length; + u8 pad_0x24[3]; + u8 version; + u16 stored_vc_count; + u16 remaining_vc_count; + u8 pad_0x2c[20]; +} __packed __aligned(4); + +/* Verification Certificate Block (VCB). */ +struct vcb { + struct vcb_header vcb_hdr; + u8 vcb_buf[]; +} __packed __aligned(4); + +/* Verification Certificate Entry (VCE). */ +struct vce { + struct vce_header vce_hdr; + u8 cert_data_buf[]; +} __packed __aligned(4); + +static void cert_store_key_describe(const struct key *key, struct seq_file *m) +{ + char ascii[VC_NAME_LEN_BYTES + 1]; + + /* + * First 64 bytes of the key description is key name in EBCDIC CP 500. + * Convert it to ASCII for displaying in /proc/keys. + */ + strscpy(ascii, key->description, sizeof(ascii)); + EBCASC_500(ascii, VC_NAME_LEN_BYTES); + seq_puts(m, ascii); + + seq_puts(m, &key->description[VC_NAME_LEN_BYTES]); + if (key_is_positive(key)) + seq_printf(m, ": %u", key->datalen); +} + +/* + * Certificate store key type takes over properties of + * user key but cannot be updated. 
+ */ +static struct key_type key_type_cert_store_key = { + .name = CERT_STORE_KEY_TYPE_NAME, + .preparse = user_preparse, + .free_preparse = user_free_preparse, + .instantiate = generic_key_instantiate, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = cert_store_key_describe, + .read = user_read, +}; + +/* Logging functions. */ +static void pr_dbf_vcb(const struct vcb *b) +{ + pr_dbf_msg("VCB Header:"); + pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length); + pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index); + pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index); + pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token); + pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length); + pr_dbf_msg("version: %d", b->vcb_hdr.version); + pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count); + pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count); +} + +static void pr_dbf_vce(const struct vce *e) +{ + unsigned char vc_name[VC_NAME_LEN_BYTES + 1]; + char log_string[VC_NAME_LEN_BYTES + 40]; + + pr_dbf_msg("VCE Header:"); + pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length); + pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags); + pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type); + pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index); + pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format); + pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length); + pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type); + pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length); + pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset); + pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length); + pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset); + + /* Certificate name in ASCII. */ + memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES); + EBCASC_500(vc_name, VC_NAME_LEN_BYTES); + vc_name[VC_NAME_LEN_BYTES] = '\0'; + + snprintf(log_string, sizeof(log_string), + "index: %d vce_hdr.vc_name (ASCII): %s", + e->vce_hdr.vc_index, vc_name); + debug_text_event(cert_store_hexdump, 3, log_string); + + /* Certificate data. 
*/ + debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start"); + debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128); + debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end"); + debug_event(cert_store_hexdump, 3, + (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128); +} + +static void pr_dbf_vcssb(const struct vcssb *s) +{ + debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1"); + debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES); + + pr_dbf_msg("VCSSB:"); + pr_dbf_msg("vcssb_length: %u", s->vcssb_length); + pr_dbf_msg("version: %u", s->version); + pr_dbf_msg("cs_token: %u", s->cs_token); + pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count); + pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count); + pr_dbf_msg("max_vce_length: %u", s->max_vce_length); + pr_dbf_msg("max_vcxe_length: %u", s->max_vce_length); + pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length); + pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length); + pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length); + pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length); +} + +static int __diag320(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, }; + + asm volatile( + " diag %[rp],%[subcode],0x320\n" + "0: nopr %%r7\n" + EX_TABLE(0b, 0b) + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "cc", "memory"); + + return rp.odd; +} + +static int diag320(unsigned long subcode, void *addr) +{ + diag_stat_inc(DIAG_STAT_X320); + + return __diag320(subcode, addr); +} + +/* + * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in + * VCE. Return -EINVAL if hashes don't match. + */ +static int check_certificate_hash(const struct vce *vce) +{ + u8 hash[SHA256_DIGEST_SIZE]; + u16 vc_hash_length; + u8 *vce_hash; + + vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset; + vc_hash_length = vce->vce_hdr.vc_hash_length; + sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash); + if (memcmp(vce_hash, hash, vc_hash_length) == 0) + return 0; + + pr_dbf_msg("SHA256 hash of received certificate does not match"); + debug_text_event(cert_store_hexdump, 3, "VCE hash:"); + debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE); + debug_text_event(cert_store_hexdump, 3, "Calculated hash:"); + debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE); + + return -EINVAL; +} + +static int check_certificate_valid(const struct vce *vce) +{ + if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) { + pr_dbf_msg("Certificate entry is invalid"); + return -EINVAL; + } + if (vce->vce_hdr.vc_format != 1) { + pr_dbf_msg("Certificate format is not supported"); + return -EINVAL; + } + if (vce->vce_hdr.vc_hash_type != 1) { + pr_dbf_msg("Hash type is not supported"); + return -EINVAL; + } + + return check_certificate_hash(vce); +} + +static struct key *get_user_session_keyring(void) +{ + key_ref_t us_keyring_ref; + + us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING, + KEY_LOOKUP_CREATE, KEY_NEED_LINK); + if (IS_ERR(us_keyring_ref)) { + pr_dbf_msg("Couldn't get user session keyring: %ld", + PTR_ERR(us_keyring_ref)); + return ERR_PTR(-ENOKEY); + } + key_ref_put(us_keyring_ref); + return key_ref_to_ptr(us_keyring_ref); +} + +/* Invalidate all keys from cert_store keyring. 
*/ +static int invalidate_keyring_keys(struct key *keyring) +{ + unsigned long num_keys, key_index; + size_t keyring_payload_len; + key_serial_t *key_array; + struct key *current_key; + int rc; + + keyring_payload_len = key_type_keyring.read(keyring, NULL, 0); + num_keys = keyring_payload_len / sizeof(key_serial_t); + key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL); + if (!key_array) + return -ENOMEM; + + rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len); + if (rc != keyring_payload_len) { + pr_dbf_msg("Couldn't read keyring payload"); + goto out; + } + + for (key_index = 0; key_index < num_keys; key_index++) { + current_key = key_lookup(key_array[key_index]); + pr_dbf_msg("Invalidating key %08x", current_key->serial); + + key_invalidate(current_key); + key_put(current_key); + rc = key_unlink(keyring, current_key); + if (rc) { + pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc); + break; + } + } +out: + kfree(key_array); + return rc; +} + +static struct key *find_cs_keyring(void) +{ + key_ref_t cs_keyring_ref; + struct key *cs_keyring; + + cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true), + &key_type_keyring, CERT_STORE_KEYRING_NAME, + false); + if (!IS_ERR(cs_keyring_ref)) { + cs_keyring = key_ref_to_ptr(cs_keyring_ref); + key_ref_put(cs_keyring_ref); + goto found; + } + /* Search default locations: thread, process, session keyrings */ + cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL); + if (IS_ERR(cs_keyring)) + return NULL; + key_put(cs_keyring); +found: + return cs_keyring; +} + +static void cleanup_cs_keys(void) +{ + struct key *cs_keyring; + + cs_keyring = find_cs_keyring(); + if (!cs_keyring) + return; + + pr_dbf_msg("Found cert_store keyring. Purging..."); + /* + * Remove cert_store_key_type in case invalidation + * of old cert_store keys failed (= severe error). + */ + if (invalidate_keyring_keys(cs_keyring)) + unregister_key_type(&key_type_cert_store_key); + + keyring_clear(cs_keyring); + key_invalidate(cs_keyring); + key_put(cs_keyring); + key_unlink(get_user_session_keyring(), cs_keyring); +} + +static struct key *create_cs_keyring(void) +{ + static struct key *cs_keyring; + + /* Cleanup previous cs_keyring and all associated keys if any. */ + cleanup_cs_keys(); + cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID, + GLOBAL_ROOT_GID, current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP, + NULL, get_user_session_keyring()); + if (IS_ERR(cs_keyring)) { + pr_dbf_msg("Can't allocate cert_store keyring"); + return NULL; + } + + pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial); + + /* + * In case a previous clean-up ran into an + * error and unregistered key type. + */ + register_key_type(&key_type_cert_store_key); + + return cs_keyring; +} + +/* + * Allocate memory and create key description in format + * [key name in EBCDIC]:[VCE index]:[CS token]. + * Return a pointer to key description or NULL if memory + * allocation failed. Memory should be freed by caller. + */ +static char *get_key_description(struct vcssb *vcssb, const struct vce *vce) +{ + size_t len, name_len; + u32 cs_token; + char *desc; + + cs_token = vcssb->cs_token; + /* Description string contains "%64s:%05u:%010u\0". 
*/ + name_len = sizeof(vce->vce_hdr.vc_name); + len = name_len + 1 + 5 + 1 + 10 + 1; + desc = kmalloc(len, GFP_KERNEL); + if (!desc) + return NULL; + + memcpy(desc, vce->vce_hdr.vc_name, name_len); + snprintf(desc + name_len, len - name_len, ":%05u:%010u", + vce->vce_hdr.vc_index, cs_token); + + return desc; +} + +/* + * Create a key of type "cert_store_key" using the data from VCE for key + * payload and key description. Link the key to "cert_store" keyring. + */ +static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce, + struct key *keyring) +{ + key_ref_t newkey; + char *desc; + int rc; + + desc = get_key_description(vcssb, vce); + if (!desc) + return -ENOMEM; + + newkey = key_create_or_update( + make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME, + desc, (u8 *)vce + vce->vce_hdr.vc_offset, + vce->vce_hdr.vc_length, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + + rc = PTR_ERR_OR_ZERO(newkey); + if (rc) { + pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc); + rc = -ENOKEY; + goto out; + } + + key_ref_put(newkey); +out: + kfree(desc); + return rc; +} + +/* Get Verification Certificate Storage Size block with DIAG320 subcode2. */ +static int get_vcssb(struct vcssb *vcssb) +{ + int diag320_rc; + + memset(vcssb, 0, sizeof(*vcssb)); + vcssb->vcssb_length = VCSSB_LEN_BYTES; + diag320_rc = diag320(DIAG320_STORAGE, vcssb); + pr_dbf_vcssb(vcssb); + + if (diag320_rc != DIAG320_RC_OK) { + pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc); + return -EIO; + } + if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) { + pr_dbf_msg("No certificates available for current configuration"); + return -ENOKEY; + } + + return 0; +} + +static u32 get_4k_mult_vcb_size(struct vcssb *vcssb) +{ + return round_up(vcssb->max_single_vcb_length, PAGE_SIZE); +} + +/* Fill input fields of single-entry VCB that will be read by LPAR. */ +static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index) +{ + memset(vcb, 0, sizeof(*vcb)); + vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb); + vcb->vcb_hdr.cs_token = vcssb->cs_token; + + /* Request single entry. */ + vcb->vcb_hdr.first_vc_index = index; + vcb->vcb_hdr.last_vc_index = index; +} + +static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce) +{ + struct vce *extracted_vce; + + extracted_vce = (struct vce *)vcb->vcb_buf; + memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length); + pr_dbf_vce(vce); +} + +static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb) +{ + int rc, diag320_rc; + + fill_vcb_input(vcssb, vcb, index); + + diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb); + pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc); + pr_dbf_vcb(vcb); + + switch (diag320_rc) { + case DIAG320_RC_OK: + rc = 0; + if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) { + pr_dbf_msg("No certificate entry for index %u", index); + rc = -ENOKEY; + } else if (vcb->vcb_hdr.remaining_vc_count != 0) { + /* Retry on insufficient space. */ + pr_dbf_msg("Couldn't get all requested certificates"); + rc = -EAGAIN; + } + break; + case DIAG320_RC_CS_NOMATCH: + pr_dbf_msg("Certificate Store token mismatch"); + rc = -EAGAIN; + break; + default: + pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc); + rc = -EINVAL; + break; + } + + return rc; +} + +/* + * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call, + * extract VCE and create a key from its' certificate. 
+ */ +static int create_key_from_sevcb(struct vcssb *vcssb, u16 index, + struct key *keyring) +{ + struct vcb *vcb; + struct vce *vce; + int rc; + + rc = -ENOMEM; + vcb = vmalloc(get_4k_mult_vcb_size(vcssb)); + vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr)); + if (!vcb || !vce) + goto out; + + rc = get_sevcb(vcssb, index, vcb); + if (rc) + goto out; + + extract_vce_from_sevcb(vcb, vce); + rc = check_certificate_valid(vce); + if (rc) + goto out; + + rc = create_key_from_vce(vcssb, vce, keyring); + if (rc) + goto out; + + pr_dbf_msg("Successfully created key from Certificate Entry %d", index); +out: + vfree(vce); + vfree(vcb); + return rc; +} + +/* + * Request a single-entry VCB for each VCE available for the partition. + * Create a key from it and link it to cert_store keyring. If no keys + * could be created (i.e. VCEs were invalid) return -ENOKEY. + */ +static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring) +{ + int rc, index, count, added; + + count = 0; + added = 0; + /* Certificate Store entries indices start with 1 and have no gaps. */ + for (index = 1; index < vcssb->total_vc_index_count + 1; index++) { + pr_dbf_msg("Creating key from VCE %u", index); + rc = create_key_from_sevcb(vcssb, index, keyring); + count++; + + if (rc == -EAGAIN) + return rc; + + if (rc) + pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc); + else + added++; + } + + if (added == 0) { + pr_dbf_msg("Processed %d entries. No keys created", count); + return -ENOKEY; + } + + pr_info("Added %d of %d keys to cert_store keyring", added, count); + + /* + * Do not allow to link more keys to certificate store keyring after all + * the VCEs were processed. + */ + rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL); + if (rc) + pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc); + + return 0; +} + +/* + * Check which DIAG320 subcodes are installed. + * Return -ENOENT if subcodes 1 or 2 are not available. + */ +static int query_diag320_subcodes(void) +{ + unsigned long ism[ISM_LEN_DWORDS]; + int rc; + + rc = diag320(0, ism); + if (rc != DIAG320_RC_OK) { + pr_dbf_msg("DIAG320 subcode query returned %04x", rc); + return -ENOENT; + } + + debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0"); + debug_event(cert_store_hexdump, 3, ism, sizeof(ism)); + + if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) { + pr_dbf_msg("Not all required DIAG320 subcodes are installed"); + return -ENOENT; + } + + return 0; +} + +/* + * Check if Certificate Store is supported by the firmware and DIAG320 subcodes + * 1 and 2 are installed. Create cert_store keyring and link all certificates + * available for the current partition to it as "cert_store_key" type + * keys. On refresh or error invalidate cert_store keyring and destroy + * all keys of "cert_store_key" type. 
+ */ +static int fill_cs_keyring(void) +{ + struct key *cs_keyring; + struct vcssb *vcssb; + int rc; + + rc = -ENOMEM; + vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL); + if (!vcssb) + goto cleanup_keys; + + rc = -ENOENT; + if (!sclp.has_diag320) { + pr_dbf_msg("Certificate Store is not supported"); + goto cleanup_keys; + } + + rc = query_diag320_subcodes(); + if (rc) + goto cleanup_keys; + + rc = get_vcssb(vcssb); + if (rc) + goto cleanup_keys; + + rc = -ENOMEM; + cs_keyring = create_cs_keyring(); + if (!cs_keyring) + goto cleanup_keys; + + rc = add_certificates_to_keyring(vcssb, cs_keyring); + if (rc) + goto cleanup_cs_keyring; + + goto out; + +cleanup_cs_keyring: + key_put(cs_keyring); +cleanup_keys: + cleanup_cs_keys(); +out: + kfree(vcssb); + return rc; +} + +static DEFINE_MUTEX(cs_refresh_lock); +static int cs_status_val = -1; + +static ssize_t cs_status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + if (cs_status_val == -1) + return sysfs_emit(buf, "uninitialized\n"); + else if (cs_status_val == 0) + return sysfs_emit(buf, "ok\n"); + + return sysfs_emit(buf, "failed (%d)\n", cs_status_val); +} + +static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status); + +static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int rc, retries; + + pr_dbf_msg("Refresh certificate store information requested"); + rc = mutex_lock_interruptible(&cs_refresh_lock); + if (rc) + return rc; + + for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) { + /* Request certificates from certificate store. */ + rc = fill_cs_keyring(); + if (rc) + pr_dbf_msg("Failed to refresh certificate store information (%d)", rc); + if (rc != -EAGAIN) + break; + } + cs_status_val = rc; + mutex_unlock(&cs_refresh_lock); + + return rc ?: count; +} + +static struct kobj_attribute refresh_attr = __ATTR_WO(refresh); + +static const struct attribute *cert_store_attrs[] __initconst = { + &cs_status_attr.attr, + &refresh_attr.attr, + NULL, +}; + +static struct kobject *cert_store_kobj; + +static int __init cert_store_init(void) +{ + int rc = -ENOMEM; + + cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64); + if (!cert_store_dbf) + goto cleanup_dbf; + + cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128); + if (!cert_store_hexdump) + goto cleanup_dbf; + + debug_register_view(cert_store_hexdump, &debug_hex_ascii_view); + debug_register_view(cert_store_dbf, &debug_sprintf_view); + + /* Create directory /sys/firmware/cert_store. 
*/ + cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj); + if (!cert_store_kobj) + goto cleanup_dbf; + + rc = sysfs_create_files(cert_store_kobj, cert_store_attrs); + if (rc) + goto cleanup_kobj; + + register_key_type(&key_type_cert_store_key); + + return rc; + +cleanup_kobj: + kobject_put(cert_store_kobj); +cleanup_dbf: + debug_unregister(cert_store_dbf); + debug_unregister(cert_store_hexdump); + + return rc; +} +device_initcall(cert_store_init); diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c new file mode 100644 index 0000000000..a7c46e8310 --- /dev/null +++ b/arch/s390/kernel/compat_audit.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#undef __s390x__ +#include <linux/audit_arch.h> +#include <asm/unistd.h> +#include "audit.h" + +unsigned s390_dir_class[] = { +#include <asm-generic/audit_dir_write.h> +~0U +}; + +unsigned s390_chattr_class[] = { +#include <asm-generic/audit_change_attr.h> +~0U +}; + +unsigned s390_write_class[] = { +#include <asm-generic/audit_write.h> +~0U +}; + +unsigned s390_read_class[] = { +#include <asm-generic/audit_read.h> +~0U +}; + +unsigned s390_signal_class[] = { +#include <asm-generic/audit_signal.h> +~0U +}; + +int s390_classify_syscall(unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return AUDITSC_OPEN; + case __NR_openat: + return AUDITSC_OPENAT; + case __NR_socketcall: + return AUDITSC_SOCKETCALL; + case __NR_execve: + return AUDITSC_EXECVE; + case __NR_openat2: + return AUDITSC_OPENAT2; + default: + return AUDITSC_COMPAT; + } +} diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c new file mode 100644 index 0000000000..f9d418d1b6 --- /dev/null +++ b/arch/s390/kernel/compat_linux.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 2000 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Gerhard Tonn (ton@de.ibm.com) + * Thomas Spatzier (tspat@de.ibm.com) + * + * Conversion between 31bit and 64bit native syscalls. + * + * Heavily inspired by the 32-bit Sparc compat code which is + * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 1997 David S. 
Miller (davem@caip.rutgers.edu) + * + */ + + +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/signal.h> +#include <linux/resource.h> +#include <linux/times.h> +#include <linux/smp.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/uio.h> +#include <linux/quota.h> +#include <linux/poll.h> +#include <linux/personality.h> +#include <linux/stat.h> +#include <linux/filter.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/ipv6.h> +#include <linux/in.h> +#include <linux/icmpv6.h> +#include <linux/syscalls.h> +#include <linux/sysctl.h> +#include <linux/binfmts.h> +#include <linux/capability.h> +#include <linux/compat.h> +#include <linux/vfs.h> +#include <linux/ptrace.h> +#include <linux/fadvise.h> +#include <linux/ipc.h> +#include <linux/slab.h> + +#include <asm/types.h> +#include <linux/uaccess.h> + +#include <net/scm.h> +#include <net/sock.h> + +#include "compat_linux.h" + +#ifdef CONFIG_SYSVIPC +COMPAT_SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, compat_ulong_t, second, + compat_ulong_t, third, compat_uptr_t, ptr) +{ + if (call >> 16) /* hack for backward compatibility */ + return -EINVAL; + return compat_ksys_ipc(call, first, second, third, ptr, third); +} +#endif + +COMPAT_SYSCALL_DEFINE3(s390_truncate64, const char __user *, path, u32, high, u32, low) +{ + return ksys_truncate(path, (unsigned long)high << 32 | low); +} + +COMPAT_SYSCALL_DEFINE3(s390_ftruncate64, unsigned int, fd, u32, high, u32, low) +{ + return ksys_ftruncate(fd, (unsigned long)high << 32 | low); +} + +COMPAT_SYSCALL_DEFINE5(s390_pread64, unsigned int, fd, char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) +{ + if ((compat_ssize_t) count < 0) + return -EINVAL; + return ksys_pread64(fd, ubuf, count, (unsigned long)high << 32 | low); +} + +COMPAT_SYSCALL_DEFINE5(s390_pwrite64, unsigned int, fd, const char __user *, ubuf, + compat_size_t, count, u32, high, u32, low) +{ + if ((compat_ssize_t) count < 0) + return -EINVAL; + return ksys_pwrite64(fd, ubuf, count, (unsigned long)high << 32 | low); +} + +COMPAT_SYSCALL_DEFINE4(s390_readahead, int, fd, u32, high, u32, low, s32, count) +{ + return ksys_readahead(fd, (unsigned long)high << 32 | low, count); +} + +struct stat64_emu31 { + unsigned long long st_dev; + unsigned int __pad1; +#define STAT64_HAS_BROKEN_ST_INO 1 + u32 __st_ino; + unsigned int st_mode; + unsigned int st_nlink; + u32 st_uid; + u32 st_gid; + unsigned long long st_rdev; + unsigned int __pad3; + long st_size; + u32 st_blksize; + unsigned char __pad4[4]; + u32 __pad5; /* future possible st_blocks high bits */ + u32 st_blocks; /* Number 512-byte blocks allocated. 
*/ + u32 st_atime; + u32 __pad6; + u32 st_mtime; + u32 __pad7; + u32 st_ctime; + u32 __pad8; /* will be high 32 bits of ctime someday */ + unsigned long st_ino; +}; + +static int cp_stat64(struct stat64_emu31 __user *ubuf, struct kstat *stat) +{ + struct stat64_emu31 tmp; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.st_dev = huge_encode_dev(stat->dev); + tmp.st_ino = stat->ino; + tmp.__st_ino = (u32)stat->ino; + tmp.st_mode = stat->mode; + tmp.st_nlink = (unsigned int)stat->nlink; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); + tmp.st_rdev = huge_encode_dev(stat->rdev); + tmp.st_size = stat->size; + tmp.st_blksize = (u32)stat->blksize; + tmp.st_blocks = (u32)stat->blocks; + tmp.st_atime = (u32)stat->atime.tv_sec; + tmp.st_mtime = (u32)stat->mtime.tv_sec; + tmp.st_ctime = (u32)stat->ctime.tv_sec; + + return copy_to_user(ubuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; +} + +COMPAT_SYSCALL_DEFINE2(s390_stat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) +{ + struct kstat stat; + int ret = vfs_stat(filename, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +COMPAT_SYSCALL_DEFINE2(s390_lstat64, const char __user *, filename, struct stat64_emu31 __user *, statbuf) +{ + struct kstat stat; + int ret = vfs_lstat(filename, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +COMPAT_SYSCALL_DEFINE2(s390_fstat64, unsigned int, fd, struct stat64_emu31 __user *, statbuf) +{ + struct kstat stat; + int ret = vfs_fstat(fd, &stat); + if (!ret) + ret = cp_stat64(statbuf, &stat); + return ret; +} + +COMPAT_SYSCALL_DEFINE4(s390_fstatat64, unsigned int, dfd, const char __user *, filename, + struct stat64_emu31 __user *, statbuf, int, flag) +{ + struct kstat stat; + int error; + + error = vfs_fstatat(dfd, filename, &stat, flag); + if (error) + return error; + return cp_stat64(statbuf, &stat); +} + +/* + * Linux/i386 didn't use to be able to handle more than + * 4 system call parameters, so these system calls used a memory + * block for parameter passing.. + */ + +struct mmap_arg_struct_emu31 { + compat_ulong_t addr; + compat_ulong_t len; + compat_ulong_t prot; + compat_ulong_t flags; + compat_ulong_t fd; + compat_ulong_t offset; +}; + +COMPAT_SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct_emu31 __user *, arg) +{ + struct mmap_arg_struct_emu31 a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + if (a.offset & ~PAGE_MASK) + return -EINVAL; + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); +} + +COMPAT_SYSCALL_DEFINE1(s390_mmap2, struct mmap_arg_struct_emu31 __user *, arg) +{ + struct mmap_arg_struct_emu31 a; + + if (copy_from_user(&a, arg, sizeof(a))) + return -EFAULT; + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); +} + +COMPAT_SYSCALL_DEFINE3(s390_read, unsigned int, fd, char __user *, buf, compat_size_t, count) +{ + if ((compat_ssize_t) count < 0) + return -EINVAL; + + return ksys_read(fd, buf, count); +} + +COMPAT_SYSCALL_DEFINE3(s390_write, unsigned int, fd, const char __user *, buf, compat_size_t, count) +{ + if ((compat_ssize_t) count < 0) + return -EINVAL; + + return ksys_write(fd, buf, count); +} + +/* + * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64. + * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE} + * because the 31 bit values differ from the 64 bit values. 
+ */ + +COMPAT_SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, high, u32, low, compat_size_t, len, int, advise) +{ + if (advise == 4) + advise = POSIX_FADV_DONTNEED; + else if (advise == 5) + advise = POSIX_FADV_NOREUSE; + return ksys_fadvise64_64(fd, (unsigned long)high << 32 | low, len, + advise); +} + +struct fadvise64_64_args { + int fd; + long long offset; + long long len; + int advice; +}; + +COMPAT_SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) +{ + struct fadvise64_64_args a; + + if ( copy_from_user(&a, args, sizeof(a)) ) + return -EFAULT; + if (a.advice == 4) + a.advice = POSIX_FADV_DONTNEED; + else if (a.advice == 5) + a.advice = POSIX_FADV_NOREUSE; + return ksys_fadvise64_64(a.fd, a.offset, a.len, a.advice); +} + +COMPAT_SYSCALL_DEFINE6(s390_sync_file_range, int, fd, u32, offhigh, u32, offlow, + u32, nhigh, u32, nlow, unsigned int, flags) +{ + return ksys_sync_file_range(fd, ((loff_t)offhigh << 32) + offlow, + ((u64)nhigh << 32) + nlow, flags); +} + +COMPAT_SYSCALL_DEFINE6(s390_fallocate, int, fd, int, mode, u32, offhigh, u32, offlow, + u32, lenhigh, u32, lenlow) +{ + return ksys_fallocate(fd, mode, ((loff_t)offhigh << 32) + offlow, + ((u64)lenhigh << 32) + lenlow); +} diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h new file mode 100644 index 0000000000..ef23739b27 --- /dev/null +++ b/arch/s390/kernel/compat_linux.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390X_S390_H +#define _ASM_S390X_S390_H + +#include <linux/compat.h> +#include <linux/socket.h> +#include <linux/syscalls.h> +#include <asm/ptrace.h> + +/* + * Macro that masks the high order bit of a 32 bit pointer and + * converts it to a 64 bit pointer. + */ +#define A(__x) ((unsigned long)((__x) & 0x7FFFFFFFUL)) +#define AA(__x) ((unsigned long)(__x)) + +/* Now 32bit compatibility types */ +struct ipc_kludge_32 { + __u32 msgp; /* pointer */ + __s32 msgtyp; +}; + +/* asm/sigcontext.h */ +typedef union { + __u64 d; + __u32 f; +} freg_t32; + +typedef struct { + unsigned int fpc; + unsigned int pad; + freg_t32 fprs[__NUM_FPRS]; +} _s390_fp_regs32; + +typedef struct { + psw_t32 psw; + __u32 gprs[__NUM_GPRS]; + __u32 acrs[__NUM_ACRS]; +} _s390_regs_common32; + +typedef struct { + _s390_regs_common32 regs; + _s390_fp_regs32 fpregs; +} _sigregs32; + +typedef struct { + __u32 gprs_high[__NUM_GPRS]; + __u64 vxrs_low[__NUM_VXRS_LOW]; + __vector128 vxrs_high[__NUM_VXRS_HIGH]; + __u8 __reserved[128]; +} _sigregs_ext32; + +#define _SIGCONTEXT_NSIG32 64 +#define _SIGCONTEXT_NSIG_BPW32 32 +#define __SIGNAL_FRAMESIZE32 96 +#define _SIGMASK_COPY_SIZE32 (sizeof(u32) * 2) + +struct sigcontext32 { + __u32 oldmask[_COMPAT_NSIG_WORDS]; + __u32 sregs; /* pointer */ +}; + +/* asm/signal.h */ + +/* asm/ucontext.h */ +struct ucontext32 { + __u32 uc_flags; + __u32 uc_link; /* pointer */ + compat_stack_t uc_stack; + _sigregs32 uc_mcontext; + compat_sigset_t uc_sigmask; + /* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. 
*/ + unsigned char __unused[128 - sizeof(compat_sigset_t)]; + _sigregs_ext32 uc_mcontext_ext; +}; + +struct stat64_emu31; +struct mmap_arg_struct_emu31; +struct fadvise64_64_args; + +long compat_sys_s390_truncate64(const char __user *path, u32 high, u32 low); +long compat_sys_s390_ftruncate64(unsigned int fd, u32 high, u32 low); +long compat_sys_s390_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count, u32 high, u32 low); +long compat_sys_s390_readahead(int fd, u32 high, u32 low, s32 count); +long compat_sys_s390_stat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_lstat64(const char __user *filename, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstat64(unsigned int fd, struct stat64_emu31 __user *statbuf); +long compat_sys_s390_fstatat64(unsigned int dfd, const char __user *filename, struct stat64_emu31 __user *statbuf, int flag); +long compat_sys_s390_old_mmap(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_mmap2(struct mmap_arg_struct_emu31 __user *arg); +long compat_sys_s390_read(unsigned int fd, char __user *buf, compat_size_t count); +long compat_sys_s390_write(unsigned int fd, const char __user *buf, compat_size_t count); +long compat_sys_s390_fadvise64(int fd, u32 high, u32 low, compat_size_t len, int advise); +long compat_sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); +long compat_sys_s390_sync_file_range(int fd, u32 offhigh, u32 offlow, u32 nhigh, u32 nlow, unsigned int flags); +long compat_sys_s390_fallocate(int fd, int mode, u32 offhigh, u32 offlow, u32 lenhigh, u32 lenlow); +long compat_sys_sigreturn(void); +long compat_sys_rt_sigreturn(void); + +#endif /* _ASM_S390X_S390_H */ diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h new file mode 100644 index 0000000000..3c400fc7e9 --- /dev/null +++ b/arch/s390/kernel/compat_ptrace.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _PTRACE32_H +#define _PTRACE32_H + +#include <asm/ptrace.h> /* needed for NUM_CR_WORDS */ +#include "compat_linux.h" /* needed for psw_compat_t */ + +struct compat_per_struct_kernel { + __u32 cr9; /* PER control bits */ + __u32 cr10; /* PER starting address */ + __u32 cr11; /* PER ending address */ + __u32 bits; /* Obsolete software bits */ + __u32 starting_addr; /* User specified start address */ + __u32 ending_addr; /* User specified end address */ + __u16 perc_atmid; /* PER trap ATMID */ + __u32 address; /* PER trap instruction address */ + __u8 access_id; /* PER trap access identification */ +}; + +struct compat_user_regs_struct +{ + psw_compat_t psw; + u32 gprs[NUM_GPRS]; + u32 acrs[NUM_ACRS]; + u32 orig_gpr2; + /* nb: there's a 4-byte hole here */ + s390_fp_regs fp_regs; + /* + * These per registers are in here so that gdb can modify them + * itself as there is no "official" ptrace interface for hardware + * watchpoints. This is the way intel does it. + */ + struct compat_per_struct_kernel per_info; + u32 ieee_instruction_pointer; /* obsolete, always 0 */ +}; + +struct compat_user { + /* We start with the registers, to mimic the way that "memory" + is returned from the ptrace(3,...) function. */ + struct compat_user_regs_struct regs; + /* The rest of this junk is to help gdb figure out what goes where */ + u32 u_tsize; /* Text segment size (pages). */ + u32 u_dsize; /* Data segment size (pages). 
*/ + u32 u_ssize; /* Stack segment size (pages). */ + u32 start_code; /* Starting virtual address of text. */ + u32 start_stack; /* Starting virtual address of stack area. + This is actually the bottom of the stack, + the top of the stack is always found in the + esp register. */ + s32 signal; /* Signal that caused the core dump. */ + u32 u_ar0; /* Used by gdb to help find the values for */ + /* the registers. */ + u32 magic; /* To uniquely identify a core file */ + char u_comm[32]; /* User command that was responsible */ +}; + +typedef struct +{ + __u32 len; + __u32 kernel_addr; + __u32 process_addr; +} compat_ptrace_area; + +#endif /* _PTRACE32_H */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c new file mode 100644 index 0000000000..cecedd01d4 --- /dev/null +++ b/arch/s390/kernel/compat_signal.c @@ -0,0 +1,422 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2000, 2006 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * Gerhard Tonn (ton@de.ibm.com) + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + */ + +#include <linux/compat.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include <linux/personality.h> +#include <linux/binfmts.h> +#include <asm/ucontext.h> +#include <linux/uaccess.h> +#include <asm/lowcore.h> +#include <asm/switch_to.h> +#include <asm/vdso.h> +#include "compat_linux.h" +#include "compat_ptrace.h" +#include "entry.h" + +typedef struct +{ + __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; + struct sigcontext32 sc; + _sigregs32 sregs; + int signo; + _sigregs_ext32 sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! 
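[Editorial sketch] The "4-byte hole" noted in compat_user_regs_struct above is ordinary structure padding: orig_gpr2 is a 4-byte member and the s390_fp_regs that follows contains 8-byte members, so it starts on the next 8-byte boundary. A generic, self-contained sketch of the effect using made-up stand-in types:

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

/* Stand-in for an fp-regs-like struct: the double members force
 * 8-byte alignment of the whole structure. */
struct fp_like {
        uint32_t fpc;
        uint32_t pad;
        double   fprs[16];
};

struct regs_like {
        uint32_t orig_gpr2;     /* offset 0, size 4 */
        struct fp_like fp;      /* starts at offset 8, not 4 */
};

int main(void)
{
        /* prints "hole after orig_gpr2: 4 bytes" */
        printf("hole after orig_gpr2: %zu bytes\n",
               offsetof(struct regs_like, fp) - sizeof(uint32_t));
        return 0;
}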
*/ +} sigframe32; + +typedef struct +{ + __u8 callee_used_stack[__SIGNAL_FRAMESIZE32]; + __u16 svc_insn; + compat_siginfo_t info; + struct ucontext32 uc; +} rt_sigframe32; + +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + save_access_regs(current->thread.acrs); + save_fpu_regs(); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + restore_access_regs(current->thread.acrs); +} + +static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs) +{ + _sigregs32 user_sregs; + int i; + + user_sregs.regs.psw.mask = (__u32)(regs->psw.mask >> 32); + user_sregs.regs.psw.mask &= PSW32_MASK_USER | PSW32_MASK_RI; + user_sregs.regs.psw.mask |= PSW32_USER_BITS; + user_sregs.regs.psw.addr = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); + for (i = 0; i < NUM_GPRS; i++) + user_sregs.regs.gprs[i] = (__u32) regs->gprs[i]; + memcpy(&user_sregs.regs.acrs, current->thread.acrs, + sizeof(user_sregs.regs.acrs)); + fpregs_store((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.fpu); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32))) + return -EFAULT; + return 0; +} + +static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs) +{ + _sigregs32 user_sregs; + int i; + + /* Always make any pending restarted system call return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + if (__copy_from_user(&user_sregs, &sregs->regs, sizeof(user_sregs))) + return -EFAULT; + + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW32_MASK_RI)) + return -EINVAL; + + /* Test the floating-point-control word. */ + if (test_fp_ctl(user_sregs.fpregs.fpc)) + return -EINVAL; + + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_USER) << 32 | + (__u64)(user_sregs.regs.psw.mask & PSW32_MASK_RI) << 32 | + (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_AMODE); + /* Check for invalid user address space control. 
*/ + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__u64)(user_sregs.regs.psw.addr & PSW32_ADDR_INSN); + for (i = 0; i < NUM_GPRS; i++) + regs->gprs[i] = (__u64) user_sregs.regs.gprs[i]; + memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, + sizeof(current->thread.acrs)); + fpregs_load((_s390_fp_regs *) &user_sregs.fpregs, ¤t->thread.fpu); + + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + return 0; +} + +static int save_sigregs_ext32(struct pt_regs *regs, + _sigregs_ext32 __user *sregs_ext) +{ + __u32 gprs_high[NUM_GPRS]; + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Save high gprs to signal stack */ + for (i = 0; i < NUM_GPRS; i++) + gprs_high[i] = regs->gprs[i] >> 32; + if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high, + sizeof(sregs_ext->gprs_high))) + return -EFAULT; + + /* Save vector registers to signal stack */ + if (MACHINE_HAS_VX) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = current->thread.fpu.vxrs[i].low; + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.fpu.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } + return 0; +} + +static int restore_sigregs_ext32(struct pt_regs *regs, + _sigregs_ext32 __user *sregs_ext) +{ + __u32 gprs_high[NUM_GPRS]; + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Restore high gprs from signal stack */ + if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high, + sizeof(sregs_ext->gprs_high))) + return -EFAULT; + for (i = 0; i < NUM_GPRS; i++) + *(__u32 *)®s->gprs[i] = gprs_high[i]; + + /* Restore vector registers from signal stack */ + if (MACHINE_HAS_VX) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + current->thread.fpu.vxrs[i].low = vxrs[i]; + } + return 0; +} + +COMPAT_SYSCALL_DEFINE0(sigreturn) +{ + struct pt_regs *regs = task_pt_regs(current); + sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; + sigset_t set; + + if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask)) + goto badframe; + set_current_blocked(&set); + save_fpu_regs(); + if (restore_sigregs32(regs, &frame->sregs)) + goto badframe; + if (restore_sigregs_ext32(regs, &frame->sregs_ext)) + goto badframe; + load_sigregs(); + return regs->gprs[2]; +badframe: + force_sig(SIGSEGV); + return 0; +} + +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) +{ + struct pt_regs *regs = task_pt_regs(current); + rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; + sigset_t set; + + if (get_compat_sigset(&set, &frame->uc.uc_sigmask)) + goto badframe; + set_current_blocked(&set); + if (compat_restore_altstack(&frame->uc.uc_stack)) + goto badframe; + save_fpu_regs(); + if (restore_sigregs32(regs, &frame->uc.uc_mcontext)) + goto badframe; + if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) + goto badframe; + load_sigregs(); + return regs->gprs[2]; +badframe: + force_sig(SIGSEGV); + return 0; +} + +/* + * Set up a signal frame. + */ + + +/* + * Determine which stack to use.. 
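[Editorial sketch] save_sigregs32() and save_sigregs_ext32() above split the 64-bit register state so it fits the 31-bit frame layout: the low words of the GPRs go into _sigregs32, the high words into gprs_high[], and for the vector facility only the right (low) halves of V0-V15 are stored in vxrs_low[] (their left halves correspond to the floating-point registers saved separately), while V16-V31 are copied whole into vxrs_high[]. A sketch of that vector split, with vec128 as an illustrative stand-in for the kernel's __vector128:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct vec128 { uint64_t high; uint64_t low; };

int main(void)
{
        struct vec128 vxrs[32] = { [0]  = { .high = 1, .low = 2 },
                                   [16] = { .high = 3, .low = 4 } };
        uint64_t vxrs_low[16];
        struct vec128 vxrs_high[16];
        int i;

        for (i = 0; i < 16; i++)
                vxrs_low[i] = vxrs[i].low;              /* right halves of V0-V15 */
        memcpy(vxrs_high, vxrs + 16, sizeof(vxrs_high));/* V16-V31 in full */

        /* prints "2 4" */
        printf("%llu %llu\n", (unsigned long long)vxrs_low[0],
               (unsigned long long)vxrs_high[0].low);
        return 0;
}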
+ */ +static inline void __user * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = (unsigned long) A(regs->gprs[15]); + + /* Overflow on alternate signal stack gives SIGSEGV. */ + if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) + return (void __user *) -1UL; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (! sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + return (void __user *)((sp - frame_size) & -8ul); +} + +static int setup_frame32(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) +{ + int sig = ksig->sig; + sigframe32 __user *frame; + unsigned long restorer; + size_t frame_size; + + /* + * gprs_high are always present for 31-bit compat tasks. + * The space for vector registers is only allocated if + * the machine supports it + */ + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved); + if (!MACHINE_HAS_VX) + frame_size -= sizeof(frame->sregs_ext.vxrs_low) + + sizeof(frame->sregs_ext.vxrs_high); + frame = get_sigframe(&ksig->ka, regs, frame_size); + if (frame == (void __user *) -1UL) + return -EFAULT; + + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (unsigned int __user *) frame)) + return -EFAULT; + + /* Create struct sigcontext32 on the signal stack */ + if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask, + set, sizeof(compat_sigset_t))) + return -EFAULT; + if (__put_user(ptr_to_compat(&frame->sregs), &frame->sc.sregs)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create _sigregs32 on the signal stack */ + if (save_sigregs32(regs, &frame->sregs)) + return -EFAULT; + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo)) + return -EFAULT; + + /* Create _sigregs_ext32 on the signal stack */ + if (save_sigregs_ext32(regs, &frame->sregs_ext)) + return -EFAULT; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ksig->ka.sa.sa_flags & SA_RESTORER) { + restorer = (unsigned long __force) + ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; + } else { + restorer = VDSO32_SYMBOL(current, sigreturn); + } + + /* Set up registers for signal handler */ + regs->gprs[14] = restorer; + regs->gprs[15] = (__force __u64) frame; + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler; + + regs->gprs[2] = sig; + regs->gprs[3] = (__force __u64) &frame->sc; + + /* We forgot to include these in the sigcontext. + To avoid breaking binary compatibility, they are passed as args. */ + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = current->thread.last_break; + } + + return 0; +} + +static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) +{ + rt_sigframe32 __user *frame; + unsigned long restorer; + size_t frame_size; + u32 uc_flags; + + frame_size = sizeof(*frame) - + sizeof(frame->uc.uc_mcontext_ext.__reserved); + /* + * gprs_high are always present for 31-bit compat tasks. 
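[Editorial sketch] get_sigframe() above picks where the frame goes: normally just below the user stack pointer, or at the top of the alternate signal stack for SA_ONSTACK handlers, always rounded down to an 8-byte boundary; setup_frame32() additionally shrinks frame_size when the vector extension is absent so no stack space is wasted. A stand-alone sketch of the stack selection with made-up addresses:

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* Pick the frame address: optionally switch to the alternate stack,
 * carve out frame_size bytes, round down to an 8-byte boundary. */
static unsigned long pick_frame(unsigned long sp, size_t frame_size,
                                bool on_altstack, unsigned long ss_sp,
                                unsigned long ss_size)
{
        if (on_altstack)
                sp = ss_sp + ss_size;           /* top of the alternate stack */
        return (sp - frame_size) & ~7UL;        /* stacks grow down */
}

int main(void)
{
        printf("%#lx\n", pick_frame(0x7ffffff4UL, 520, false, 0, 0));
        printf("%#lx\n", pick_frame(0x7ffffff4UL, 520, true, 0x20000000UL, 8192));
        return 0;
}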
+ * The space for vector registers is only allocated if + * the machine supports it + */ + uc_flags = UC_GPRS_HIGH; + if (MACHINE_HAS_VX) { + uc_flags |= UC_VXRS; + } else + frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) + + sizeof(frame->uc.uc_mcontext_ext.vxrs_high); + frame = get_sigframe(&ksig->ka, regs, frame_size); + if (frame == (void __user *) -1UL) + return -EFAULT; + + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame)) + return -EFAULT; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ksig->ka.sa.sa_flags & SA_RESTORER) { + restorer = (unsigned long __force) + ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE; + } else { + restorer = VDSO32_SYMBOL(current, rt_sigreturn); + } + + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user32(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. */ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(0, &frame->uc.uc_link) || + __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs32(regs, &frame->uc.uc_mcontext) || + put_compat_sigset(&frame->uc.uc_sigmask, set, sizeof(compat_sigset_t)) || + save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->gprs[14] = restorer; + regs->gprs[15] = (__force __u64) frame; + /* Force 31 bit amode and default user address space control. */ + regs->psw.mask = PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler; + + regs->gprs[2] = ksig->sig; + regs->gprs[3] = (__force __u64) &frame->info; + regs->gprs[4] = (__force __u64) &frame->uc; + regs->gprs[5] = current->thread.last_break; + return 0; +} + +/* + * OK, we're invoking a handler + */ + +void handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct pt_regs *regs) +{ + int ret; + + /* Set up the stack frame */ + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame32(ksig, oldset, regs); + else + ret = setup_frame32(ksig, oldset, regs); + + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP)); +} + diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c new file mode 100644 index 0000000000..b210a29d3e --- /dev/null +++ b/arch/s390/kernel/cpcmd.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 
1999, 2007 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Christian Borntraeger (cborntra@de.ibm.com), + */ + +#define KMSG_COMPONENT "cpcmd" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <asm/diag.h> +#include <asm/ebcdic.h> +#include <asm/cpcmd.h> + +static DEFINE_SPINLOCK(cpcmd_lock); +static char cpcmd_buf[241]; + +static int diag8_noresponse(int cmdlen) +{ + asm volatile( + " diag %[rx],%[ry],0x8\n" + : [ry] "+&d" (cmdlen) + : [rx] "d" (__pa(cpcmd_buf)) + : "cc"); + return cmdlen; +} + +static int diag8_response(int cmdlen, char *response, int *rlen) +{ + union register_pair rx, ry; + int cc; + + rx.even = __pa(cpcmd_buf); + rx.odd = __pa(response); + ry.even = cmdlen | 0x40000000L; + ry.odd = *rlen; + asm volatile( + " diag %[rx],%[ry],0x8\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=&d" (cc), [ry] "+&d" (ry.pair) + : [rx] "d" (rx.pair) + : "cc"); + if (cc) + *rlen += ry.odd; + else + *rlen = ry.odd; + return ry.even; +} + +/* + * __cpcmd has some restrictions over cpcmd + * - __cpcmd is unlocked and therefore not SMP-safe + */ +int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) +{ + int cmdlen; + int rc; + int response_len; + + cmdlen = strlen(cmd); + BUG_ON(cmdlen > 240); + memcpy(cpcmd_buf, cmd, cmdlen); + ASCEBC(cpcmd_buf, cmdlen); + + diag_stat_inc(DIAG_STAT_X008); + if (response) { + memset(response, 0, rlen); + response_len = rlen; + rc = diag8_response(cmdlen, response, &rlen); + EBCASC(response, response_len); + } else { + rc = diag8_noresponse(cmdlen); + } + if (response_code) + *response_code = rc; + return rlen; +} +EXPORT_SYMBOL(__cpcmd); + +int cpcmd(const char *cmd, char *response, int rlen, int *response_code) +{ + unsigned long flags; + char *lowbuf; + int len; + + if (is_vmalloc_or_module_addr(response)) { + lowbuf = kmalloc(rlen, GFP_KERNEL); + if (!lowbuf) { + pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); + return -ENOMEM; + } + spin_lock_irqsave(&cpcmd_lock, flags); + len = __cpcmd(cmd, lowbuf, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); + memcpy(response, lowbuf, rlen); + kfree(lowbuf); + } else { + spin_lock_irqsave(&cpcmd_lock, flags); + len = __cpcmd(cmd, response, rlen, response_code); + spin_unlock_irqrestore(&cpcmd_lock, flags); + } + return len; +} +EXPORT_SYMBOL(cpcmd); diff --git a/arch/s390/kernel/cpufeature.c b/arch/s390/kernel/cpufeature.c new file mode 100644 index 0000000000..1b2ae42a0c --- /dev/null +++ b/arch/s390/kernel/cpufeature.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 
2022 + */ + +#include <linux/cpufeature.h> +#include <linux/bug.h> +#include <asm/elf.h> + +enum { + TYPE_HWCAP, + TYPE_FACILITY, +}; + +struct s390_cpu_feature { + unsigned int type : 4; + unsigned int num : 28; +}; + +static struct s390_cpu_feature s390_cpu_features[MAX_CPU_FEATURES] = { + [S390_CPU_FEATURE_MSA] = {.type = TYPE_HWCAP, .num = HWCAP_NR_MSA}, + [S390_CPU_FEATURE_VXRS] = {.type = TYPE_HWCAP, .num = HWCAP_NR_VXRS}, + [S390_CPU_FEATURE_UV] = {.type = TYPE_FACILITY, .num = 158}, +}; + +/* + * cpu_have_feature - Test CPU features on module initialization + */ +int cpu_have_feature(unsigned int num) +{ + struct s390_cpu_feature *feature; + + if (WARN_ON_ONCE(num >= MAX_CPU_FEATURES)) + return 0; + feature = &s390_cpu_features[num]; + switch (feature->type) { + case TYPE_HWCAP: + return !!(elf_hwcap & BIT(feature->num)); + case TYPE_FACILITY: + return test_facility(feature->num); + default: + WARN_ON_ONCE(1); + return 0; + } +} +EXPORT_SYMBOL(cpu_have_feature); diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c new file mode 100644 index 0000000000..7af69948b2 --- /dev/null +++ b/arch/s390/kernel/crash_dump.c @@ -0,0 +1,652 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 kdump implementation + * + * Copyright IBM Corp. 2011 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/crash_dump.h> +#include <asm/lowcore.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/memblock.h> +#include <linux/elf.h> +#include <linux/uio.h> +#include <asm/asm-offsets.h> +#include <asm/os_info.h> +#include <asm/elf.h> +#include <asm/ipl.h> +#include <asm/sclp.h> +#include <asm/maccess.h> + +#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) +#define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) +#define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) + +static struct memblock_region oldmem_region; + +static struct memblock_type oldmem_type = { + .cnt = 1, + .max = 1, + .total_size = 0, + .regions = &oldmem_region, + .name = "oldmem", +}; + +struct save_area { + struct list_head list; + u64 psw[2]; + u64 ctrs[16]; + u64 gprs[16]; + u32 acrs[16]; + u64 fprs[16]; + u32 fpc; + u32 prefix; + u32 todpreg; + u64 timer; + u64 todcmp; + u64 vxrs_low[16]; + __vector128 vxrs_high[16]; +}; + +static LIST_HEAD(dump_save_areas); + +/* + * Allocate a save area + */ +struct save_area * __init save_area_alloc(bool is_boot_cpu) +{ + struct save_area *sa; + + sa = memblock_alloc(sizeof(*sa), 8); + if (!sa) + return NULL; + + if (is_boot_cpu) + list_add(&sa->list, &dump_save_areas); + else + list_add_tail(&sa->list, &dump_save_areas); + return sa; +} + +/* + * Return the address of the save area for the boot CPU + */ +struct save_area * __init save_area_boot_cpu(void) +{ + return list_first_entry_or_null(&dump_save_areas, struct save_area, list); +} + +/* + * Copy CPU registers into the save area + */ +void __init save_area_add_regs(struct save_area *sa, void *regs) +{ + struct lowcore *lc; + + lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA); + memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw)); + memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs)); + memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs)); + memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs)); + memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs)); + memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc)); + 
memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix)); + memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg)); + memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer)); + memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp)); +} + +/* + * Copy vector registers into the save area + */ +void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) +{ + int i; + + /* Copy lower halves of vector registers 0-15 */ + for (i = 0; i < 16; i++) + sa->vxrs_low[i] = vxrs[i].low; + /* Copy vector registers 16-31 */ + memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); +} + +static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) +{ + size_t len, copied, res = 0; + + while (count) { + if (!oldmem_data.start && src < sclp.hsa_size) { + /* Copy from zfcp/nvme dump HSA area */ + len = min(count, sclp.hsa_size - src); + copied = memcpy_hsa_iter(iter, src, len); + } else { + /* Check for swapped kdump oldmem areas */ + if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { + src -= oldmem_data.start; + len = min(count, oldmem_data.size - src); + } else if (oldmem_data.start && src < oldmem_data.size) { + len = min(count, oldmem_data.size - src); + src += oldmem_data.start; + } else { + len = count; + } + copied = memcpy_real_iter(iter, src, len); + } + count -= copied; + src += copied; + res += copied; + if (copied < len) + break; + } + return res; +} + +int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) +{ + struct iov_iter iter; + struct kvec kvec; + + kvec.iov_base = dst; + kvec.iov_len = count; + iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count); + if (copy_oldmem_iter(&iter, src, count) < count) + return -EFAULT; + return 0; +} + +/* + * Copy one page from "oldmem" + */ +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, + unsigned long offset) +{ + unsigned long src; + + src = pfn_to_phys(pfn) + offset; + return copy_oldmem_iter(iter, src, csize); +} + +/* + * Remap "oldmem" for kdump + * + * For the kdump reserved memory this functions performs a swap operation: + * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] + */ +static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, + unsigned long from, unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long size_old; + int rc; + + if (pfn < oldmem_data.size >> PAGE_SHIFT) { + size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT)); + rc = remap_pfn_range(vma, from, + pfn + (oldmem_data.start >> PAGE_SHIFT), + size_old, prot); + if (rc || size == size_old) + return rc; + size -= size_old; + from += size_old; + pfn += size_old >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for zfcp/nvme dump + * + * We only map available memory above HSA size. Memory below HSA size + * is read on demand using the copy_oldmem_page() function. 
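[Editorial sketch] copy_oldmem_iter() and remap_oldmem_pfn_range_kdump() above implement the swap described in the comment: the crashed kernel's first OLDMEM_SIZE bytes were moved into the crashkernel area so that the new kernel can run at address 0, so accesses to "old" memory have to be redirected in both directions. A sketch of that translation with made-up base/size values:

#include <stdio.h>

/* Redirect a read of "old" memory: the old low range now lives at
 * base, and the old range at base is reached through low memory. */
static unsigned long oldmem_translate(unsigned long src,
                                      unsigned long base, unsigned long size)
{
        if (src - base < size)          /* old high range -> low memory */
                return src - base;
        if (src < size)                 /* old low range -> relocated copy */
                return src + base;
        return src;                     /* everything else is unchanged */
}

int main(void)
{
        unsigned long base = 0x10000000UL, size = 0x08000000UL;

        printf("%#lx\n", oldmem_translate(0x1000UL, base, size));      /* 0x10001000 */
        printf("%#lx\n", oldmem_translate(0x10001000UL, base, size));  /* 0x1000 */
        printf("%#lx\n", oldmem_translate(0x30000000UL, base, size));  /* unchanged */
        return 0;
}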
+ */ +static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, + unsigned long from, + unsigned long pfn, + unsigned long size, pgprot_t prot) +{ + unsigned long hsa_end = sclp.hsa_size; + unsigned long size_hsa; + + if (pfn < hsa_end >> PAGE_SHIFT) { + size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT)); + if (size == size_hsa) + return 0; + size -= size_hsa; + from += size_hsa; + pfn += size_hsa >> PAGE_SHIFT; + } + return remap_pfn_range(vma, from, pfn, size, prot); +} + +/* + * Remap "oldmem" for kdump or zfcp/nvme dump + */ +int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + if (oldmem_data.start) + return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); + else + return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, + prot); +} + +static const char *nt_name(Elf64_Word type) +{ + const char *name = "LINUX"; + + if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) + name = KEXEC_CORE_NOTE_NAME; + return name; +} + +/* + * Initialize ELF note + */ +static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, + const char *name) +{ + Elf64_Nhdr *note; + u64 len; + + note = (Elf64_Nhdr *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = d_len; + note->n_type = type; + len = sizeof(Elf64_Nhdr); + + memcpy(buf + len, name, note->n_namesz); + len = roundup(len + note->n_namesz, 4); + + memcpy(buf + len, desc, note->n_descsz); + len = roundup(len + note->n_descsz, 4); + + return PTR_ADD(buf, len); +} + +static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) +{ + return nt_init_name(buf, type, desc, d_len, nt_name(type)); +} + +/* + * Calculate the size of ELF note + */ +static size_t nt_size_name(int d_len, const char *name) +{ + size_t size; + + size = sizeof(Elf64_Nhdr); + size += roundup(strlen(name) + 1, 4); + size += roundup(d_len, 4); + + return size; +} + +static inline size_t nt_size(Elf64_Word type, int d_len) +{ + return nt_size_name(d_len, nt_name(type)); +} + +/* + * Fill ELF notes for one CPU with save area registers + */ +static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) +{ + struct elf_prstatus nt_prstatus; + elf_fpregset_t nt_fpregset; + + /* Prepare prstatus note */ + memset(&nt_prstatus, 0, sizeof(nt_prstatus)); + memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs)); + memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); + memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs)); + nt_prstatus.common.pr_pid = cpu; + /* Prepare fpregset (floating point) note */ + memset(&nt_fpregset, 0, sizeof(nt_fpregset)); + memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); + memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); + /* Create ELF notes for the CPU */ + ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); + ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); + ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); + ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); + ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); + ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); + ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); + if (MACHINE_HAS_VX) { + ptr = nt_init(ptr, NT_S390_VXRS_HIGH, + &sa->vxrs_high, sizeof(sa->vxrs_high)); + ptr = nt_init(ptr, NT_S390_VXRS_LOW, + &sa->vxrs_low, sizeof(sa->vxrs_low)); + } + return ptr; +} + +/* + * Calculate 
size of ELF notes per cpu + */ +static size_t get_cpu_elf_notes_size(void) +{ + struct save_area *sa = NULL; + size_t size; + + size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); + size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); + size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); + size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); + size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); + size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); + size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); + if (MACHINE_HAS_VX) { + size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); + size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); + } + + return size; +} + +/* + * Initialize prpsinfo note (new kernel) + */ +static void *nt_prpsinfo(void *ptr) +{ + struct elf_prpsinfo prpsinfo; + + memset(&prpsinfo, 0, sizeof(prpsinfo)); + prpsinfo.pr_sname = 'R'; + strcpy(prpsinfo.pr_fname, "vmlinux"); + return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); +} + +/* + * Get vmcoreinfo using lowcore->vmcore_info (new kernel) + */ +static void *get_vmcoreinfo_old(unsigned long *size) +{ + char nt_name[11], *vmcoreinfo; + unsigned long addr; + Elf64_Nhdr note; + + if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr))) + return NULL; + memset(nt_name, 0, sizeof(nt_name)); + if (copy_oldmem_kernel(¬e, addr, sizeof(note))) + return NULL; + if (copy_oldmem_kernel(nt_name, addr + sizeof(note), + sizeof(nt_name) - 1)) + return NULL; + if (strcmp(nt_name, VMCOREINFO_NOTE_NAME) != 0) + return NULL; + vmcoreinfo = kzalloc(note.n_descsz, GFP_KERNEL); + if (!vmcoreinfo) + return NULL; + if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) { + kfree(vmcoreinfo); + return NULL; + } + *size = note.n_descsz; + return vmcoreinfo; +} + +/* + * Initialize vmcoreinfo note (new kernel) + */ +static void *nt_vmcoreinfo(void *ptr) +{ + const char *name = VMCOREINFO_NOTE_NAME; + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (vmcoreinfo) + return nt_init_name(ptr, 0, vmcoreinfo, size, name); + + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) + return ptr; + ptr = nt_init_name(ptr, 0, vmcoreinfo, size, name); + kfree(vmcoreinfo); + return ptr; +} + +static size_t nt_vmcoreinfo_size(void) +{ + const char *name = VMCOREINFO_NOTE_NAME; + unsigned long size; + void *vmcoreinfo; + + vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); + if (vmcoreinfo) + return nt_size_name(size, name); + + vmcoreinfo = get_vmcoreinfo_old(&size); + if (!vmcoreinfo) + return 0; + + kfree(vmcoreinfo); + return nt_size_name(size, name); +} + +/* + * Initialize final note (needed for /proc/vmcore code) + */ +static void *nt_final(void *ptr) +{ + Elf64_Nhdr *note; + + note = (Elf64_Nhdr *) ptr; + note->n_namesz = 0; + note->n_descsz = 0; + note->n_type = 0; + return PTR_ADD(ptr, sizeof(Elf64_Nhdr)); +} + +/* + * Initialize ELF header (new kernel) + */ +static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) +{ + memset(ehdr, 0, sizeof(*ehdr)); + memcpy(ehdr->e_ident, ELFMAG, SELFMAG); + ehdr->e_ident[EI_CLASS] = ELFCLASS64; + ehdr->e_ident[EI_DATA] = ELFDATA2MSB; + ehdr->e_ident[EI_VERSION] = EV_CURRENT; + memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); + ehdr->e_type = ET_CORE; + ehdr->e_machine = EM_S390; + ehdr->e_version = EV_CURRENT; + ehdr->e_phoff = sizeof(Elf64_Ehdr); + ehdr->e_ehsize = sizeof(Elf64_Ehdr); + ehdr->e_phentsize = sizeof(Elf64_Phdr); + ehdr->e_phnum = mem_chunk_cnt + 1; + return ehdr + 1; +} + +/* + * 
Return CPU count for ELF header (new kernel) + */ +static int get_cpu_cnt(void) +{ + struct save_area *sa; + int cpus = 0; + + list_for_each_entry(sa, &dump_save_areas, list) + if (sa->prefix != 0) + cpus++; + return cpus; +} + +/* + * Return memory chunk count for ELF header (new kernel) + */ +static int get_mem_chunk_cnt(void) +{ + int cnt = 0; + u64 idx; + + for_each_physmem_range(idx, &oldmem_type, NULL, NULL) + cnt++; + return cnt; +} + +/* + * Initialize ELF loads (new kernel) + */ +static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) +{ + phys_addr_t start, end; + u64 idx; + + for_each_physmem_range(idx, &oldmem_type, &start, &end) { + phdr->p_filesz = end - start; + phdr->p_type = PT_LOAD; + phdr->p_offset = start; + phdr->p_vaddr = start; + phdr->p_paddr = start; + phdr->p_memsz = end - start; + phdr->p_flags = PF_R | PF_W | PF_X; + phdr->p_align = PAGE_SIZE; + phdr++; + } +} + +/* + * Initialize notes (new kernel) + */ +static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) +{ + struct save_area *sa; + void *ptr_start = ptr; + int cpu; + + ptr = nt_prpsinfo(ptr); + + cpu = 1; + list_for_each_entry(sa, &dump_save_areas, list) + if (sa->prefix != 0) + ptr = fill_cpu_elf_notes(ptr, cpu++, sa); + ptr = nt_vmcoreinfo(ptr); + ptr = nt_final(ptr); + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = PT_NOTE; + phdr->p_offset = notes_offset; + phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); + phdr->p_memsz = phdr->p_filesz; + return ptr; +} + +static size_t get_elfcorehdr_size(int mem_chunk_cnt) +{ + size_t size; + + size = sizeof(Elf64_Ehdr); + /* PT_NOTES */ + size += sizeof(Elf64_Phdr); + /* nt_prpsinfo */ + size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); + /* regsets */ + size += get_cpu_cnt() * get_cpu_elf_notes_size(); + /* nt_vmcoreinfo */ + size += nt_vmcoreinfo_size(); + /* nt_final */ + size += sizeof(Elf64_Nhdr); + /* PT_LOADS */ + size += mem_chunk_cnt * sizeof(Elf64_Phdr); + + return size; +} + +/* + * Create ELF core header (new kernel) + */ +int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) +{ + Elf64_Phdr *phdr_notes, *phdr_loads; + size_t alloc_size; + int mem_chunk_cnt; + void *ptr, *hdr; + u64 hdr_off; + + /* If we are not in kdump or zfcp/nvme dump mode return */ + if (!oldmem_data.start && !is_ipl_type_dump()) + return 0; + /* If we cannot get HSA size for zfcp/nvme dump return error */ + if (is_ipl_type_dump() && !sclp.hsa_size) + return -ENODEV; + + /* For kdump, exclude previous crashkernel memory */ + if (oldmem_data.start) { + oldmem_region.base = oldmem_data.start; + oldmem_region.size = oldmem_data.size; + oldmem_type.total_size = oldmem_data.size; + } + + mem_chunk_cnt = get_mem_chunk_cnt(); + + alloc_size = get_elfcorehdr_size(mem_chunk_cnt); + + hdr = kzalloc(alloc_size, GFP_KERNEL); + + /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating + * a dump with this crash kernel will fail. Panic now to allow other + * dump mechanisms to take over. 
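[Editorial sketch] nt_init_name() and nt_size_name() above follow the standard ELF note layout: an Elf64_Nhdr, the NUL-terminated name padded to a 4-byte boundary, then the descriptor padded likewise. A small stand-alone sketch of that layout, assuming the note constants from <elf.h> (NT_S390_PREFIX is used only as an example type):

#include <elf.h>
#include <stdio.h>
#include <string.h>

#define ROUNDUP4(x) (((x) + 3) & ~3UL)

/* Emit one note into buf and return the number of bytes written. */
static size_t emit_note(void *buf, Elf64_Word type, const char *name,
                        const void *desc, Elf64_Word d_len)
{
        Elf64_Nhdr *note = buf;
        size_t len = sizeof(*note);

        note->n_namesz = strlen(name) + 1;
        note->n_descsz = d_len;
        note->n_type = type;
        memcpy((char *)buf + len, name, note->n_namesz);
        len = ROUNDUP4(len + note->n_namesz);
        memcpy((char *)buf + len, desc, d_len);
        return ROUNDUP4(len + d_len);
}

int main(void)
{
        unsigned int buf[16];
        unsigned int prefix = 0x1234;

        /* 12-byte header + "LINUX\0" padded to 8 + 4-byte descriptor: prints 24 */
        printf("%zu\n", emit_note(buf, NT_S390_PREFIX, "LINUX",
                                  &prefix, sizeof(prefix)));
        return 0;
}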
+ */ + if (!hdr) + panic("s390 kdump allocating elfcorehdr failed"); + + /* Init elf header */ + ptr = ehdr_init(hdr, mem_chunk_cnt); + /* Init program headers */ + phdr_notes = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); + phdr_loads = ptr; + ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); + /* Init notes */ + hdr_off = PTR_DIFF(ptr, hdr); + ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); + /* Init loads */ + hdr_off = PTR_DIFF(ptr, hdr); + loads_init(phdr_loads, hdr_off); + *addr = (unsigned long long) hdr; + *size = (unsigned long long) hdr_off; + BUG_ON(elfcorehdr_size > alloc_size); + return 0; +} + +/* + * Free ELF core header (new kernel) + */ +void elfcorehdr_free(unsigned long long addr) +{ + kfree((void *)(unsigned long)addr); +} + +/* + * Read from ELF header + */ +ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + + memcpy(buf, src, count); + *ppos += count; + return count; +} + +/* + * Read from ELF notes data + */ +ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) +{ + void *src = (void *)(unsigned long)*ppos; + + memcpy(buf, src, count); + *ppos += count; + return count; +} diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c new file mode 100644 index 0000000000..a85e0c3e70 --- /dev/null +++ b/arch/s390/kernel/debug.c @@ -0,0 +1,1574 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S/390 debug facility + * + * Copyright IBM Corp. 1999, 2020 + * + * Author(s): Michael Holzheu (holzheu@de.ibm.com), + * Holger Smolinski (Holger.Smolinski@de.ibm.com) + * + * Bugreports to: <Linux390@de.ibm.com> + */ + +#define KMSG_COMPONENT "s390dbf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/ctype.h> +#include <linux/string.h> +#include <linux/sysctl.h> +#include <linux/uaccess.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/minmax.h> +#include <linux/debugfs.h> + +#include <asm/debug.h> + +#define DEBUG_PROLOG_ENTRY -1 + +#define ALL_AREAS 0 /* copy all debug areas */ +#define NO_AREAS 1 /* copy no debug areas */ + +/* typedefs */ + +typedef struct file_private_info { + loff_t offset; /* offset of last read in file */ + int act_area; /* number of last formated area */ + int act_page; /* act page in given area */ + int act_entry; /* last formated entry (offset */ + /* relative to beginning of last */ + /* formated page) */ + size_t act_entry_offset; /* up to this offset we copied */ + /* in last read the last formated */ + /* entry to userland */ + char temp_buf[2048]; /* buffer for output */ + debug_info_t *debug_info_org; /* original debug information */ + debug_info_t *debug_info_snap; /* snapshot of debug information */ + struct debug_view *view; /* used view of debug info */ +} file_private_info_t; + +typedef struct { + char *string; + /* + * This assumes that all args are converted into longs + * on L/390 this is the case for all types of parameter + * except of floats, and long long (32 bit) + * + */ + long args[]; +} debug_sprintf_entry_t; + +/* internal function prototyes */ + +static int debug_init(void); +static ssize_t debug_output(struct file *file, char __user *user_buf, + size_t user_len, loff_t *offset); +static ssize_t debug_input(struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset); +static int debug_open(struct inode *inode, struct file *file); +static int 
debug_close(struct inode *inode, struct file *file); +static debug_info_t *debug_info_create(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode); +static void debug_info_get(debug_info_t *); +static void debug_info_put(debug_info_t *); +static int debug_prolog_level_fn(debug_info_t *id, + struct debug_view *view, char *out_buf); +static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_prolog_pages_fn(debug_info_t *id, + struct debug_view *view, char *out_buf); +static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_buf_size, loff_t *offset); +static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, const char *in_buf); +static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, const char *inbuf); +static void debug_areas_swap(debug_info_t *a, debug_info_t *b); +static void debug_events_append(debug_info_t *dest, debug_info_t *src); + +/* globals */ + +struct debug_view debug_hex_ascii_view = { + "hex_ascii", + NULL, + &debug_dflt_header_fn, + &debug_hex_ascii_format_fn, + NULL, + NULL +}; +EXPORT_SYMBOL(debug_hex_ascii_view); + +static struct debug_view debug_level_view = { + "level", + &debug_prolog_level_fn, + NULL, + NULL, + &debug_input_level_fn, + NULL +}; + +static struct debug_view debug_pages_view = { + "pages", + &debug_prolog_pages_fn, + NULL, + NULL, + &debug_input_pages_fn, + NULL +}; + +static struct debug_view debug_flush_view = { + "flush", + NULL, + NULL, + NULL, + &debug_input_flush_fn, + NULL +}; + +struct debug_view debug_sprintf_view = { + "sprintf", + NULL, + &debug_dflt_header_fn, + &debug_sprintf_format_fn, + NULL, + NULL +}; +EXPORT_SYMBOL(debug_sprintf_view); + +/* used by dump analysis tools to determine version of debug feature */ +static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION; + +/* static globals */ + +static debug_info_t *debug_area_first; +static debug_info_t *debug_area_last; +static DEFINE_MUTEX(debug_mutex); + +static int initialized; +static int debug_critical; + +static const struct file_operations debug_file_ops = { + .owner = THIS_MODULE, + .read = debug_output, + .write = debug_input, + .open = debug_open, + .release = debug_close, + .llseek = no_llseek, +}; + +static struct dentry *debug_debugfs_root_entry; + +/* functions */ + +/* + * debug_areas_alloc + * - Debug areas are implemented as a threedimensonal array: + * areas[areanumber][pagenumber][pageoffset] + */ + +static debug_entry_t ***debug_areas_alloc(int pages_per_area, int nr_areas) +{ + debug_entry_t ***areas; + int i, j; + + areas = kmalloc_array(nr_areas, sizeof(debug_entry_t **), GFP_KERNEL); + if (!areas) + goto fail_malloc_areas; + for (i = 0; i < nr_areas; i++) { + /* GFP_NOWARN to avoid user triggerable WARN, we handle fails */ + areas[i] = kmalloc_array(pages_per_area, + sizeof(debug_entry_t *), + GFP_KERNEL | __GFP_NOWARN); + if (!areas[i]) + goto fail_malloc_areas2; + for (j = 0; j < pages_per_area; j++) { + areas[i][j] = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!areas[i][j]) { + for (j--; j >= 0 ; j--) + kfree(areas[i][j]); + kfree(areas[i]); + goto fail_malloc_areas2; + } + } 
+ } + return areas; + +fail_malloc_areas2: + for (i--; i >= 0; i--) { + for (j = 0; j < pages_per_area; j++) + kfree(areas[i][j]); + kfree(areas[i]); + } + kfree(areas); +fail_malloc_areas: + return NULL; +} + +/* + * debug_info_alloc + * - alloc new debug-info + */ +static debug_info_t *debug_info_alloc(const char *name, int pages_per_area, + int nr_areas, int buf_size, int level, + int mode) +{ + debug_info_t *rc; + + /* alloc everything */ + rc = kmalloc(sizeof(debug_info_t), GFP_KERNEL); + if (!rc) + goto fail_malloc_rc; + rc->active_entries = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); + if (!rc->active_entries) + goto fail_malloc_active_entries; + rc->active_pages = kcalloc(nr_areas, sizeof(int), GFP_KERNEL); + if (!rc->active_pages) + goto fail_malloc_active_pages; + if ((mode == ALL_AREAS) && (pages_per_area != 0)) { + rc->areas = debug_areas_alloc(pages_per_area, nr_areas); + if (!rc->areas) + goto fail_malloc_areas; + } else { + rc->areas = NULL; + } + + /* initialize members */ + spin_lock_init(&rc->lock); + rc->pages_per_area = pages_per_area; + rc->nr_areas = nr_areas; + rc->active_area = 0; + rc->level = level; + rc->buf_size = buf_size; + rc->entry_size = sizeof(debug_entry_t) + buf_size; + strscpy(rc->name, name, sizeof(rc->name)); + memset(rc->views, 0, DEBUG_MAX_VIEWS * sizeof(struct debug_view *)); + memset(rc->debugfs_entries, 0, DEBUG_MAX_VIEWS * sizeof(struct dentry *)); + refcount_set(&(rc->ref_count), 0); + + return rc; + +fail_malloc_areas: + kfree(rc->active_pages); +fail_malloc_active_pages: + kfree(rc->active_entries); +fail_malloc_active_entries: + kfree(rc); +fail_malloc_rc: + return NULL; +} + +/* + * debug_areas_free + * - free all debug areas + */ +static void debug_areas_free(debug_info_t *db_info) +{ + int i, j; + + if (!db_info->areas) + return; + for (i = 0; i < db_info->nr_areas; i++) { + for (j = 0; j < db_info->pages_per_area; j++) + kfree(db_info->areas[i][j]); + kfree(db_info->areas[i]); + } + kfree(db_info->areas); + db_info->areas = NULL; +} + +/* + * debug_info_free + * - free memory debug-info + */ +static void debug_info_free(debug_info_t *db_info) +{ + debug_areas_free(db_info); + kfree(db_info->active_entries); + kfree(db_info->active_pages); + kfree(db_info); +} + +/* + * debug_info_create + * - create new debug-info + */ + +static debug_info_t *debug_info_create(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode) +{ + debug_info_t *rc; + + rc = debug_info_alloc(name, pages_per_area, nr_areas, buf_size, + DEBUG_DEFAULT_LEVEL, ALL_AREAS); + if (!rc) + goto out; + + rc->mode = mode & ~S_IFMT; + refcount_set(&rc->ref_count, 1); +out: + return rc; +} + +/* + * debug_info_copy + * - copy debug-info + */ +static debug_info_t *debug_info_copy(debug_info_t *in, int mode) +{ + unsigned long flags; + debug_info_t *rc; + int i, j; + + /* get a consistent copy of the debug areas */ + do { + rc = debug_info_alloc(in->name, in->pages_per_area, + in->nr_areas, in->buf_size, in->level, mode); + spin_lock_irqsave(&in->lock, flags); + if (!rc) + goto out; + /* has something changed in the meantime ? 
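[Editorial sketch] debug_areas_alloc() above builds the structure that the comment calls a three-dimensional array: areas[area] is an array of page pointers, and one entry lives at a byte offset inside one of those pages. A toy model of that layout and of the address computation later done by get_active_entry() (sizes are arbitrary, allocation failures ignored for brevity):

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define PAGE_SZ   4096
#define NR_AREAS  2
#define PAGES     4
#define ENTRY_SZ  64

int main(void)
{
        char ***areas;
        int area = 1, page = 2, entry = 3 * ENTRY_SZ;
        int i, j;

        areas = malloc(NR_AREAS * sizeof(*areas));
        for (i = 0; i < NR_AREAS; i++) {
                areas[i] = malloc(PAGES * sizeof(**areas));
                for (j = 0; j < PAGES; j++)
                        areas[i][j] = calloc(1, PAGE_SZ);
        }

        /* address of the active entry, as in get_active_entry() */
        strcpy(areas[area][page] + entry, "entry payload");
        printf("%s\n", areas[1][2] + 192);      /* prints "entry payload" */

        for (i = 0; i < NR_AREAS; i++) {
                for (j = 0; j < PAGES; j++)
                        free(areas[i][j]);
                free(areas[i]);
        }
        free(areas);
        return 0;
}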
*/ + if ((rc->pages_per_area == in->pages_per_area) && + (rc->nr_areas == in->nr_areas)) { + break; + } + spin_unlock_irqrestore(&in->lock, flags); + debug_info_free(rc); + } while (1); + + if (mode == NO_AREAS) + goto out; + + for (i = 0; i < in->nr_areas; i++) { + for (j = 0; j < in->pages_per_area; j++) + memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + } +out: + spin_unlock_irqrestore(&in->lock, flags); + return rc; +} + +/* + * debug_info_get + * - increments reference count for debug-info + */ +static void debug_info_get(debug_info_t *db_info) +{ + if (db_info) + refcount_inc(&db_info->ref_count); +} + +/* + * debug_info_put: + * - decreases reference count for debug-info and frees it if necessary + */ +static void debug_info_put(debug_info_t *db_info) +{ + if (!db_info) + return; + if (refcount_dec_and_test(&db_info->ref_count)) + debug_info_free(db_info); +} + +/* + * debug_format_entry: + * - format one debug entry and return size of formated data + */ +static int debug_format_entry(file_private_info_t *p_info) +{ + debug_info_t *id_snap = p_info->debug_info_snap; + struct debug_view *view = p_info->view; + debug_entry_t *act_entry; + size_t len = 0; + + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { + /* print prolog */ + if (view->prolog_proc) + len += view->prolog_proc(id_snap, view, p_info->temp_buf); + goto out; + } + if (!id_snap->areas) /* this is true, if we have a prolog only view */ + goto out; /* or if 'pages_per_area' is 0 */ + act_entry = (debug_entry_t *) ((char *)id_snap->areas[p_info->act_area] + [p_info->act_page] + p_info->act_entry); + + if (act_entry->clock == 0LL) + goto out; /* empty entry */ + if (view->header_proc) + len += view->header_proc(id_snap, view, p_info->act_area, + act_entry, p_info->temp_buf + len); + if (view->format_proc) + len += view->format_proc(id_snap, view, p_info->temp_buf + len, + DEBUG_DATA(act_entry)); +out: + return len; +} + +/* + * debug_next_entry: + * - goto next entry in p_info + */ +static inline int debug_next_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { + p_info->act_entry = 0; + p_info->act_page = 0; + goto out; + } + if (!id->areas) + return 1; + p_info->act_entry += id->entry_size; + /* switch to next page, if we reached the end of the page */ + if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { + /* next page */ + p_info->act_entry = 0; + p_info->act_page += 1; + if ((p_info->act_page % id->pages_per_area) == 0) { + /* next area */ + p_info->act_area++; + p_info->act_page = 0; + } + if (p_info->act_area >= id->nr_areas) + return 1; + } +out: + return 0; +} + +/* + * debug_output: + * - called for user read() + * - copies formated debug entries to the user buffer + */ +static ssize_t debug_output(struct file *file, /* file descriptor */ + char __user *user_buf, /* user buffer */ + size_t len, /* length of buffer */ + loff_t *offset) /* offset in the file */ +{ + size_t count = 0; + size_t entry_offset; + file_private_info_t *p_info; + + p_info = (file_private_info_t *) file->private_data; + if (*offset != p_info->offset) + return -EPIPE; + if (p_info->act_area >= p_info->debug_info_snap->nr_areas) + return 0; + entry_offset = p_info->act_entry_offset; + while (count < len) { + int formatted_line_residue; + int formatted_line_size; + int user_buf_residue; + size_t copy_size; + + formatted_line_size = debug_format_entry(p_info); + formatted_line_residue = formatted_line_size - entry_offset; + user_buf_residue = 
len-count; + copy_size = min(user_buf_residue, formatted_line_residue); + if (copy_size) { + if (copy_to_user(user_buf + count, p_info->temp_buf + + entry_offset, copy_size)) + return -EFAULT; + count += copy_size; + entry_offset += copy_size; + } + if (copy_size == formatted_line_residue) { + entry_offset = 0; + if (debug_next_entry(p_info)) + goto out; + } + } +out: + p_info->offset = *offset + count; + p_info->act_entry_offset = entry_offset; + *offset = p_info->offset; + return count; +} + +/* + * debug_input: + * - called for user write() + * - calls input function of view + */ +static ssize_t debug_input(struct file *file, const char __user *user_buf, + size_t length, loff_t *offset) +{ + file_private_info_t *p_info; + int rc = 0; + + mutex_lock(&debug_mutex); + p_info = ((file_private_info_t *) file->private_data); + if (p_info->view->input_proc) { + rc = p_info->view->input_proc(p_info->debug_info_org, + p_info->view, file, user_buf, + length, offset); + } else { + rc = -EPERM; + } + mutex_unlock(&debug_mutex); + return rc; /* number of input characters */ +} + +/* + * debug_open: + * - called for user open() + * - copies formated output to private_data area of the file + * handle + */ +static int debug_open(struct inode *inode, struct file *file) +{ + debug_info_t *debug_info, *debug_info_snapshot; + file_private_info_t *p_info; + int i, rc = 0; + + mutex_lock(&debug_mutex); + debug_info = file_inode(file)->i_private; + /* find debug view */ + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!debug_info->views[i]) + continue; + else if (debug_info->debugfs_entries[i] == file->f_path.dentry) + goto found; /* found view ! */ + } + /* no entry found */ + rc = -EINVAL; + goto out; + +found: + + /* Make snapshot of current debug areas to get it consistent. */ + /* To copy all the areas is only needed, if we have a view which */ + /* formats the debug areas. */ + + if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) { + rc = -ENOMEM; + goto out; + } + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + rc = -ENOMEM; + goto out; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = debug_info->views[i]; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + file->private_data = p_info; + debug_info_get(debug_info); + nonseekable_open(inode, file); +out: + mutex_unlock(&debug_mutex); + return rc; +} + +/* + * debug_close: + * - called for user close() + * - deletes private_data area of the file handle + */ +static int debug_close(struct inode *inode, struct file *file) +{ + file_private_info_t *p_info; + + p_info = (file_private_info_t *) file->private_data; + if (p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(file->private_data); + return 0; /* success */ +} + +/* Create debugfs entries and add to internal list. 
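[Editorial sketch] debug_next_entry() above walks those coordinates in reading order: advance by entry_size within the page, move to the next page when the page is exhausted, and to the next area once the last page of an area has been read. A stand-alone walk with toy sizes:

#include <stdio.h>

#define PAGE_SZ    4096
#define ENTRY_SZ   1024
#define PAGES      2
#define NR_AREAS   2

/* Advance (area, page, entry); return 1 when the last area is done. */
static int next_entry(int *area, int *page, int *entry)
{
        *entry += ENTRY_SZ;
        if (*entry > PAGE_SZ - ENTRY_SZ) {      /* page exhausted */
                *entry = 0;
                *page += 1;
                if (*page % PAGES == 0) {       /* area exhausted */
                        *area += 1;
                        *page = 0;
                }
                if (*area >= NR_AREAS)
                        return 1;
        }
        return 0;
}

int main(void)
{
        int area = 0, page = 0, entry = 0;

        do {
                printf("area %d page %d offset %d\n", area, page, entry);
        } while (!next_entry(&area, &page, &entry));
        return 0;
}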
*/ +static void _debug_register(debug_info_t *id) +{ + /* create root directory */ + id->debugfs_root_entry = debugfs_create_dir(id->name, + debug_debugfs_root_entry); + + /* append new element to linked list */ + if (!debug_area_first) { + /* first element in list */ + debug_area_first = id; + id->prev = NULL; + } else { + /* append element to end of list */ + debug_area_last->next = id; + id->prev = debug_area_last; + } + debug_area_last = id; + id->next = NULL; + + debug_register_view(id, &debug_level_view); + debug_register_view(id, &debug_flush_view); + debug_register_view(id, &debug_pages_view); +} + +/** + * debug_register_mode() - creates and initializes debug area. + * + * @name: Name of debug log (e.g. used for debugfs entry) + * @pages_per_area: Number of pages, which will be allocated per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * @mode: File mode for debugfs files. E.g. S_IRWXUGO + * @uid: User ID for debugfs files. Currently only 0 is supported. + * @gid: Group ID for debugfs files. Currently only 0 is supported. + * + * Return: + * - Handle for generated debug area + * - %NULL if register failed + * + * Allocates memory for a debug log. + * Must not be called within an interrupt handler. + */ +debug_info_t *debug_register_mode(const char *name, int pages_per_area, + int nr_areas, int buf_size, umode_t mode, + uid_t uid, gid_t gid) +{ + debug_info_t *rc = NULL; + + /* Since debugfs currently does not support uid/gid other than root, */ + /* we do not allow gid/uid != 0 until we get support for that. */ + if ((uid != 0) || (gid != 0)) + pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); + BUG_ON(!initialized); + + /* create new debug_info */ + rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode); + if (rc) { + mutex_lock(&debug_mutex); + _debug_register(rc); + mutex_unlock(&debug_mutex); + } else { + pr_err("Registering debug feature %s failed\n", name); + } + return rc; +} +EXPORT_SYMBOL(debug_register_mode); + +/** + * debug_register() - creates and initializes debug area with default file mode. + * + * @name: Name of debug log (e.g. used for debugfs entry) + * @pages_per_area: Number of pages, which will be allocated per area + * @nr_areas: Number of debug areas + * @buf_size: Size of data area in each debug entry + * + * Return: + * - Handle for generated debug area + * - %NULL if register failed + * + * Allocates memory for a debug log. + * The debugfs file mode access permissions are read and write for user. + * Must not be called within an interrupt handler. + */ +debug_info_t *debug_register(const char *name, int pages_per_area, + int nr_areas, int buf_size) +{ + return debug_register_mode(name, pages_per_area, nr_areas, buf_size, + S_IRUSR | S_IWUSR, 0, 0); +} +EXPORT_SYMBOL(debug_register); + +/** + * debug_register_static() - registers a static debug area + * + * @id: Handle for static debug area + * @pages_per_area: Number of pages per area + * @nr_areas: Number of debug areas + * + * Register debug_info_t defined using DEFINE_STATIC_DEBUG_INFO. + * + * Note: This function is called automatically via an initcall generated by + * DEFINE_STATIC_DEBUG_INFO. 
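[Editorial sketch] debug_register(), debug_register_view(), debug_set_level() and debug_unregister() above form the interface drivers actually use. A hedged sketch of typical usage from a module, based on the API as exported in this file; the driver name and sizes are made up, and the resulting entries would typically be readable under /sys/kernel/debug/s390dbf/mydrv/sprintf:

#include <linux/module.h>
#include <asm/debug.h>

static debug_info_t *mydrv_dbf;

static int __init mydrv_init(void)
{
        /* one area of four pages, room for a format pointer plus args */
        mydrv_dbf = debug_register("mydrv", 4, 1, 4 * sizeof(long));
        if (!mydrv_dbf)
                return -ENOMEM;
        debug_register_view(mydrv_dbf, &debug_sprintf_view);
        debug_set_level(mydrv_dbf, 3);
        debug_sprintf_event(mydrv_dbf, 1, "init done, rc=%ld\n", 0L);
        return 0;
}

static void __exit mydrv_exit(void)
{
        debug_unregister(mydrv_dbf);
}

module_init(mydrv_init);
module_exit(mydrv_exit);
MODULE_LICENSE("GPL");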
+ */ +void debug_register_static(debug_info_t *id, int pages_per_area, int nr_areas) +{ + unsigned long flags; + debug_info_t *copy; + + if (!initialized) { + pr_err("Tried to register debug feature %s too early\n", + id->name); + return; + } + + copy = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!copy) { + pr_err("Registering debug feature %s failed\n", id->name); + + /* Clear pointers to prevent tracing into released initdata. */ + spin_lock_irqsave(&id->lock, flags); + id->areas = NULL; + id->active_pages = NULL; + id->active_entries = NULL; + spin_unlock_irqrestore(&id->lock, flags); + + return; + } + + /* Replace static trace area with dynamic copy. */ + spin_lock_irqsave(&id->lock, flags); + debug_events_append(copy, id); + debug_areas_swap(id, copy); + spin_unlock_irqrestore(&id->lock, flags); + + /* Clear pointers to initdata and discard copy. */ + copy->areas = NULL; + copy->active_pages = NULL; + copy->active_entries = NULL; + debug_info_free(copy); + + mutex_lock(&debug_mutex); + _debug_register(id); + mutex_unlock(&debug_mutex); +} + +/* Remove debugfs entries and remove from internal list. */ +static void _debug_unregister(debug_info_t *id) +{ + int i; + + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + continue; + debugfs_remove(id->debugfs_entries[i]); + } + debugfs_remove(id->debugfs_root_entry); + if (id == debug_area_first) + debug_area_first = id->next; + if (id == debug_area_last) + debug_area_last = id->prev; + if (id->prev) + id->prev->next = id->next; + if (id->next) + id->next->prev = id->prev; +} + +/** + * debug_unregister() - give back debug area. + * + * @id: handle for debug log + * + * Return: + * none + */ +void debug_unregister(debug_info_t *id) +{ + if (!id) + return; + mutex_lock(&debug_mutex); + _debug_unregister(id); + mutex_unlock(&debug_mutex); + + debug_info_put(id); +} +EXPORT_SYMBOL(debug_unregister); + +/* + * debug_set_size: + * - set area size (number of pages) and number of areas + */ +static int debug_set_size(debug_info_t *id, int nr_areas, int pages_per_area) +{ + debug_info_t *new_id; + unsigned long flags; + + if (!id || (nr_areas <= 0) || (pages_per_area < 0)) + return -EINVAL; + + new_id = debug_info_alloc("", pages_per_area, nr_areas, id->buf_size, + id->level, ALL_AREAS); + if (!new_id) { + pr_info("Allocating memory for %i pages failed\n", + pages_per_area); + return -ENOMEM; + } + + spin_lock_irqsave(&id->lock, flags); + debug_events_append(new_id, id); + debug_areas_swap(new_id, id); + debug_info_free(new_id); + spin_unlock_irqrestore(&id->lock, flags); + pr_info("%s: set new size (%i pages)\n", id->name, pages_per_area); + + return 0; +} + +/** + * debug_set_level() - Sets new actual debug level if new_level is valid. 
+ * + * @id: handle for debug log + * @new_level: new debug level + * + * Return: + * none + */ +void debug_set_level(debug_info_t *id, int new_level) +{ + unsigned long flags; + + if (!id) + return; + + if (new_level == DEBUG_OFF_LEVEL) { + pr_info("%s: switched off\n", id->name); + } else if ((new_level > DEBUG_MAX_LEVEL) || (new_level < 0)) { + pr_info("%s: level %i is out of range (%i - %i)\n", + id->name, new_level, 0, DEBUG_MAX_LEVEL); + return; + } + + spin_lock_irqsave(&id->lock, flags); + id->level = new_level; + spin_unlock_irqrestore(&id->lock, flags); +} +EXPORT_SYMBOL(debug_set_level); + +/* + * proceed_active_entry: + * - set active entry to next in the ring buffer + */ +static inline void proceed_active_entry(debug_info_t *id) +{ + if ((id->active_entries[id->active_area] += id->entry_size) + > (PAGE_SIZE - id->entry_size)) { + id->active_entries[id->active_area] = 0; + id->active_pages[id->active_area] = + (id->active_pages[id->active_area] + 1) % + id->pages_per_area; + } +} + +/* + * proceed_active_area: + * - set active area to next in the ring buffer + */ +static inline void proceed_active_area(debug_info_t *id) +{ + id->active_area++; + id->active_area = id->active_area % id->nr_areas; +} + +/* + * get_active_entry: + */ +static inline debug_entry_t *get_active_entry(debug_info_t *id) +{ + return (debug_entry_t *) (((char *) id->areas[id->active_area] + [id->active_pages[id->active_area]]) + + id->active_entries[id->active_area]); +} + +/* Swap debug areas of a and b. */ +static void debug_areas_swap(debug_info_t *a, debug_info_t *b) +{ + swap(a->nr_areas, b->nr_areas); + swap(a->pages_per_area, b->pages_per_area); + swap(a->areas, b->areas); + swap(a->active_area, b->active_area); + swap(a->active_pages, b->active_pages); + swap(a->active_entries, b->active_entries); +} + +/* Append all debug events in active area from source to destination log. */ +static void debug_events_append(debug_info_t *dest, debug_info_t *src) +{ + debug_entry_t *from, *to, *last; + + if (!src->areas || !dest->areas) + return; + + /* Loop over all entries in src, starting with oldest. */ + from = get_active_entry(src); + last = from; + do { + if (from->clock != 0LL) { + to = get_active_entry(dest); + memset(to, 0, dest->entry_size); + memcpy(to, from, min(src->entry_size, + dest->entry_size)); + proceed_active_entry(dest); + } + + proceed_active_entry(src); + from = get_active_entry(src); + } while (from != last); +} + +/* + * debug_finish_entry: + * - set timestamp, caller address, cpu number etc. 
+ */ + +static inline void debug_finish_entry(debug_info_t *id, debug_entry_t *active, + int level, int exception) +{ + unsigned long timestamp; + union tod_clock clk; + + store_tod_clock_ext(&clk); + timestamp = clk.us; + timestamp -= TOD_UNIX_EPOCH >> 12; + active->clock = timestamp; + active->cpu = smp_processor_id(); + active->caller = __builtin_return_address(0); + active->exception = exception; + active->level = level; + proceed_active_entry(id); + if (exception) + proceed_active_area(id); +} + +static int debug_stoppable = 1; +static int debug_active = 1; + +#define CTL_S390DBF_STOPPABLE 5678 +#define CTL_S390DBF_ACTIVE 5679 + +/* + * proc handler for the running debug_active sysctl + * always allow read, allow write only if debug_stoppable is set or + * if debug_active is already off + */ +static int s390dbf_procactive(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + if (!write || debug_stoppable || !debug_active) + return proc_dointvec(table, write, buffer, lenp, ppos); + else + return 0; +} + +static struct ctl_table s390dbf_table[] = { + { + .procname = "debug_stoppable", + .data = &debug_stoppable, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + }, + { + .procname = "debug_active", + .data = &debug_active, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = s390dbf_procactive, + }, + { } +}; + +static struct ctl_table_header *s390dbf_sysctl_header; + +/** + * debug_stop_all() - stops the debug feature if stopping is allowed. + * + * Return: + * - none + * + * Currently used in case of a kernel oops. + */ +void debug_stop_all(void) +{ + if (debug_stoppable) + debug_active = 0; +} +EXPORT_SYMBOL(debug_stop_all); + +/** + * debug_set_critical() - event/exception functions try lock instead of spin. + * + * Return: + * - none + * + * Currently used in case of stopping all CPUs but the current one. + * Once in this state, functions to write a debug entry for an + * event or exception no longer spin on the debug area lock, + * but only try to get it and fail if they do not get the lock. 
+ */ +void debug_set_critical(void) +{ + debug_critical = 1; +} + +/* + * debug_event_common: + * - write debug entry with given size + */ +debug_entry_t *debug_event_common(debug_info_t *id, int level, const void *buf, + int len) +{ + debug_entry_t *active; + unsigned long flags; + + if (!debug_active || !id->areas) + return NULL; + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else { + spin_lock_irqsave(&id->lock, flags); + } + do { + active = get_active_entry(id); + memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); + if (len < id->buf_size) + memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len); + debug_finish_entry(id, active, level, 0); + len -= id->buf_size; + buf += id->buf_size; + } while (len > 0); + + spin_unlock_irqrestore(&id->lock, flags); + return active; +} +EXPORT_SYMBOL(debug_event_common); + +/* + * debug_exception_common: + * - write debug entry with given size and switch to next debug area + */ +debug_entry_t *debug_exception_common(debug_info_t *id, int level, + const void *buf, int len) +{ + debug_entry_t *active; + unsigned long flags; + + if (!debug_active || !id->areas) + return NULL; + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else { + spin_lock_irqsave(&id->lock, flags); + } + do { + active = get_active_entry(id); + memcpy(DEBUG_DATA(active), buf, min(len, id->buf_size)); + if (len < id->buf_size) + memset((DEBUG_DATA(active)) + len, 0, id->buf_size - len); + debug_finish_entry(id, active, level, len <= id->buf_size); + len -= id->buf_size; + buf += id->buf_size; + } while (len > 0); + + spin_unlock_irqrestore(&id->lock, flags); + return active; +} +EXPORT_SYMBOL(debug_exception_common); + +/* + * counts arguments in format string for sprintf view + */ +static inline int debug_count_numargs(char *string) +{ + int numargs = 0; + + while (*string) { + if (*string++ == '%') + numargs++; + } + return numargs; +} + +/* + * debug_sprintf_event: + */ +debug_entry_t *__debug_sprintf_event(debug_info_t *id, int level, char *string, ...) +{ + debug_sprintf_entry_t *curr_event; + debug_entry_t *active; + unsigned long flags; + int numargs, idx; + va_list ap; + + if (!debug_active || !id->areas) + return NULL; + numargs = debug_count_numargs(string); + + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else { + spin_lock_irqsave(&id->lock, flags); + } + active = get_active_entry(id); + curr_event = (debug_sprintf_entry_t *) DEBUG_DATA(active); + va_start(ap, string); + curr_event->string = string; + for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++) + curr_event->args[idx] = va_arg(ap, long); + va_end(ap); + debug_finish_entry(id, active, level, 0); + spin_unlock_irqrestore(&id->lock, flags); + + return active; +} +EXPORT_SYMBOL(__debug_sprintf_event); + +/* + * debug_sprintf_exception: + */ +debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) 
+{ + debug_sprintf_entry_t *curr_event; + debug_entry_t *active; + unsigned long flags; + int numargs, idx; + va_list ap; + + if (!debug_active || !id->areas) + return NULL; + + numargs = debug_count_numargs(string); + + if (debug_critical) { + if (!spin_trylock_irqsave(&id->lock, flags)) + return NULL; + } else { + spin_lock_irqsave(&id->lock, flags); + } + active = get_active_entry(id); + curr_event = (debug_sprintf_entry_t *)DEBUG_DATA(active); + va_start(ap, string); + curr_event->string = string; + for (idx = 0; idx < min(numargs, (int)(id->buf_size / sizeof(long)) - 1); idx++) + curr_event->args[idx] = va_arg(ap, long); + va_end(ap); + debug_finish_entry(id, active, level, 1); + spin_unlock_irqrestore(&id->lock, flags); + + return active; +} +EXPORT_SYMBOL(__debug_sprintf_exception); + +/** + * debug_register_view() - registers new debug view and creates debugfs + * dir entry + * + * @id: handle for debug log + * @view: pointer to debug view struct + * + * Return: + * - 0 : ok + * - < 0: Error + */ +int debug_register_view(debug_info_t *id, struct debug_view *view) +{ + unsigned long flags; + struct dentry *pde; + umode_t mode; + int rc = 0; + int i; + + if (!id) + goto out; + mode = (id->mode | S_IFREG) & ~S_IXUGO; + if (!(view->prolog_proc || view->format_proc || view->header_proc)) + mode &= ~(S_IRUSR | S_IRGRP | S_IROTH); + if (!view->input_proc) + mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH); + pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry, + id, &debug_file_ops); + spin_lock_irqsave(&id->lock, flags); + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (!id->views[i]) + break; + } + if (i == DEBUG_MAX_VIEWS) { + rc = -1; + } else { + id->views[i] = view; + id->debugfs_entries[i] = pde; + } + spin_unlock_irqrestore(&id->lock, flags); + if (rc) { + pr_err("Registering view %s/%s would exceed the maximum " + "number of views %i\n", id->name, view->name, i); + debugfs_remove(pde); + } +out: + return rc; +} +EXPORT_SYMBOL(debug_register_view); + +/** + * debug_unregister_view() - unregisters debug view and removes debugfs + * dir entry + * + * @id: handle for debug log + * @view: pointer to debug view struct + * + * Return: + * - 0 : ok + * - < 0: Error + */ +int debug_unregister_view(debug_info_t *id, struct debug_view *view) +{ + struct dentry *dentry = NULL; + unsigned long flags; + int i, rc = 0; + + if (!id) + goto out; + spin_lock_irqsave(&id->lock, flags); + for (i = 0; i < DEBUG_MAX_VIEWS; i++) { + if (id->views[i] == view) + break; + } + if (i == DEBUG_MAX_VIEWS) { + rc = -1; + } else { + dentry = id->debugfs_entries[i]; + id->views[i] = NULL; + id->debugfs_entries[i] = NULL; + } + spin_unlock_irqrestore(&id->lock, flags); + debugfs_remove(dentry); +out: + return rc; +} +EXPORT_SYMBOL(debug_unregister_view); + +static inline char *debug_get_user_string(const char __user *user_buf, + size_t user_len) +{ + char *buffer; + + buffer = kmalloc(user_len + 1, GFP_KERNEL); + if (!buffer) + return ERR_PTR(-ENOMEM); + if (copy_from_user(buffer, user_buf, user_len) != 0) { + kfree(buffer); + return ERR_PTR(-EFAULT); + } + /* got the string, now strip linefeed. 
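+ * Input usually arrives via something like "echo 3 > <view>", so a
+ * trailing newline would otherwise make the numeric parsing fail.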
*/ + if (buffer[user_len - 1] == '\n') + buffer[user_len - 1] = 0; + else + buffer[user_len] = 0; + return buffer; +} + +static inline int debug_get_uint(char *buf) +{ + int rc; + + buf = skip_spaces(buf); + rc = simple_strtoul(buf, &buf, 10); + if (*buf) + rc = -EINVAL; + return rc; +} + +/* + * functions for debug-views + *********************************** +*/ + +/* + * prints out actual debug level + */ + +static int debug_prolog_pages_fn(debug_info_t *id, struct debug_view *view, + char *out_buf) +{ + return sprintf(out_buf, "%i\n", id->pages_per_area); +} + +/* + * reads new size (number of pages per debug area) + */ + +static int debug_input_pages_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) +{ + int rc, new_pages; + char *str; + + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0) { + rc = -EPIPE; + goto out; + } + str = debug_get_user_string(user_buf, user_len); + if (IS_ERR(str)) { + rc = PTR_ERR(str); + goto out; + } + new_pages = debug_get_uint(str); + if (new_pages < 0) { + rc = -EINVAL; + goto free_str; + } + rc = debug_set_size(id, id->nr_areas, new_pages); + if (rc != 0) { + rc = -EINVAL; + goto free_str; + } + rc = user_len; +free_str: + kfree(str); +out: + *offset += user_len; + return rc; /* number of input characters */ +} + +/* + * prints out actual debug level + */ +static int debug_prolog_level_fn(debug_info_t *id, struct debug_view *view, + char *out_buf) +{ + int rc = 0; + + if (id->level == DEBUG_OFF_LEVEL) + rc = sprintf(out_buf, "-\n"); + else + rc = sprintf(out_buf, "%i\n", id->level); + return rc; +} + +/* + * reads new debug level + */ +static int debug_input_level_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) +{ + int rc, new_level; + char *str; + + if (user_len > 0x10000) + user_len = 0x10000; + if (*offset != 0) { + rc = -EPIPE; + goto out; + } + str = debug_get_user_string(user_buf, user_len); + if (IS_ERR(str)) { + rc = PTR_ERR(str); + goto out; + } + if (str[0] == '-') { + debug_set_level(id, DEBUG_OFF_LEVEL); + rc = user_len; + goto free_str; + } else { + new_level = debug_get_uint(str); + } + if (new_level < 0) { + pr_warn("%s is not a valid level for a debug feature\n", str); + rc = -EINVAL; + } else { + debug_set_level(id, new_level); + rc = user_len; + } +free_str: + kfree(str); +out: + *offset += user_len; + return rc; /* number of input characters */ +} + +/* + * flushes debug areas + */ +static void debug_flush(debug_info_t *id, int area) +{ + unsigned long flags; + int i, j; + + if (!id || !id->areas) + return; + spin_lock_irqsave(&id->lock, flags); + if (area == DEBUG_FLUSH_ALL) { + id->active_area = 0; + memset(id->active_entries, 0, id->nr_areas * sizeof(int)); + for (i = 0; i < id->nr_areas; i++) { + id->active_pages[i] = 0; + for (j = 0; j < id->pages_per_area; j++) + memset(id->areas[i][j], 0, PAGE_SIZE); + } + } else if (area >= 0 && area < id->nr_areas) { + id->active_entries[area] = 0; + id->active_pages[area] = 0; + for (i = 0; i < id->pages_per_area; i++) + memset(id->areas[area][i], 0, PAGE_SIZE); + } + spin_unlock_irqrestore(&id->lock, flags); +} + +/* + * view function: flushes debug areas + */ +static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, + struct file *file, const char __user *user_buf, + size_t user_len, loff_t *offset) +{ + char input_buf[1]; + int rc = user_len; + + if (user_len > 0x10000) + user_len = 0x10000; + 
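+ /*
+  * A write is only honored at offset 0: "-" flushes all areas, a single
+  * digit flushes that area, e.g. (assuming the usual debugfs mount and
+  * s390dbf directory): echo - > /sys/kernel/debug/s390dbf/<name>/flush
+  */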
if (*offset != 0) { + rc = -EPIPE; + goto out; + } + if (copy_from_user(input_buf, user_buf, 1)) { + rc = -EFAULT; + goto out; + } + if (input_buf[0] == '-') { + debug_flush(id, DEBUG_FLUSH_ALL); + goto out; + } + if (isdigit(input_buf[0])) { + int area = ((int) input_buf[0] - (int) '0'); + + debug_flush(id, area); + goto out; + } + + pr_info("Flushing debug data failed because %c is not a valid " + "area\n", input_buf[0]); + +out: + *offset += user_len; + return rc; /* number of input characters */ +} + +/* + * prints debug data in hex/ascii format + */ +static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, const char *in_buf) +{ + int i, rc = 0; + + for (i = 0; i < id->buf_size; i++) + rc += sprintf(out_buf + rc, "%02x ", ((unsigned char *) in_buf)[i]); + rc += sprintf(out_buf + rc, "| "); + for (i = 0; i < id->buf_size; i++) { + unsigned char c = in_buf[i]; + + if (isascii(c) && isprint(c)) + rc += sprintf(out_buf + rc, "%c", c); + else + rc += sprintf(out_buf + rc, "."); + } + rc += sprintf(out_buf + rc, "\n"); + return rc; +} + +/* + * prints header for debug entry + */ +int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf) +{ + unsigned long sec, usec; + unsigned long caller; + unsigned int level; + char *except_str; + int rc = 0; + + level = entry->level; + sec = entry->clock; + usec = do_div(sec, USEC_PER_SEC); + + if (entry->exception) + except_str = "*"; + else + except_str = "-"; + caller = (unsigned long) entry->caller; + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %04u %px ", + area, sec, usec, level, except_str, + entry->cpu, (void *)caller); + return rc; +} +EXPORT_SYMBOL(debug_dflt_header_fn); + +/* + * prints debug data sprintf-formated: + * debug_sprinf_event/exception calls must be used together with this view + */ + +#define DEBUG_SPRINTF_MAX_ARGS 10 + +static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, const char *inbuf) +{ + debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; + int num_longs, num_used_args = 0, i, rc = 0; + int index[DEBUG_SPRINTF_MAX_ARGS]; + + /* count of longs fit into one entry */ + num_longs = id->buf_size / sizeof(long); + + if (num_longs < 1) + goto out; /* bufsize of entry too small */ + if (num_longs == 1) { + /* no args, we use only the string */ + strcpy(out_buf, curr_event->string); + rc = strlen(curr_event->string); + goto out; + } + + /* number of arguments used for sprintf (without the format string) */ + num_used_args = min(DEBUG_SPRINTF_MAX_ARGS, (num_longs - 1)); + + memset(index, 0, DEBUG_SPRINTF_MAX_ARGS * sizeof(int)); + + for (i = 0; i < num_used_args; i++) + index[i] = i; + + rc = sprintf(out_buf, curr_event->string, curr_event->args[index[0]], + curr_event->args[index[1]], curr_event->args[index[2]], + curr_event->args[index[3]], curr_event->args[index[4]], + curr_event->args[index[5]], curr_event->args[index[6]], + curr_event->args[index[7]], curr_event->args[index[8]], + curr_event->args[index[9]]); +out: + return rc; +} + +/* + * debug_init: + * - is called exactly once to initialize the debug feature + */ +static int __init debug_init(void) +{ + s390dbf_sysctl_header = register_sysctl("s390dbf", s390dbf_table); + mutex_lock(&debug_mutex); + debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT, NULL); + initialized = 1; + mutex_unlock(&debug_mutex); + return 0; +} +postcore_initcall(debug_init); diff --git a/arch/s390/kernel/diag.c 
b/arch/s390/kernel/diag.c new file mode 100644 index 0000000000..f9f06cd8fc --- /dev/null +++ b/arch/s390/kernel/diag.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implementation of s390 diagnose codes + * + * Copyright IBM Corp. 2007 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/vmalloc.h> +#include <asm/asm-extable.h> +#include <asm/diag.h> +#include <asm/trace/diag.h> +#include <asm/sections.h> +#include "entry.h" + +struct diag_stat { + unsigned int counter[NR_DIAG_STAT]; +}; + +static DEFINE_PER_CPU(struct diag_stat, diag_stat); + +struct diag_desc { + int code; + char *name; +}; + +static const struct diag_desc diag_map[NR_DIAG_STAT] = { + [DIAG_STAT_X008] = { .code = 0x008, .name = "Console Function" }, + [DIAG_STAT_X00C] = { .code = 0x00c, .name = "Pseudo Timer" }, + [DIAG_STAT_X010] = { .code = 0x010, .name = "Release Pages" }, + [DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" }, + [DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" }, + [DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" }, + [DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" }, + [DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" }, + [DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" }, + [DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" }, + [DIAG_STAT_X210] = { .code = 0x210, .name = "Device Information" }, + [DIAG_STAT_X224] = { .code = 0x224, .name = "EBCDIC-Name Table" }, + [DIAG_STAT_X250] = { .code = 0x250, .name = "Block I/O" }, + [DIAG_STAT_X258] = { .code = 0x258, .name = "Page-Reference Services" }, + [DIAG_STAT_X26C] = { .code = 0x26c, .name = "Certain System Information" }, + [DIAG_STAT_X288] = { .code = 0x288, .name = "Time Bomb" }, + [DIAG_STAT_X2C4] = { .code = 0x2c4, .name = "FTP Services" }, + [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, + [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, + [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, + [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, +}; + +struct diag_ops __amode31_ref diag_amode31_ops = { + .diag210 = _diag210_amode31, + .diag26c = _diag26c_amode31, + .diag14 = _diag14_amode31, + .diag0c = _diag0c_amode31, + .diag8c = _diag8c_amode31, + .diag308_reset = _diag308_reset_amode31 +}; + +static struct diag210 _diag210_tmp_amode31 __section(".amode31.data"); +struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31; + +static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data"); +static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31; + +static int show_diag_stat(struct seq_file *m, void *v) +{ + struct diag_stat *stat; + unsigned long n = (unsigned long) v - 1; + int cpu, prec, tmp; + + cpus_read_lock(); + if (n == 0) { + seq_puts(m, " "); + + for_each_online_cpu(cpu) { + prec = 10; + for (tmp = 10; cpu >= tmp; tmp *= 10) + prec--; + seq_printf(m, "%*s%d", prec, "CPU", cpu); + } + seq_putc(m, '\n'); + } else if (n <= NR_DIAG_STAT) { + seq_printf(m, "diag %03x:", diag_map[n-1].code); + for_each_online_cpu(cpu) { + stat = &per_cpu(diag_stat, cpu); + 
seq_printf(m, " %10u", stat->counter[n-1]); + } + seq_printf(m, " %s\n", diag_map[n-1].name); + } + cpus_read_unlock(); + return 0; +} + +static void *show_diag_stat_start(struct seq_file *m, loff_t *pos) +{ + return *pos <= NR_DIAG_STAT ? (void *)((unsigned long) *pos + 1) : NULL; +} + +static void *show_diag_stat_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return show_diag_stat_start(m, pos); +} + +static void show_diag_stat_stop(struct seq_file *m, void *v) +{ +} + +static const struct seq_operations show_diag_stat_sops = { + .start = show_diag_stat_start, + .next = show_diag_stat_next, + .stop = show_diag_stat_stop, + .show = show_diag_stat, +}; + +DEFINE_SEQ_ATTRIBUTE(show_diag_stat); + +static int __init show_diag_stat_init(void) +{ + debugfs_create_file("diag_stat", 0400, NULL, NULL, + &show_diag_stat_fops); + return 0; +} + +device_initcall(show_diag_stat_init); + +void diag_stat_inc(enum diag_stat_enum nr) +{ + this_cpu_inc(diag_stat.counter[nr]); + trace_s390_diagnose(diag_map[nr].code); +} +EXPORT_SYMBOL(diag_stat_inc); + +void notrace diag_stat_inc_norecursion(enum diag_stat_enum nr) +{ + this_cpu_inc(diag_stat.counter[nr]); + trace_s390_diagnose_norecursion(diag_map[nr].code); +} +EXPORT_SYMBOL(diag_stat_inc_norecursion); + +/* + * Diagnose 14: Input spool file manipulation + */ +int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) +{ + diag_stat_inc(DIAG_STAT_X014); + return diag_amode31_ops.diag14(rx, ry1, subcode); +} +EXPORT_SYMBOL(diag14); + +static inline int __diag204(unsigned long *subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = *subcode, .odd = size }; + + asm volatile( + " diag %[addr],%[rp],0x204\n" + "0: nopr %%r7\n" + EX_TABLE(0b,0b) + : [rp] "+&d" (rp.pair) : [addr] "d" (addr) : "memory"); + *subcode = rp.even; + return rp.odd; +} + +/** + * diag204() - Issue diagnose 204 call. + * @subcode: Subcode of diagnose 204 to be executed. + * @size: Size of area in pages which @area points to, if given. + * @addr: Vmalloc'ed memory area where the result is written to. + * + * Execute diagnose 204 with the given subcode and write the result to the + * memory area specified with @addr. For subcodes which do not write a + * result to memory both @size and @addr must be zero. If @addr is + * specified it must be page aligned and must have been allocated with + * vmalloc(). Conversion to real / physical addresses will be handled by + * this function if required. 
+ */ +int diag204(unsigned long subcode, unsigned long size, void *addr) +{ + if (addr) { + if (WARN_ON_ONCE(!is_vmalloc_addr(addr))) + return -1; + if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE))) + return -1; + } + if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4) + addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr)); + diag_stat_inc(DIAG_STAT_X204); + size = __diag204(&subcode, size, addr); + if (subcode) + return -1; + return size; +} +EXPORT_SYMBOL(diag204); + +/* + * Diagnose 210: Get information about a virtual device + */ +int diag210(struct diag210 *addr) +{ + static DEFINE_SPINLOCK(diag210_lock); + unsigned long flags; + int ccode; + + spin_lock_irqsave(&diag210_lock, flags); + *__diag210_tmp_amode31 = *addr; + + diag_stat_inc(DIAG_STAT_X210); + ccode = diag_amode31_ops.diag210(__diag210_tmp_amode31); + + *addr = *__diag210_tmp_amode31; + spin_unlock_irqrestore(&diag210_lock, flags); + + return ccode; +} +EXPORT_SYMBOL(diag210); + +/* + * Diagnose 8C: Access 3270 Display Device Information + */ +int diag8c(struct diag8c *addr, struct ccw_dev_id *devno) +{ + static DEFINE_SPINLOCK(diag8c_lock); + unsigned long flags; + int ccode; + + spin_lock_irqsave(&diag8c_lock, flags); + + diag_stat_inc(DIAG_STAT_X08C); + ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr)); + + *addr = *__diag8c_tmp_amode31; + spin_unlock_irqrestore(&diag8c_lock, flags); + + return ccode; +} +EXPORT_SYMBOL(diag8c); + +int diag224(void *ptr) +{ + int rc = -EOPNOTSUPP; + + diag_stat_inc(DIAG_STAT_X224); + asm volatile( + " diag %1,%2,0x224\n" + "0: lhi %0,0x0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); + return rc; +} +EXPORT_SYMBOL(diag224); + +/* + * Diagnose 26C: Access Certain System Information + */ +int diag26c(void *req, void *resp, enum diag26c_sc subcode) +{ + diag_stat_inc(DIAG_STAT_X26C); + return diag_amode31_ops.diag26c(req, resp, subcode); +} +EXPORT_SYMBOL(diag26c); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c new file mode 100644 index 0000000000..89dc826a8d --- /dev/null +++ b/arch/s390/kernel/dis.c @@ -0,0 +1,590 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Disassemble s390 instructions. + * + * Copyright IBM Corp. 
2007 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + */ + +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/timer.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/kallsyms.h> +#include <linux/reboot.h> +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/uaccess.h> +#include <linux/atomic.h> +#include <linux/io.h> +#include <asm/dis.h> +#include <asm/cpcmd.h> +#include <asm/lowcore.h> +#include <asm/debug.h> +#include <asm/irq.h> + +/* Type of operand */ +#define OPERAND_GPR 0x1 /* Operand printed as %rx */ +#define OPERAND_FPR 0x2 /* Operand printed as %fx */ +#define OPERAND_AR 0x4 /* Operand printed as %ax */ +#define OPERAND_CR 0x8 /* Operand printed as %cx */ +#define OPERAND_VR 0x10 /* Operand printed as %vx */ +#define OPERAND_DISP 0x20 /* Operand printed as displacement */ +#define OPERAND_BASE 0x40 /* Operand printed as base register */ +#define OPERAND_INDEX 0x80 /* Operand printed as index register */ +#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */ +#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */ +#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */ + +struct s390_operand { + unsigned char bits; /* The number of bits in the operand. */ + unsigned char shift; /* The number of bits to shift. */ + unsigned short flags; /* One bit syntax flags. */ +}; + +struct s390_insn { + union { + const char name[5]; + struct { + unsigned char zero; + unsigned int offset; + } __packed; + }; + unsigned char opfrag; + unsigned char format; +}; + +struct s390_opcode_offset { + unsigned char opcode; + unsigned char mask; + unsigned char byte; + unsigned short offset; + unsigned short count; +} __packed; + +enum { + UNUSED, + A_8, /* Access reg. starting at position 8 */ + A_12, /* Access reg. starting at position 12 */ + A_24, /* Access reg. starting at position 24 */ + A_28, /* Access reg. starting at position 28 */ + B_16, /* Base register starting at position 16 */ + B_32, /* Base register starting at position 32 */ + C_8, /* Control reg. starting at position 8 */ + C_12, /* Control reg. 
starting at position 12 */ + D20_20, /* 20 bit displacement starting at 20 */ + D_20, /* Displacement starting at position 20 */ + D_36, /* Displacement starting at position 36 */ + F_8, /* FPR starting at position 8 */ + F_12, /* FPR starting at position 12 */ + F_16, /* FPR starting at position 16 */ + F_24, /* FPR starting at position 24 */ + F_28, /* FPR starting at position 28 */ + F_32, /* FPR starting at position 32 */ + I8_8, /* 8 bit signed value starting at 8 */ + I8_32, /* 8 bit signed value starting at 32 */ + I16_16, /* 16 bit signed value starting at 16 */ + I16_32, /* 16 bit signed value starting at 32 */ + I32_16, /* 32 bit signed value starting at 16 */ + J12_12, /* 12 bit PC relative offset at 12 */ + J16_16, /* 16 bit PC relative offset at 16 */ + J16_32, /* 16 bit PC relative offset at 32 */ + J24_24, /* 24 bit PC relative offset at 24 */ + J32_16, /* 32 bit PC relative offset at 16 */ + L4_8, /* 4 bit length starting at position 8 */ + L4_12, /* 4 bit length starting at position 12 */ + L8_8, /* 8 bit length starting at position 8 */ + R_8, /* GPR starting at position 8 */ + R_12, /* GPR starting at position 12 */ + R_16, /* GPR starting at position 16 */ + R_24, /* GPR starting at position 24 */ + R_28, /* GPR starting at position 28 */ + U4_8, /* 4 bit unsigned value starting at 8 */ + U4_12, /* 4 bit unsigned value starting at 12 */ + U4_16, /* 4 bit unsigned value starting at 16 */ + U4_20, /* 4 bit unsigned value starting at 20 */ + U4_24, /* 4 bit unsigned value starting at 24 */ + U4_28, /* 4 bit unsigned value starting at 28 */ + U4_32, /* 4 bit unsigned value starting at 32 */ + U4_36, /* 4 bit unsigned value starting at 36 */ + U8_8, /* 8 bit unsigned value starting at 8 */ + U8_16, /* 8 bit unsigned value starting at 16 */ + U8_24, /* 8 bit unsigned value starting at 24 */ + U8_28, /* 8 bit unsigned value starting at 28 */ + U8_32, /* 8 bit unsigned value starting at 32 */ + U12_16, /* 12 bit unsigned value starting at 16 */ + U16_16, /* 16 bit unsigned value starting at 16 */ + U16_32, /* 16 bit unsigned value starting at 32 */ + U32_16, /* 32 bit unsigned value starting at 16 */ + VX_12, /* Vector index register starting at position 12 */ + V_8, /* Vector reg. starting at position 8 */ + V_12, /* Vector reg. starting at position 12 */ + V_16, /* Vector reg. starting at position 16 */ + V_32, /* Vector reg. 
starting at position 32 */ + X_12, /* Index register starting at position 12 */ +}; + +static const struct s390_operand operands[] = { + [UNUSED] = { 0, 0, 0 }, + [A_8] = { 4, 8, OPERAND_AR }, + [A_12] = { 4, 12, OPERAND_AR }, + [A_24] = { 4, 24, OPERAND_AR }, + [A_28] = { 4, 28, OPERAND_AR }, + [B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR }, + [B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR }, + [C_8] = { 4, 8, OPERAND_CR }, + [C_12] = { 4, 12, OPERAND_CR }, + [D20_20] = { 20, 20, OPERAND_DISP | OPERAND_SIGNED }, + [D_20] = { 12, 20, OPERAND_DISP }, + [D_36] = { 12, 36, OPERAND_DISP }, + [F_8] = { 4, 8, OPERAND_FPR }, + [F_12] = { 4, 12, OPERAND_FPR }, + [F_16] = { 4, 16, OPERAND_FPR }, + [F_24] = { 4, 24, OPERAND_FPR }, + [F_28] = { 4, 28, OPERAND_FPR }, + [F_32] = { 4, 32, OPERAND_FPR }, + [I8_8] = { 8, 8, OPERAND_SIGNED }, + [I8_32] = { 8, 32, OPERAND_SIGNED }, + [I16_16] = { 16, 16, OPERAND_SIGNED }, + [I16_32] = { 16, 32, OPERAND_SIGNED }, + [I32_16] = { 32, 16, OPERAND_SIGNED }, + [J12_12] = { 12, 12, OPERAND_PCREL }, + [J16_16] = { 16, 16, OPERAND_PCREL }, + [J16_32] = { 16, 32, OPERAND_PCREL }, + [J24_24] = { 24, 24, OPERAND_PCREL }, + [J32_16] = { 32, 16, OPERAND_PCREL }, + [L4_8] = { 4, 8, OPERAND_LENGTH }, + [L4_12] = { 4, 12, OPERAND_LENGTH }, + [L8_8] = { 8, 8, OPERAND_LENGTH }, + [R_8] = { 4, 8, OPERAND_GPR }, + [R_12] = { 4, 12, OPERAND_GPR }, + [R_16] = { 4, 16, OPERAND_GPR }, + [R_24] = { 4, 24, OPERAND_GPR }, + [R_28] = { 4, 28, OPERAND_GPR }, + [U4_8] = { 4, 8, 0 }, + [U4_12] = { 4, 12, 0 }, + [U4_16] = { 4, 16, 0 }, + [U4_20] = { 4, 20, 0 }, + [U4_24] = { 4, 24, 0 }, + [U4_28] = { 4, 28, 0 }, + [U4_32] = { 4, 32, 0 }, + [U4_36] = { 4, 36, 0 }, + [U8_8] = { 8, 8, 0 }, + [U8_16] = { 8, 16, 0 }, + [U8_24] = { 8, 24, 0 }, + [U8_28] = { 8, 28, 0 }, + [U8_32] = { 8, 32, 0 }, + [U12_16] = { 12, 16, 0 }, + [U16_16] = { 16, 16, 0 }, + [U16_32] = { 16, 32, 0 }, + [U32_16] = { 32, 16, 0 }, + [VX_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR }, + [V_8] = { 4, 8, OPERAND_VR }, + [V_12] = { 4, 12, OPERAND_VR }, + [V_16] = { 4, 16, OPERAND_VR }, + [V_32] = { 4, 32, OPERAND_VR }, + [X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR }, +}; + +static const unsigned char formats[][6] = { + [INSTR_E] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_IE_UU] = { U4_24, U4_28, 0, 0, 0, 0 }, + [INSTR_MII_UPP] = { U4_8, J12_12, J24_24 }, + [INSTR_RIE_R0IU] = { R_8, I16_16, U4_32, 0, 0, 0 }, + [INSTR_RIE_R0UU] = { R_8, U16_16, U4_32, 0, 0, 0 }, + [INSTR_RIE_RRI0] = { R_8, R_12, I16_16, 0, 0, 0 }, + [INSTR_RIE_RRP] = { R_8, R_12, J16_16, 0, 0, 0 }, + [INSTR_RIE_RRPU] = { R_8, R_12, U4_32, J16_16, 0, 0 }, + [INSTR_RIE_RRUUU] = { R_8, R_12, U8_16, U8_24, U8_32, 0 }, + [INSTR_RIE_RUI0] = { R_8, I16_16, U4_12, 0, 0, 0 }, + [INSTR_RIE_RUPI] = { R_8, I8_32, U4_12, J16_16, 0, 0 }, + [INSTR_RIE_RUPU] = { R_8, U8_32, U4_12, J16_16, 0, 0 }, + [INSTR_RIL_RI] = { R_8, I32_16, 0, 0, 0, 0 }, + [INSTR_RIL_RP] = { R_8, J32_16, 0, 0, 0, 0 }, + [INSTR_RIL_RU] = { R_8, U32_16, 0, 0, 0, 0 }, + [INSTR_RIL_UP] = { U4_8, J32_16, 0, 0, 0, 0 }, + [INSTR_RIS_RURDI] = { R_8, I8_32, U4_12, D_20, B_16, 0 }, + [INSTR_RIS_RURDU] = { R_8, U8_32, U4_12, D_20, B_16, 0 }, + [INSTR_RI_RI] = { R_8, I16_16, 0, 0, 0, 0 }, + [INSTR_RI_RP] = { R_8, J16_16, 0, 0, 0, 0 }, + [INSTR_RI_RU] = { R_8, U16_16, 0, 0, 0, 0 }, + [INSTR_RI_UP] = { U4_8, J16_16, 0, 0, 0, 0 }, + [INSTR_RRE_00] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_RRE_AA] = { A_24, A_28, 0, 0, 0, 0 }, + [INSTR_RRE_AR] = { A_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRE_F0] = { F_24, 0, 0, 0, 0, 0 }, + [INSTR_RRE_FF] = { 
F_24, F_28, 0, 0, 0, 0 }, + [INSTR_RRE_FR] = { F_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRE_R0] = { R_24, 0, 0, 0, 0, 0 }, + [INSTR_RRE_RA] = { R_24, A_28, 0, 0, 0, 0 }, + [INSTR_RRE_RF] = { R_24, F_28, 0, 0, 0, 0 }, + [INSTR_RRE_RR] = { R_24, R_28, 0, 0, 0, 0 }, + [INSTR_RRF_0UFF] = { F_24, F_28, U4_20, 0, 0, 0 }, + [INSTR_RRF_0URF] = { R_24, F_28, U4_20, 0, 0, 0 }, + [INSTR_RRF_F0FF] = { F_16, F_24, F_28, 0, 0, 0 }, + [INSTR_RRF_F0FF2] = { F_24, F_16, F_28, 0, 0, 0 }, + [INSTR_RRF_F0FR] = { F_24, F_16, R_28, 0, 0, 0 }, + [INSTR_RRF_FFRU] = { F_24, F_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_FUFF] = { F_24, F_16, F_28, U4_20, 0, 0 }, + [INSTR_RRF_FUFF2] = { F_24, F_28, F_16, U4_20, 0, 0 }, + [INSTR_RRF_R0RR] = { R_24, R_16, R_28, 0, 0, 0 }, + [INSTR_RRF_R0RR2] = { R_24, R_28, R_16, 0, 0, 0 }, + [INSTR_RRF_RURR] = { R_24, R_28, R_16, U4_20, 0, 0 }, + [INSTR_RRF_RURR2] = { R_24, R_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 }, + [INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 }, + [INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 }, + [INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 }, + [INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 }, + [INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 }, + [INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 }, + [INSTR_RRS_RRRDU] = { R_8, R_12, U4_32, D_20, B_16 }, + [INSTR_RR_FF] = { F_8, F_12, 0, 0, 0, 0 }, + [INSTR_RR_R0] = { R_8, 0, 0, 0, 0, 0 }, + [INSTR_RR_RR] = { R_8, R_12, 0, 0, 0, 0 }, + [INSTR_RR_U0] = { U8_8, 0, 0, 0, 0, 0 }, + [INSTR_RR_UR] = { U4_8, R_12, 0, 0, 0, 0 }, + [INSTR_RSI_RRP] = { R_8, R_12, J16_16, 0, 0, 0 }, + [INSTR_RSL_LRDFU] = { F_32, D_20, L8_8, B_16, U4_36, 0 }, + [INSTR_RSL_R0RD] = { D_20, L4_8, B_16, 0, 0, 0 }, + [INSTR_RSY_AARD] = { A_8, A_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_CCRD] = { C_8, C_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RDRU] = { R_8, D20_20, B_16, U4_12, 0, 0 }, + [INSTR_RSY_RRRD] = { R_8, R_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RURD] = { R_8, U4_12, D20_20, B_16, 0, 0 }, + [INSTR_RSY_RURD2] = { R_8, D20_20, B_16, U4_12, 0, 0 }, + [INSTR_RS_AARD] = { A_8, A_12, D_20, B_16, 0, 0 }, + [INSTR_RS_CCRD] = { C_8, C_12, D_20, B_16, 0, 0 }, + [INSTR_RS_R0RD] = { R_8, D_20, B_16, 0, 0, 0 }, + [INSTR_RS_RRRD] = { R_8, R_12, D_20, B_16, 0, 0 }, + [INSTR_RS_RURD] = { R_8, U4_12, D_20, B_16, 0, 0 }, + [INSTR_RXE_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RXE_RRRDU] = { R_8, D_20, X_12, B_16, U4_32, 0 }, + [INSTR_RXF_FRRDF] = { F_32, F_8, D_20, X_12, B_16, 0 }, + [INSTR_RXY_FRRD] = { F_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RXY_RRRD] = { R_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RXY_URRD] = { U4_8, D20_20, X_12, B_16, 0, 0 }, + [INSTR_RX_FRRD] = { F_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RX_RRRD] = { R_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_RX_URRD] = { U4_8, D_20, X_12, B_16, 0, 0 }, + [INSTR_SIL_RDI] = { D_20, B_16, I16_32, 0, 0, 0 }, + [INSTR_SIL_RDU] = { D_20, B_16, U16_32, 0, 0, 0 }, + [INSTR_SIY_IRD] = { D20_20, B_16, I8_8, 0, 0, 0 }, + [INSTR_SIY_RD] = { D20_20, B_16, 0, 0, 0, 0 }, + [INSTR_SIY_URD] = { D20_20, B_16, U8_8, 0, 0, 0 }, + [INSTR_SI_RD] = { D_20, B_16, 0, 0, 0, 0 }, + [INSTR_SI_URD] = { D_20, B_16, U8_8, 0, 0, 0 }, + [INSTR_SMI_U0RDP] = { U4_8, J16_32, D_20, B_16, 0, 0 }, + [INSTR_SSE_RDRD] = { D_20, B_16, D_36, B_32, 0, 0 }, + [INSTR_SSF_RRDRD] = { D_20, B_16, D_36, B_32, R_8, 0 }, + [INSTR_SSF_RRDRD2] = { R_8, D_20, B_16, D_36, B_32, 0 }, + [INSTR_SS_L0RDRD] = { D_20, L8_8, B_16, D_36, B_32, 0 }, + [INSTR_SS_L2RDRD] = { D_20, B_16, D_36, L8_8, B_32, 0 }, + 
[INSTR_SS_LIRDRD] = { D_20, L4_8, B_16, D_36, B_32, U4_12 }, + [INSTR_SS_LLRDRD] = { D_20, L4_8, B_16, D_36, L4_12, B_32 }, + [INSTR_SS_RRRDRD] = { D_20, R_8, B_16, D_36, B_32, R_12 }, + [INSTR_SS_RRRDRD2] = { R_8, D_20, B_16, R_12, D_36, B_32 }, + [INSTR_SS_RRRDRD3] = { R_8, R_12, D_20, B_16, D_36, B_32 }, + [INSTR_S_00] = { 0, 0, 0, 0, 0, 0 }, + [INSTR_S_RD] = { D_20, B_16, 0, 0, 0, 0 }, + [INSTR_VRI_V0IU] = { V_8, I16_16, U4_32, 0, 0, 0 }, + [INSTR_VRI_V0U] = { V_8, U16_16, 0, 0, 0, 0 }, + [INSTR_VRI_V0UU2] = { V_8, U16_16, U4_32, 0, 0, 0 }, + [INSTR_VRI_V0UUU] = { V_8, U8_16, U8_24, U4_32, 0, 0 }, + [INSTR_VRI_VR0UU] = { V_8, R_12, U8_28, U4_24, 0, 0 }, + [INSTR_VRI_VVUU] = { V_8, V_12, U16_16, U4_32, 0, 0 }, + [INSTR_VRI_VVUUU] = { V_8, V_12, U12_16, U4_32, U4_28, 0 }, + [INSTR_VRI_VVUUU2] = { V_8, V_12, U8_28, U8_16, U4_24, 0 }, + [INSTR_VRI_VVV0U] = { V_8, V_12, V_16, U8_24, 0, 0 }, + [INSTR_VRI_VVV0UU] = { V_8, V_12, V_16, U8_24, U4_32, 0 }, + [INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 }, + [INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 }, + [INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 }, + [INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 }, + [INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 }, + [INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 }, + [INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 }, + [INSTR_VRR_VV0U0U] = { V_8, V_12, U4_32, U4_24, 0, 0 }, + [INSTR_VRR_VV0U2] = { V_8, V_12, U4_24, 0, 0, 0 }, + [INSTR_VRR_VV0UU2] = { V_8, V_12, U4_32, U4_28, 0, 0 }, + [INSTR_VRR_VV0UUU] = { V_8, V_12, U4_32, U4_28, U4_24, 0 }, + [INSTR_VRR_VVV] = { V_8, V_12, V_16, 0, 0, 0 }, + [INSTR_VRR_VVV0U] = { V_8, V_12, V_16, U4_32, 0, 0 }, + [INSTR_VRR_VVV0U0] = { V_8, V_12, V_16, U4_24, 0, 0 }, + [INSTR_VRR_VVV0U0U] = { V_8, V_12, V_16, U4_32, U4_24, 0 }, + [INSTR_VRR_VVV0UU] = { V_8, V_12, V_16, U4_32, U4_28, 0 }, + [INSTR_VRR_VVV0UUU] = { V_8, V_12, V_16, U4_32, U4_28, U4_24 }, + [INSTR_VRR_VVV0V] = { V_8, V_12, V_16, V_32, 0, 0 }, + [INSTR_VRR_VVVU0UV] = { V_8, V_12, V_16, V_32, U4_28, U4_20 }, + [INSTR_VRR_VVVU0V] = { V_8, V_12, V_16, V_32, U4_20, 0 }, + [INSTR_VRR_VVVUU0V] = { V_8, V_12, V_16, V_32, U4_20, U4_24 }, + [INSTR_VRS_RRDV] = { V_32, R_12, D_20, B_16, 0, 0 }, + [INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 }, + [INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 }, + [INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 }, + [INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 }, + [INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 }, + [INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 }, +}; + +static char long_insn_name[][7] = LONG_INSN_INITIALIZER; +static struct s390_insn opcode[] = OPCODE_TABLE_INITIALIZER; +static struct s390_opcode_offset opcode_offset[] = OPCODE_OFFSET_INITIALIZER; + +/* Extracts an operand value from an instruction. */ +static unsigned int extract_operand(unsigned char *code, + const struct s390_operand *operand) +{ + unsigned char *cp; + unsigned int val; + int bits; + + /* Extract fragments of the operand byte for byte. */ + cp = code + operand->shift / 8; + bits = (operand->shift & 7) + operand->bits; + val = 0; + do { + val <<= 8; + val |= (unsigned int) *cp++; + bits -= 8; + } while (bits > 0); + val >>= -bits; + val &= ((1U << (operand->bits - 1)) << 1) - 1; + + /* Check for special long displacement case. 
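+ * The 20-bit displacement is encoded as a 12-bit low part (DL)
+ * followed by an 8-bit high part (DH); the two fragments are swapped
+ * back into their natural order here.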
*/ + if (operand->bits == 20 && operand->shift == 20) + val = (val & 0xff) << 12 | (val & 0xfff00) >> 8; + + /* Check for register extensions bits for vector registers. */ + if (operand->flags & OPERAND_VR) { + if (operand->shift == 8) + val |= (code[4] & 8) << 1; + else if (operand->shift == 12) + val |= (code[4] & 4) << 2; + else if (operand->shift == 16) + val |= (code[4] & 2) << 3; + else if (operand->shift == 32) + val |= (code[4] & 1) << 4; + } + + /* Sign extend value if the operand is signed or pc relative. */ + if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) && + (val & (1U << (operand->bits - 1)))) + val |= (-1U << (operand->bits - 1)) << 1; + + /* Double value if the operand is pc relative. */ + if (operand->flags & OPERAND_PCREL) + val <<= 1; + + /* Length x in an instructions has real length x + 1. */ + if (operand->flags & OPERAND_LENGTH) + val++; + return val; +} + +struct s390_insn *find_insn(unsigned char *code) +{ + struct s390_opcode_offset *entry; + struct s390_insn *insn; + unsigned char opfrag; + int i; + + /* Search the opcode offset table to find an entry which + * matches the beginning of the opcode. If there is no match + * the last entry will be used, which is the default entry for + * unknown instructions as well as 1-byte opcode instructions. + */ + for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) { + entry = &opcode_offset[i]; + if (entry->opcode == code[0]) + break; + } + + opfrag = *(code + entry->byte) & entry->mask; + + insn = &opcode[entry->offset]; + for (i = 0; i < entry->count; i++) { + if (insn->opfrag == opfrag) + return insn; + insn++; + } + return NULL; +} + +static int print_insn(char *buffer, unsigned char *code, unsigned long addr) +{ + struct s390_insn *insn; + const unsigned char *ops; + const struct s390_operand *operand; + unsigned int value; + char separator; + char *ptr; + int i; + + ptr = buffer; + insn = find_insn(code); + if (insn) { + if (insn->zero == 0) + ptr += sprintf(ptr, "%.7s\t", + long_insn_name[insn->offset]); + else + ptr += sprintf(ptr, "%.5s\t", insn->name); + /* Extract the operands. 
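+ * Index and base registers that are zero are omitted from the output,
+ * matching the usual assembler syntax.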
*/ + separator = 0; + for (ops = formats[insn->format], i = 0; + *ops != 0 && i < 6; ops++, i++) { + operand = operands + *ops; + value = extract_operand(code, operand); + if ((operand->flags & OPERAND_INDEX) && value == 0) + continue; + if ((operand->flags & OPERAND_BASE) && + value == 0 && separator == '(') { + separator = ','; + continue; + } + if (separator) + ptr += sprintf(ptr, "%c", separator); + if (operand->flags & OPERAND_GPR) + ptr += sprintf(ptr, "%%r%i", value); + else if (operand->flags & OPERAND_FPR) + ptr += sprintf(ptr, "%%f%i", value); + else if (operand->flags & OPERAND_AR) + ptr += sprintf(ptr, "%%a%i", value); + else if (operand->flags & OPERAND_CR) + ptr += sprintf(ptr, "%%c%i", value); + else if (operand->flags & OPERAND_VR) + ptr += sprintf(ptr, "%%v%i", value); + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) + ptr += sprintf(ptr, "%i", value); + else + ptr += sprintf(ptr, "%u", value); + if (operand->flags & OPERAND_DISP) + separator = '('; + else if (operand->flags & OPERAND_BASE) { + ptr += sprintf(ptr, ")"); + separator = ','; + } else + separator = ','; + } + } else + ptr += sprintf(ptr, "unknown"); + return (int) (ptr - buffer); +} + +static int copy_from_regs(struct pt_regs *regs, void *dst, void *src, int len) +{ + if (user_mode(regs)) { + if (copy_from_user(dst, (char __user *)src, len)) + return -EFAULT; + } else { + if (copy_from_kernel_nofault(dst, src, len)) + return -EFAULT; + } + return 0; +} + +void show_code(struct pt_regs *regs) +{ + char *mode = user_mode(regs) ? "User" : "Krnl"; + unsigned char code[64]; + char buffer[128], *ptr; + unsigned long addr; + int start, end, opsize, hops, i; + + /* Get a snapshot of the 64 bytes surrounding the fault address. */ + for (start = 32; start && regs->psw.addr >= 34 - start; start -= 2) { + addr = regs->psw.addr - 34 + start; + if (copy_from_regs(regs, code + start - 2, (void *)addr, 2)) + break; + } + for (end = 32; end < 64; end += 2) { + addr = regs->psw.addr + end - 32; + if (copy_from_regs(regs, code + end, (void *)addr, 2)) + break; + } + /* Code snapshot usable ? */ + if ((regs->psw.addr & 1) || start >= end) { + printk("%s Code: Bad PSW.\n", mode); + return; + } + /* Find a starting point for the disassembly. */ + while (start < 32) { + for (i = 0, hops = 0; start + i < 32 && hops < 3; hops++) { + if (!find_insn(code + start + i)) + break; + i += insn_length(code[start + i]); + } + if (start + i == 32) + /* Looks good, sequence ends at PSW. */ + break; + start += 2; + } + /* Decode the instructions. 
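+ * In the disassembly, '#' marks the instruction that ends at the PSW
+ * address and '>' the one that starts there.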
*/ + ptr = buffer; + ptr += sprintf(ptr, "%s Code:", mode); + hops = 0; + while (start < end && hops < 8) { + opsize = insn_length(code[start]); + if (start + opsize == 32) + *ptr++ = '#'; + else if (start == 32) + *ptr++ = '>'; + else + *ptr++ = ' '; + addr = regs->psw.addr + start - 32; + ptr += sprintf(ptr, "%px: ", (void *)addr); + if (start + opsize >= end) + break; + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[start + i]); + *ptr++ = '\t'; + if (i < 6) + *ptr++ = '\t'; + ptr += print_insn(ptr, code + start, addr); + start += opsize; + pr_cont("%s", buffer); + ptr = buffer; + ptr += sprintf(ptr, "\n "); + hops++; + } + pr_cont("\n"); +} + +void print_fn_code(unsigned char *code, unsigned long len) +{ + char buffer[128], *ptr; + int opsize, i; + + while (len) { + ptr = buffer; + opsize = insn_length(*code); + if (opsize > len) + break; + ptr += sprintf(ptr, "%px: ", code); + for (i = 0; i < opsize; i++) + ptr += sprintf(ptr, "%02x", code[i]); + *ptr++ = '\t'; + if (i < 4) + *ptr++ = '\t'; + ptr += print_insn(ptr, code, (unsigned long) code); + *ptr++ = '\n'; + *ptr++ = 0; + printk("%s", buffer); + code += opsize; + len -= opsize; + } +} diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c new file mode 100644 index 0000000000..d2012635b0 --- /dev/null +++ b/arch/s390/kernel/dumpstack.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Stack dumping functions + * + * Copyright IBM Corp. 1999, 2013 + */ + +#include <linux/kallsyms.h> +#include <linux/hardirq.h> +#include <linux/kprobes.h> +#include <linux/utsname.h> +#include <linux/export.h> +#include <linux/kdebug.h> +#include <linux/ptrace.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> +#include <asm/processor.h> +#include <asm/debug.h> +#include <asm/dis.h> +#include <asm/ipl.h> +#include <asm/unwind.h> + +const char *stack_type_name(enum stack_type type) +{ + switch (type) { + case STACK_TYPE_TASK: + return "task"; + case STACK_TYPE_IRQ: + return "irq"; + case STACK_TYPE_NODAT: + return "nodat"; + case STACK_TYPE_RESTART: + return "restart"; + default: + return "unknown"; + } +} +EXPORT_SYMBOL_GPL(stack_type_name); + +static inline bool in_stack(unsigned long sp, struct stack_info *info, + enum stack_type type, unsigned long stack) +{ + if (sp < stack || sp >= stack + THREAD_SIZE) + return false; + info->type = type; + info->begin = stack; + info->end = stack + THREAD_SIZE; + return true; +} + +static bool in_task_stack(unsigned long sp, struct task_struct *task, + struct stack_info *info) +{ + unsigned long stack = (unsigned long)task_stack_page(task); + + return in_stack(sp, info, STACK_TYPE_TASK, stack); +} + +static bool in_irq_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long stack = S390_lowcore.async_stack - STACK_INIT_OFFSET; + + return in_stack(sp, info, STACK_TYPE_IRQ, stack); +} + +static bool in_nodat_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long stack = S390_lowcore.nodat_stack - STACK_INIT_OFFSET; + + return in_stack(sp, info, STACK_TYPE_NODAT, stack); +} + +static bool in_mcck_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long stack = S390_lowcore.mcck_stack - STACK_INIT_OFFSET; + + return in_stack(sp, info, STACK_TYPE_MCCK, stack); +} + +static bool in_restart_stack(unsigned long sp, struct stack_info *info) +{ + unsigned long stack = S390_lowcore.restart_stack - STACK_INIT_OFFSET; + + return in_stack(sp, 
info, STACK_TYPE_RESTART, stack); +} + +int get_stack_info(unsigned long sp, struct task_struct *task, + struct stack_info *info, unsigned long *visit_mask) +{ + if (!sp) + goto unknown; + + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (sp & 0x7) + goto unknown; + + /* Check per-task stack */ + if (in_task_stack(sp, task, info)) + goto recursion_check; + + if (task != current) + goto unknown; + + /* Check per-cpu stacks */ + if (!in_irq_stack(sp, info) && + !in_nodat_stack(sp, info) && + !in_restart_stack(sp, info) && + !in_mcck_stack(sp, info)) + goto unknown; + +recursion_check: + /* + * Make sure we don't iterate through any given stack more than once. + * If it comes up a second time then there's something wrong going on: + * just break out and report an unknown stack type. + */ + if (*visit_mask & (1UL << info->type)) + goto unknown; + *visit_mask |= 1UL << info->type; + return 0; +unknown: + info->type = STACK_TYPE_UNKNOWN; + return -EINVAL; +} + +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) +{ + struct unwind_state state; + + printk("%sCall Trace:\n", loglvl); + unwind_for_each_frame(&state, task, NULL, (unsigned long) stack) + printk(state.reliable ? "%s [<%016lx>] %pSR \n" : + "%s([<%016lx>] %pSR)\n", + loglvl, state.ip, (void *) state.ip); + debug_show_held_locks(task ? : current); +} + +static void show_last_breaking_event(struct pt_regs *regs) +{ + printk("Last Breaking-Event-Address:\n"); + printk(" [<%016lx>] ", regs->last_break); + if (user_mode(regs)) { + print_vma_addr(KERN_CONT, regs->last_break); + pr_cont("\n"); + } else { + pr_cont("%pSR\n", (void *)regs->last_break); + } +} + +void show_registers(struct pt_regs *regs) +{ + struct psw_bits *psw = &psw_bits(regs->psw); + char *mode; + + mode = user_mode(regs) ? 
"User" : "Krnl"; + printk("%s PSW : %px %px", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); + if (!user_mode(regs)) + pr_cont(" (%pSR)", (void *)regs->psw.addr); + pr_cont("\n"); + printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " + "P:%x AS:%x CC:%x PM:%x", psw->per, psw->dat, psw->io, psw->ext, + psw->key, psw->mcheck, psw->wait, psw->pstate, psw->as, psw->cc, psw->pm); + pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba); + printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode, + regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); + printk(" %016lx %016lx %016lx %016lx\n", + regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); + printk(" %016lx %016lx %016lx %016lx\n", + regs->gprs[8], regs->gprs[9], regs->gprs[10], regs->gprs[11]); + printk(" %016lx %016lx %016lx %016lx\n", + regs->gprs[12], regs->gprs[13], regs->gprs[14], regs->gprs[15]); + show_code(regs); +} + +void show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + show_registers(regs); + /* Show stack backtrace if pt_regs is from kernel mode */ + if (!user_mode(regs)) + show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT); + show_last_breaking_event(regs); +} + +static DEFINE_SPINLOCK(die_lock); + +void __noreturn die(struct pt_regs *regs, const char *str) +{ + static int die_counter; + + oops_enter(); + lgr_info_log(); + debug_stop_all(); + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, + regs->int_code >> 17, ++die_counter); +#ifdef CONFIG_PREEMPT + pr_cont("PREEMPT "); +#elif defined(CONFIG_PREEMPT_RT) + pr_cont("PREEMPT_RT "); +#endif + pr_cont("SMP "); + if (debug_pagealloc_enabled()) + pr_cont("DEBUG_PAGEALLOC"); + pr_cont("\n"); + notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irq(&die_lock); + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception: panic_on_oops"); + oops_exit(); + make_task_dead(SIGSEGV); +} diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c new file mode 100644 index 0000000000..442ce0489e --- /dev/null +++ b/arch/s390/kernel/early.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 
2007, 2009 + * Author(s): Hongjie Yang <hongjie@us.ibm.com>, + */ + +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/compiler.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/ctype.h> +#include <linux/lockdep.h> +#include <linux/extable.h> +#include <linux/pfn.h> +#include <linux/uaccess.h> +#include <linux/kernel.h> +#include <asm/asm-extable.h> +#include <linux/memblock.h> +#include <asm/diag.h> +#include <asm/ebcdic.h> +#include <asm/ipl.h> +#include <asm/lowcore.h> +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sysinfo.h> +#include <asm/cpcmd.h> +#include <asm/sclp.h> +#include <asm/facility.h> +#include <asm/boot_data.h> +#include <asm/switch_to.h> +#include "entry.h" + +#define decompressor_handled_param(param) \ +static int __init ignore_decompressor_param_##param(char *s) \ +{ \ + return 0; \ +} \ +early_param(#param, ignore_decompressor_param_##param) + +decompressor_handled_param(mem); +decompressor_handled_param(vmalloc); +decompressor_handled_param(dfltcc); +decompressor_handled_param(facilities); +decompressor_handled_param(nokaslr); +#if IS_ENABLED(CONFIG_KVM) +decompressor_handled_param(prot_virt); +#endif + +static void __init kasan_early_init(void) +{ +#ifdef CONFIG_KASAN + init_task.kasan_depth = 0; + sclp_early_printk("KernelAddressSanitizer initialized\n"); +#endif +} + +static void __init reset_tod_clock(void) +{ + union tod_clock clk; + + if (store_tod_clock_ext_cc(&clk) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) || store_tod_clock_ext_cc(&clk)) + disabled_wait(); + + memset(&tod_clock_base, 0, sizeof(tod_clock_base)); + tod_clock_base.tod = TOD_UNIX_EPOCH; + S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; +} + +/* + * Initialize storage key for kernel pages + */ +static noinline __init void init_kernel_storage_key(void) +{ +#if PAGE_DEFAULT_KEY + unsigned long end_pfn, init_pfn; + + end_pfn = PFN_UP(__pa(_end)); + + for (init_pfn = 0 ; init_pfn < end_pfn; init_pfn++) + page_set_storage_key(init_pfn << PAGE_SHIFT, + PAGE_DEFAULT_KEY, 0); +#endif +} + +static __initdata char sysinfo_page[PAGE_SIZE] __aligned(PAGE_SIZE); + +static noinline __init void detect_machine_type(void) +{ + struct sysinfo_3_2_2 *vmms = (struct sysinfo_3_2_2 *)&sysinfo_page; + + /* Check current-configuration-level */ + if (stsi(NULL, 0, 0, 0) <= 2) { + S390_lowcore.machine_flags |= MACHINE_FLAG_LPAR; + return; + } + /* Get virtual-machine cpu information. */ + if (stsi(vmms, 3, 2, 2) || !vmms->count) + return; + + /* Detect known hypervisors */ + if (!memcmp(vmms->vm[0].cpi, "\xd2\xe5\xd4", 3)) + S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; + else if (!memcmp(vmms->vm[0].cpi, "\xa9\x61\xe5\xd4", 4)) + S390_lowcore.machine_flags |= MACHINE_FLAG_VM; +} + +/* Remove leading, trailing and double whitespace. 
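+ * For example, "  a  b   c " becomes "a b c".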
*/ +static inline void strim_all(char *str) +{ + char *s; + + s = strim(str); + if (s != str) + memmove(str, s, strlen(s)); + while (*str) { + if (!isspace(*str++)) + continue; + if (isspace(*str)) { + s = skip_spaces(str); + memmove(str, s, strlen(s) + 1); + } + } +} + +static noinline __init void setup_arch_string(void) +{ + struct sysinfo_1_1_1 *mach = (struct sysinfo_1_1_1 *)&sysinfo_page; + struct sysinfo_3_2_2 *vm = (struct sysinfo_3_2_2 *)&sysinfo_page; + char mstr[80], hvstr[17]; + + if (stsi(mach, 1, 1, 1)) + return; + EBCASC(mach->manufacturer, sizeof(mach->manufacturer)); + EBCASC(mach->type, sizeof(mach->type)); + EBCASC(mach->model, sizeof(mach->model)); + EBCASC(mach->model_capacity, sizeof(mach->model_capacity)); + sprintf(mstr, "%-16.16s %-4.4s %-16.16s %-16.16s", + mach->manufacturer, mach->type, + mach->model, mach->model_capacity); + strim_all(mstr); + if (stsi(vm, 3, 2, 2) == 0 && vm->count) { + EBCASC(vm->vm[0].cpi, sizeof(vm->vm[0].cpi)); + sprintf(hvstr, "%-16.16s", vm->vm[0].cpi); + strim_all(hvstr); + } else { + sprintf(hvstr, "%s", + MACHINE_IS_LPAR ? "LPAR" : + MACHINE_IS_VM ? "z/VM" : + MACHINE_IS_KVM ? "KVM" : "unknown"); + } + dump_stack_set_arch_desc("%s (%s)", mstr, hvstr); +} + +static __init void setup_topology(void) +{ + int max_mnest; + + if (!test_facility(11)) + return; + S390_lowcore.machine_flags |= MACHINE_FLAG_TOPOLOGY; + for (max_mnest = 6; max_mnest > 1; max_mnest--) { + if (stsi(&sysinfo_page, 15, 1, max_mnest) == 0) + break; + } + topology_max_mnest = max_mnest; +} + +void __do_early_pgm_check(struct pt_regs *regs) +{ + if (!fixup_exception(regs)) + disabled_wait(); +} + +static noinline __init void setup_lowcore_early(void) +{ + psw_t psw; + + psw.addr = (unsigned long)early_pgm_check_handler; + psw.mask = PSW_KERNEL_BITS; + S390_lowcore.program_new_psw = psw; + S390_lowcore.preempt_count = INIT_PREEMPT_COUNT; +} + +static noinline __init void setup_facility_list(void) +{ + memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list)); + if (!IS_ENABLED(CONFIG_KERNEL_NOBP)) + __clear_facility(82, alt_stfle_fac_list); +} + +static __init void detect_diag9c(void) +{ + unsigned int cpu_address; + int rc; + + cpu_address = stap(); + diag_stat_inc(DIAG_STAT_X09C); + asm volatile( + " diag %2,0,0x9c\n" + "0: la %0,0\n" + "1:\n" + EX_TABLE(0b,1b) + : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); + if (!rc) + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; +} + +static __init void detect_machine_facilities(void) +{ + if (test_facility(8)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT1; + __ctl_set_bit(0, 23); + } + if (test_facility(78)) + S390_lowcore.machine_flags |= MACHINE_FLAG_EDAT2; + if (test_facility(3)) + S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; + if (test_facility(50) && test_facility(73)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_TE; + __ctl_set_bit(0, 55); + } + if (test_facility(51)) + S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; + if (test_facility(129)) { + S390_lowcore.machine_flags |= MACHINE_FLAG_VX; + __ctl_set_bit(0, 17); + } + if (test_facility(130)) + S390_lowcore.machine_flags |= MACHINE_FLAG_NX; + if (test_facility(133)) + S390_lowcore.machine_flags |= MACHINE_FLAG_GS; + if (test_facility(139) && (tod_clock_base.tod >> 63)) { + /* Enabled signed clock comparator comparisons */ + S390_lowcore.machine_flags |= MACHINE_FLAG_SCC; + clock_comparator_max = -1ULL >> 1; + __ctl_set_bit(0, 53); + } + if (IS_ENABLED(CONFIG_PCI) && test_facility(153)) { + S390_lowcore.machine_flags |= 
MACHINE_FLAG_PCI_MIO; + /* the control bit is set during PCI initialization */ + } + if (test_facility(194)) + S390_lowcore.machine_flags |= MACHINE_FLAG_RDP; +} + +static inline void save_vector_registers(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (test_facility(129)) + save_vx_regs(boot_cpu_vector_save_area); +#endif +} + +static inline void setup_control_registers(void) +{ + unsigned long reg; + + __ctl_store(reg, 0, 0); + reg |= CR0_LOW_ADDRESS_PROTECTION; + reg |= CR0_EMERGENCY_SIGNAL_SUBMASK; + reg |= CR0_EXTERNAL_CALL_SUBMASK; + __ctl_load(reg, 0, 0); +} + +static inline void setup_access_registers(void) +{ + unsigned int acrs[NUM_ACRS] = { 0 }; + + restore_access_regs(acrs); +} + +static int __init disable_vector_extension(char *str) +{ + S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; + __ctl_clear_bit(0, 17); + return 0; +} +early_param("novx", disable_vector_extension); + +char __bootdata(early_command_line)[COMMAND_LINE_SIZE]; +static void __init setup_boot_command_line(void) +{ + /* copy arch command line */ + strscpy(boot_command_line, early_command_line, COMMAND_LINE_SIZE); +} + +static void __init sort_amode31_extable(void) +{ + sort_extable(__start_amode31_ex_table, __stop_amode31_ex_table); +} + +void __init startup_init(void) +{ + kasan_early_init(); + reset_tod_clock(); + time_early_init(); + init_kernel_storage_key(); + lockdep_off(); + sort_amode31_extable(); + setup_lowcore_early(); + setup_facility_list(); + detect_machine_type(); + setup_arch_string(); + setup_boot_command_line(); + detect_diag9c(); + detect_machine_facilities(); + save_vector_registers(); + setup_topology(); + sclp_early_detect(); + setup_control_registers(); + setup_access_registers(); + lockdep_on(); +} diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c new file mode 100644 index 0000000000..d9d53f4400 --- /dev/null +++ b/arch/s390/kernel/early_printk.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2017 + */ + +#include <linux/console.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <asm/sclp.h> + +static void sclp_early_write(struct console *con, const char *s, unsigned int len) +{ + __sclp_early_printk(s, len); +} + +static struct console sclp_early_console = { + .name = "earlysclp", + .write = sclp_early_write, + .flags = CON_PRINTBUFFER | CON_BOOT, + .index = -1, +}; + +static int __init setup_early_printk(char *buf) +{ + if (early_console) + return 0; + /* Accept only "earlyprintk" and "earlyprintk=sclp" */ + if (buf && !str_has_prefix(buf, "sclp")) + return 0; + if (!sclp.has_linemode && !sclp.has_vt220) + return 0; + early_console = &sclp_early_console; + register_console(early_console); + return 0; +} +early_param("earlyprintk", setup_early_printk); diff --git a/arch/s390/kernel/earlypgm.S b/arch/s390/kernel/earlypgm.S new file mode 100644 index 0000000000..c634871f0d --- /dev/null +++ b/arch/s390/kernel/earlypgm.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 
2006, 2007 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> + +SYM_CODE_START(early_pgm_check_handler) + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD+__PT_SIZE) + la %r11,STACK_FRAME_OVERHEAD(%r15) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_PSW(16,%r11),__LC_PGM_OLD_PSW + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + lgr %r2,%r11 + brasl %r14,__do_early_pgm_check + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + lpswe __LC_RETURN_PSW +SYM_CODE_END(early_pgm_check_handler) diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c new file mode 100644 index 0000000000..0e51fa5372 --- /dev/null +++ b/arch/s390/kernel/ebcdic.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * EBCDIC -> ASCII, ASCII -> EBCDIC, + * upper to lower case (EBCDIC) conversion tables. + * + * S390 version + * Copyright IBM Corp. 1999 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * Martin Peschke <peschke@fh-brandenburg.de> + */ + +#include <linux/types.h> +#include <linux/export.h> +#include <asm/ebcdic.h> + +/* + * ASCII (IBM PC 437) -> EBCDIC 037 + */ +__u8 _ascebc[256] = +{ + /*00 NUL SOH STX ETX EOT ENQ ACK BEL */ + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, + /*08 BS HT LF VT FF CR SO SI */ + /* ->NL */ + 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /*10 DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, + /*18 CAN EM SUB ESC FS GS RS US */ + /* ->IGS ->IRS ->IUS */ + 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F, + /*20 SP ! " # $ % & ' */ + 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, + /*28 ( ) * + , - . / */ + 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, + /*30 0 1 2 3 4 5 6 7 */ + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + /*38 8 9 : ; < = > ? 
*/ + 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, + /*40 @ A B C D E F G */ + 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + /*48 H I J K L M N O */ + 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, + /*50 P Q R S T U V W */ + 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, + /*58 X Y Z [ \ ] ^ _ */ + 0xE7, 0xE8, 0xE9, 0xBA, 0xE0, 0xBB, 0xB0, 0x6D, + /*60 ` a b c d e f g */ + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /*68 h i j k l m n o */ + 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + /*70 p q r s t u v w */ + 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, + /*78 x y z { | } ~ DL */ + 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07, + /*80*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*88*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*90*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*98*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*A0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*A8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*B0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*B8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*C0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*C8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*D0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*D8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*E0 sz */ + 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*E8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*F0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*F8*/ + 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF +}; + +/* + * EBCDIC 037 -> ASCII (IBM PC 437) + */ +__u8 _ebcasc[256] = +{ + /* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */ + 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F, + /* 0x08 -GE -SPS -RPT VT FF CR SO SI */ + 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC + -ENP ->LF */ + 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07, + /* 0x18 CAN EM -UBS -CU1 -IFS -IGS -IRS -ITB + -IUS */ + 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + /* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC + -INP */ + 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B, + /* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL + -SW */ + 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07, + /* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */ + 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04, + /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */ + 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A, + /* 0x40 SP RSP ä ---- */ + 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86, + /* 0x48 . < ( + | */ + 0x87, 0xA4, 0x9B, 0x2E, 0x3C, 0x28, 0x2B, 0x7C, + /* 0x50 & ---- */ + 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07, + /* 0x58 ß ! $ * ) ; */ + 0x8D, 0xE1, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAA, + /* 0x60 - / ---- Ä ---- ---- ---- */ + 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F, + /* 0x68 ---- , % _ > ? 
*/ + 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, + /* 0x70 ---- ---- ---- ---- ---- ---- ---- */ + 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + /* 0x78 * ` : # @ ' = " */ + 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, + /* 0x80 * a b c d e f g */ + 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + /* 0x88 h i ---- ---- ---- */ + 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1, + /* 0x90 ° j k l m n o p */ + 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, + /* 0x98 q r ---- ---- */ + 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07, + /* 0xA0 ~ s t u v w x */ + 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + /* 0xA8 y z ---- ---- ---- ---- */ + 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07, + /* 0xB0 ^ ---- § ---- */ + 0x5E, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC, + /* 0xB8 ---- [ ] ---- ---- ---- ---- */ + 0xAB, 0x07, 0x5B, 0x5D, 0x07, 0x07, 0x07, 0x07, + /* 0xC0 { A B C D E F G */ + 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + /* 0xC8 H I ---- ö ---- */ + 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07, + /* 0xD0 } J K L M N O P */ + 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, + /* 0xD8 Q R ---- ü */ + 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98, + /* 0xE0 \ S T U V W X */ + 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + /* 0xE8 Y Z ---- Ö ---- ---- ---- */ + 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07, + /* 0xF0 0 1 2 3 4 5 6 7 */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + /* 0xF8 8 9 ---- ---- Ãœ ---- ---- ---- */ + 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07 +}; + + +/* + * ASCII (IBM PC 437) -> EBCDIC 500 + */ +__u8 _ascebc_500[256] = +{ + /*00 NUL SOH STX ETX EOT ENQ ACK BEL */ + 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, + /*08 BS HT LF VT FF CR SO SI */ + /* ->NL */ + 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /*10 DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ + 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, + /*18 CAN EM SUB ESC FS GS RS US */ + /* ->IGS ->IRS ->IUS */ + 0x18, 0x19, 0x3F, 0x27, 0x22, 0x1D, 0x1E, 0x1F, + /*20 SP ! " # $ % & ' */ + 0x40, 0x4F, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, + /*28 ( ) * + , - . / */ + 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61, + /*30 0 1 2 3 4 5 6 7 */ + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + /*38 8 9 : ; < = > ? 
*/ + 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F, + /*40 @ A B C D E F G */ + 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + /*48 H I J K L M N O */ + 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, + /*50 P Q R S T U V W */ + 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, + /*58 X Y Z [ \ ] ^ _ */ + 0xE7, 0xE8, 0xE9, 0x4A, 0xE0, 0x5A, 0x5F, 0x6D, + /*60 ` a b c d e f g */ + 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + /*68 h i j k l m n o */ + 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, + /*70 p q r s t u v w */ + 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, + /*78 x y z { | } ~ DL */ + 0xA7, 0xA8, 0xA9, 0xC0, 0xBB, 0xD0, 0xA1, 0x07, + /*80*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*88*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*90*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*98*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*A0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*A8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*B0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*B8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*C0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*C8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*D0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*D8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*E0 sz */ + 0x3F, 0x59, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*E8*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*F0*/ + 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, 0x3F, + /*F8*/ + 0x90, 0x3F, 0x3F, 0x3F, 0x3F, 0xEA, 0x3F, 0xFF +}; + +/* + * EBCDIC 500 -> ASCII (IBM PC 437) + */ +__u8 _ebcasc_500[256] = +{ + /* 0x00 NUL SOH STX ETX *SEL HT *RNL DEL */ + 0x00, 0x01, 0x02, 0x03, 0x07, 0x09, 0x07, 0x7F, + /* 0x08 -GE -SPS -RPT VT FF CR SO SI */ + 0x07, 0x07, 0x07, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + /* 0x10 DLE DC1 DC2 DC3 -RES -NL BS -POC + -ENP ->LF */ + 0x10, 0x11, 0x12, 0x13, 0x07, 0x0A, 0x08, 0x07, + /* 0x18 CAN EM -UBS -CU1 -IFS -IGS -IRS -ITB + -IUS */ + 0x18, 0x19, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + /* 0x20 -DS -SOS FS -WUS -BYP LF ETB ESC + -INP */ + 0x07, 0x07, 0x1C, 0x07, 0x07, 0x0A, 0x17, 0x1B, + /* 0x28 -SA -SFE -SM -CSP -MFA ENQ ACK BEL + -SW */ + 0x07, 0x07, 0x07, 0x07, 0x07, 0x05, 0x06, 0x07, + /* 0x30 ---- ---- SYN -IR -PP -TRN -NBS EOT */ + 0x07, 0x07, 0x16, 0x07, 0x07, 0x07, 0x07, 0x04, + /* 0x38 -SBS -IT -RFF -CU3 DC4 NAK ---- SUB */ + 0x07, 0x07, 0x07, 0x07, 0x14, 0x15, 0x07, 0x1A, + /* 0x40 SP RSP ä ---- */ + 0x20, 0xFF, 0x83, 0x84, 0x85, 0xA0, 0x07, 0x86, + /* 0x48 [ . < ( + ! */ + 0x87, 0xA4, 0x5B, 0x2E, 0x3C, 0x28, 0x2B, 0x21, + /* 0x50 & ---- */ + 0x26, 0x82, 0x88, 0x89, 0x8A, 0xA1, 0x8C, 0x07, + /* 0x58 ß ] $ * ) ; ^ */ + 0x8D, 0xE1, 0x5D, 0x24, 0x2A, 0x29, 0x3B, 0x5E, + /* 0x60 - / ---- Ä ---- ---- ---- */ + 0x2D, 0x2F, 0x07, 0x8E, 0x07, 0x07, 0x07, 0x8F, + /* 0x68 ---- , % _ > ? 
*/ + 0x80, 0xA5, 0x07, 0x2C, 0x25, 0x5F, 0x3E, 0x3F, + /* 0x70 ---- ---- ---- ---- ---- ---- ---- */ + 0x07, 0x90, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, + /* 0x78 * ` : # @ ' = " */ + 0x70, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22, + /* 0x80 * a b c d e f g */ + 0x07, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + /* 0x88 h i ---- ---- ---- */ + 0x68, 0x69, 0xAE, 0xAF, 0x07, 0x07, 0x07, 0xF1, + /* 0x90 ° j k l m n o p */ + 0xF8, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, + /* 0x98 q r ---- ---- */ + 0x71, 0x72, 0xA6, 0xA7, 0x91, 0x07, 0x92, 0x07, + /* 0xA0 ~ s t u v w x */ + 0xE6, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, + /* 0xA8 y z ---- ---- ---- ---- */ + 0x79, 0x7A, 0xAD, 0xAB, 0x07, 0x07, 0x07, 0x07, + /* 0xB0 ---- § ---- */ + 0x9B, 0x9C, 0x9D, 0xFA, 0x07, 0x07, 0x07, 0xAC, + /* 0xB8 ---- | ---- ---- ---- ---- */ + 0xAB, 0x07, 0xAA, 0x7C, 0x07, 0x07, 0x07, 0x07, + /* 0xC0 { A B C D E F G */ + 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + /* 0xC8 H I ---- ö ---- */ + 0x48, 0x49, 0x07, 0x93, 0x94, 0x95, 0xA2, 0x07, + /* 0xD0 } J K L M N O P */ + 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, + /* 0xD8 Q R ---- ü */ + 0x51, 0x52, 0x07, 0x96, 0x81, 0x97, 0xA3, 0x98, + /* 0xE0 \ S T U V W X */ + 0x5C, 0xF6, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + /* 0xE8 Y Z ---- Ö ---- ---- ---- */ + 0x59, 0x5A, 0xFD, 0x07, 0x99, 0x07, 0x07, 0x07, + /* 0xF0 0 1 2 3 4 5 6 7 */ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + /* 0xF8 8 9 ---- ---- Ãœ ---- ---- ---- */ + 0x38, 0x39, 0x07, 0x07, 0x9A, 0x07, 0x07, 0x07 +}; + + +/* + * EBCDIC 037/500 conversion table: + * from upper to lower case + */ +__u8 _ebc_tolower[256] = +{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9C, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0x8C, 0x8D, 0x8E, 0xAF, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xEA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xDB, 0xDC, 0xDD, 0xDE, 0xFF +}; + + +/* + * EBCDIC 037/500 conversion table: + * from lower to upper case + */ +__u8 _ebc_toupper[256] = +{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, + 
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, + 0x40, 0x41, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, + 0x50, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, + 0x80, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0x8A, 0x8B, 0xAC, 0xAD, 0xAE, 0x8F, + 0x90, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0x9A, 0x9B, 0x9E, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + +EXPORT_SYMBOL(_ascebc_500); +EXPORT_SYMBOL(_ebcasc_500); +EXPORT_SYMBOL(_ascebc); +EXPORT_SYMBOL(_ebcasc); +EXPORT_SYMBOL(_ebc_tolower); +EXPORT_SYMBOL(_ebc_toupper); + diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S new file mode 100644 index 0000000000..49a11f6dd7 --- /dev/null +++ b/arch/s390/kernel/entry.S @@ -0,0 +1,669 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * S390 low-level entry points. + * + * Copyright IBM Corp. 
1999, 2012 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Hartmut Penner (hp@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + */ + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-extable.h> +#include <asm/alternative-asm.h> +#include <asm/processor.h> +#include <asm/cache.h> +#include <asm/dwarf.h> +#include <asm/errno.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> +#include <asm/page.h> +#include <asm/sigp.h> +#include <asm/irq.h> +#include <asm/vx-insn.h> +#include <asm/setup.h> +#include <asm/nmi.h> +#include <asm/nospec-insn.h> + +_LPP_OFFSET = __LC_LPP + + .macro STBEAR address + ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193 + .endm + + .macro LBEAR address + ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193 + .endm + + .macro LPSWEY address,lpswe + ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193 + .endm + + .macro MBEAR reg + ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193 + .endm + + .macro CHECK_STACK savearea +#ifdef CONFIG_CHECK_STACK + tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD + lghi %r14,\savearea + jz stack_overflow +#endif + .endm + + .macro CHECK_VMAP_STACK savearea,oklabel +#ifdef CONFIG_VMAP_STACK + lgr %r14,%r15 + nill %r14,0x10000 - THREAD_SIZE + oill %r14,STACK_INIT_OFFSET + clg %r14,__LC_KERNEL_STACK + je \oklabel + clg %r14,__LC_ASYNC_STACK + je \oklabel + clg %r14,__LC_MCCK_STACK + je \oklabel + clg %r14,__LC_NODAT_STACK + je \oklabel + clg %r14,__LC_RESTART_STACK + je \oklabel + lghi %r14,\savearea + j stack_overflow +#else + j \oklabel +#endif + .endm + + /* + * The TSTMSK macro generates a test-under-mask instruction by + * calculating the memory offset for the specified mask value. + * Mask value can be any constant. The macro shifts the mask + * value to calculate the memory offset for the test-under-mask + * instruction. + */ + .macro TSTMSK addr, mask, size=8, bytepos=0 + .if (\bytepos < \size) && (\mask >> 8) + .if (\mask & 0xff) + .error "Mask exceeds byte boundary" + .endif + TSTMSK \addr, "(\mask >> 8)", \size, "(\bytepos + 1)" + .exitm + .endif + .ifeq \mask + .error "Mask must not be zero" + .endif + off = \size - \bytepos - 1 + tm off+\addr, \mask + .endm + + .macro BPOFF + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82 + .endm + + .macro BPON + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82 + .endm + + .macro BPENTER tif_ptr,tif_mask + ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \ + "j .+12; nop; nop", 82 + .endm + + .macro BPEXIT tif_ptr,tif_mask + TSTMSK \tif_ptr,\tif_mask + ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \ + "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82 + .endm + +#if IS_ENABLED(CONFIG_KVM) + /* + * The OUTSIDE macro jumps to the provided label in case the value + * in the provided register is outside of the provided range. The + * macro is useful for checking whether a PSW stored in a register + * pair points inside or outside of a block of instructions. 
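A worked illustration of the TSTMSK macro above (the mask value 0x0100 is made up for the example): with the default 8-byte field size, a mask whose set bit lies outside the lowest byte is shifted right one byte per recursion step and bytepos is bumped accordingly, so a single tm instruction is emitted against the matching byte offset (off = 8 - 1 - 1 = 6 here):

	TSTMSK	__LC_CPU_FLAGS,0x0100
	# ...assembles to one test-under-mask instruction:
	tm	6+__LC_CPU_FLAGS,0x01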
+ * @reg: register to check + * @start: start of the range + * @end: end of the range + * @outside_label: jump here if @reg is outside of [@start..@end) + */ + .macro OUTSIDE reg,start,end,outside_label + lgr %r14,\reg + larl %r13,\start + slgr %r14,%r13 + clgfrl %r14,.Lrange_size\@ + jhe \outside_label + .section .rodata, "a" + .balign 4 +.Lrange_size\@: + .long \end - \start + .previous + .endm + + .macro SIEEXIT + lg %r9,__SF_SIE_CONTROL(%r15) # get control block pointer + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce + larl %r9,sie_exit # skip forward to sie_exit + .endm +#endif + + .macro STACKLEAK_ERASE +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + brasl %r14,stackleak_erase_on_task_stack +#endif + .endm + + GEN_BR_THUNK %r14 + + .section .kprobes.text, "ax" +.Ldummy: + /* + * The following nop exists only in order to avoid that the next + * symbol starts at the beginning of the kprobes text section. + * In that case there would be several symbols at the same address. + * E.g. objdump would take an arbitrary symbol when disassembling + * the code. + * With the added nop in between this cannot happen. + */ + nop 0 + +/* + * Scheduler resume function, called by switch_to + * gpr2 = (task_struct *) prev + * gpr3 = (task_struct *) next + * Returns: + * gpr2 = prev + */ +SYM_FUNC_START(__switch_to) + stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task + lghi %r4,__TASK_stack + lghi %r1,__TASK_thread + llill %r5,STACK_INIT_OFFSET + stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev + lg %r15,0(%r4,%r3) # start of kernel stack of next + agr %r15,%r5 # end of kernel stack of next + stg %r3,__LC_CURRENT # store task struct of next + stg %r15,__LC_KERNEL_STACK # store end of kernel stack + lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next + aghi %r3,__TASK_pid + mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next + lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task + ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 + BR_EX %r14 +SYM_FUNC_END(__switch_to) + +#if IS_ENABLED(CONFIG_KVM) +/* + * __sie64a calling convention: + * %r2 pointer to sie control block phys + * %r3 pointer to sie control block virt + * %r4 guest register save area + */ +SYM_FUNC_START(__sie64a) + stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers + lg %r12,__LC_CURRENT + stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. + stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses + stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area + xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 + mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags + lmg %r0,%r13,0(%r4) # load guest gprs 0-13 + lg %r14,__LC_GMAP # get gmap pointer + ltgr %r14,%r14 + jz .Lsie_gmap + lctlg %c1,%c1,__GMAP_ASCE(%r14) # load primary asce +.Lsie_gmap: + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer + oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now + tm __SIE_PROG20+3(%r14),3 # last exit... + jnz .Lsie_skip + TSTMSK __LC_CPU_FLAGS,_CIF_FPU + jo .Lsie_skip # exit if fp/vx regs changed + lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr + BPEXIT __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST +.Lsie_entry: + sie 0(%r14) +# Let the next instruction be NOP to avoid triggering a machine check +# and handling it in a guest as result of the instruction execution. 
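A rough C rendering of the range check performed by the OUTSIDE macro defined above, for illustration only; a single unsigned comparison covers both cases, because an address below start wraps around to a huge value after the subtraction:

	unsigned long reg, start, end;	/* addresses, as in the macro arguments */

	if (reg - start >= end - start)		/* unsigned: also true when reg < start */
		goto outside_label;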
+ nopr 7 +.Lsie_leave: + BPOFF + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST +.Lsie_skip: + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer + ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce +.Lsie_done: +# some program checks are suppressing. C code (e.g. do_protection_exception) +# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There +# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. +# Other instructions between __sie64a and .Lsie_done should not cause program +# interrupts. So lets use 3 nops as a landing pad for all possible rewinds. +.Lrewind_pad6: + nopr 7 +.Lrewind_pad4: + nopr 7 +.Lrewind_pad2: + nopr 7 +SYM_INNER_LABEL(sie_exit, SYM_L_GLOBAL) + lg %r14,__SF_SIE_SAVEAREA(%r15) # load guest register save area + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + xgr %r0,%r0 # clear guest registers to + xgr %r1,%r1 # prevent speculative use + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers + lg %r2,__SF_SIE_REASON(%r15) # return exit reason code + BR_EX %r14 +.Lsie_fault: + lghi %r14,-EFAULT + stg %r14,__SF_SIE_REASON(%r15) # set exit reason code + j sie_exit + + EX_TABLE(.Lrewind_pad6,.Lsie_fault) + EX_TABLE(.Lrewind_pad4,.Lsie_fault) + EX_TABLE(.Lrewind_pad2,.Lsie_fault) + EX_TABLE(sie_exit,.Lsie_fault) +SYM_FUNC_END(__sie64a) +EXPORT_SYMBOL(__sie64a) +EXPORT_SYMBOL(sie_exit) +#endif + +/* + * SVC interrupt handler routine. System calls are synchronous events and + * are entered with interrupts disabled. + */ + +SYM_CODE_START(system_call) + stpt __LC_SYS_ENTER_TIMER + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + BPOFF + lghi %r14,0 +.Lsysc_per: + STBEAR __LC_LAST_BREAK + lctlg %c1,%c1,__LC_KERNEL_ASCE + lg %r15,__LC_KERNEL_STACK + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + # clear user controlled register to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r8,%r8 + xgr %r9,%r9 + xgr %r10,%r10 + xgr %r11,%r11 + la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs + mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC + MBEAR %r2 + lgr %r3,%r14 + brasl %r14,__do_syscall + STACKLEAK_ERASE + lctlg %c1,%c1,__LC_USER_ASCE + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPON + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + stpt __LC_EXIT_TIMER + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE +SYM_CODE_END(system_call) + +# +# a new process exits the kernel with ret_from_fork +# +SYM_CODE_START(ret_from_fork) + lgr %r3,%r11 + brasl %r14,__ret_from_fork + STACKLEAK_ERASE + lctlg %c1,%c1,__LC_USER_ASCE + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + BPON + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + stpt __LC_EXIT_TIMER + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE +SYM_CODE_END(ret_from_fork) + +/* + * Program check handler routine + */ + +SYM_CODE_START(pgm_check_handler) + stpt __LC_SYS_ENTER_TIMER + BPOFF + stmg %r8,%r15,__LC_SAVE_AREA_SYNC + lghi %r10,0 + lmg %r8,%r9,__LC_PGM_OLD_PSW + tmhh %r8,0x0001 # coming from user space? 
+ jno .Lpgm_skip_asce + lctlg %c1,%c1,__LC_KERNEL_ASCE + j 3f # -> fault in user space +.Lpgm_skip_asce: +#if IS_ENABLED(CONFIG_KVM) + # cleanup critical section for program checks in __sie64a + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST + SIEEXIT + lghi %r10,_PIF_GUEST_FAULT +#endif +1: tmhh %r8,0x4000 # PER bit set in old PSW ? + jnz 2f # -> enabled, can't be a double fault + tm __LC_PGM_ILC+3,0x80 # check for per exception + jnz .Lpgm_svcper # -> single stepped svc +2: CHECK_STACK __LC_SAVE_AREA_SYNC + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + # CHECK_VMAP_STACK branches to stack_overflow or 4f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f +3: lg %r15,__LC_KERNEL_STACK +4: la %r11,STACK_FRAME_OVERHEAD(%r15) + stg %r10,__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + stmg %r0,%r7,__PT_R0(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC + mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK + stmg %r8,%r9,__PT_PSW(%r11) + + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + lgr %r2,%r11 + brasl %r14,__do_pgm_check + tmhh %r8,0x0001 # returning to user space? + jno .Lpgm_exit_kernel + STACKLEAK_ERASE + lctlg %c1,%c1,__LC_USER_ASCE + BPON + stpt __LC_EXIT_TIMER +.Lpgm_exit_kernel: + mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE + +# +# single stepped system call +# +.Lpgm_svcper: + mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + larl %r14,.Lsysc_per + stg %r14,__LC_RETURN_PSW+8 + lghi %r14,1 + LBEAR __LC_PGM_LAST_BREAK + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per +SYM_CODE_END(pgm_check_handler) + +/* + * Interrupt handler macro used for external and IO interrupts. + */ +.macro INT_HANDLER name,lc_old_psw,handler +SYM_CODE_START(\name) + stckf __LC_INT_CLOCK + stpt __LC_SYS_ENTER_TIMER + STBEAR __LC_LAST_BREAK + BPOFF + stmg %r8,%r15,__LC_SAVE_AREA_ASYNC + lmg %r8,%r9,\lc_old_psw + tmhh %r8,0x0001 # interrupting from user ? + jnz 1f +#if IS_ENABLED(CONFIG_KVM) + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,0f + BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST + SIEEXIT +#endif +0: CHECK_STACK __LC_SAVE_AREA_ASYNC + aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) + j 2f +1: lctlg %c1,%c1,__LC_KERNEL_ASCE + lg %r15,__LC_KERNEL_STACK +2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC + MBEAR %r11 + stmg %r8,%r9,__PT_PSW(%r11) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,\handler + mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + tmhh %r8,0x0001 # returning to user ? + jno 2f + STACKLEAK_ERASE + lctlg %c1,%c1,__LC_USER_ASCE + BPON + stpt __LC_EXIT_TIMER +2: LBEAR __PT_LAST_BREAK(%r11) + lmg %r0,%r15,__PT_R0(%r11) + LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE +SYM_CODE_END(\name) +.endm + +INT_HANDLER ext_int_handler,__LC_EXT_OLD_PSW,do_ext_irq +INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq + +/* + * Load idle PSW. 
+ */ +SYM_FUNC_START(psw_idle) + stg %r14,(__SF_GPRS+8*8)(%r15) + stg %r3,__SF_EMPTY(%r15) + larl %r1,psw_idle_exit + stg %r1,__SF_EMPTY+8(%r15) + larl %r1,smp_cpu_mtid + llgf %r1,0(%r1) + ltgr %r1,%r1 + jz .Lpsw_idle_stcctm + .insn rsy,0xeb0000000017,%r1,5,__MT_CYCLES_ENTER(%r2) +.Lpsw_idle_stcctm: + oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT + BPON + stckf __CLOCK_IDLE_ENTER(%r2) + stpt __TIMER_IDLE_ENTER(%r2) + lpswe __SF_EMPTY(%r15) +SYM_INNER_LABEL(psw_idle_exit, SYM_L_GLOBAL) + BR_EX %r14 +SYM_FUNC_END(psw_idle) + +/* + * Machine check handler routines + */ +SYM_CODE_START(mcck_int_handler) + BPOFF + la %r1,4095 # validate r1 + spt __LC_CPU_TIMER_SAVE_AREA-4095(%r1) # validate cpu timer + LBEAR __LC_LAST_BREAK_SAVE_AREA-4095(%r1) # validate bear + lmg %r0,%r15,__LC_GPREGS_SAVE_AREA # validate gprs + lmg %r8,%r9,__LC_MCK_OLD_PSW + TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE + jo .Lmcck_panic # yes -> rest of mcck code invalid + TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID + jno .Lmcck_panic # control registers invalid -> panic + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA # validate ctl regs + ptlb + lghi %r14,__LC_CPU_TIMER_SAVE_AREA + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) + TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID + jo 3f + la %r14,__LC_SYS_ENTER_TIMER + clc 0(8,%r14),__LC_EXIT_TIMER + jl 1f + la %r14,__LC_EXIT_TIMER +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + jl 2f + la %r14,__LC_LAST_UPDATE_TIMER +2: spt 0(%r14) + mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) +3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID + jno .Lmcck_panic + tmhh %r8,0x0001 # interrupting from user ? + jnz .Lmcck_user + TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID + jno .Lmcck_panic +#if IS_ENABLED(CONFIG_KVM) + OUTSIDE %r9,.Lsie_gmap,.Lsie_done,.Lmcck_user + OUTSIDE %r9,.Lsie_entry,.Lsie_leave,4f + oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST +4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST + SIEEXIT +#endif +.Lmcck_user: + lg %r15,__LC_MCCK_STACK + la %r11,STACK_FRAME_OVERHEAD(%r15) + stctg %c1,%c1,__PT_CR1(%r11) + lctlg %c1,%c1,__LC_KERNEL_ASCE + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lghi %r14,__LC_GPREGS_SAVE_AREA+64 + stmg %r0,%r7,__PT_R0(%r11) + # clear user controlled registers to prevent speculative use + xgr %r0,%r0 + xgr %r1,%r1 + xgr %r3,%r3 + xgr %r4,%r4 + xgr %r5,%r5 + xgr %r6,%r6 + xgr %r7,%r7 + xgr %r10,%r10 + mvc __PT_R8(64,%r11),0(%r14) + stmg %r8,%r9,__PT_PSW(%r11) + xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + brasl %r14,s390_do_machine_check + lctlg %c1,%c1,__PT_CR1(%r11) + lmg %r0,%r10,__PT_R0(%r11) + mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + jno 0f + BPON + stpt __LC_EXIT_TIMER +0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193 + LBEAR 0(%r12) + lmg %r11,%r15,__PT_R11(%r11) + LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE + +.Lmcck_panic: + /* + * Iterate over all possible CPU addresses in the range 0..0xffff + * and stop each CPU using signal processor. Use compare and swap + * to allow just one CPU-stopper and prevent concurrent CPUs from + * stopping each other while leaving the others running. 
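A loose C sketch of the single-stopper election that the cs (compare and swap) sequence after this comment implements; stop_lock below stands in for the assembler data word of the same name, and cmpxchg() for the cs instruction:

	if (cmpxchg(&stop_lock, 0, 1) != 0) {
		for (;;)
			;	/* another CPU already won the election; do nothing */
	}
	/* winner: walk CPU addresses 0..0xffff, sigp-stop each one, then itself */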
+ */ + lhi %r5,0 + lhi %r6,1 + larl %r7,stop_lock + cs %r5,%r6,0(%r7) # single CPU-stopper only + jnz 4f + larl %r7,this_cpu + stap 0(%r7) # this CPU address + lh %r4,0(%r7) + nilh %r4,0 + lhi %r0,1 + sll %r0,16 # CPU counter + lhi %r3,0 # next CPU address +0: cr %r3,%r4 + je 2f +1: sigp %r1,%r3,SIGP_STOP # stop next CPU + brc SIGP_CC_BUSY,1b +2: ahi %r3,1 + brct %r0,0b +3: sigp %r1,%r4,SIGP_STOP # stop this CPU + brc SIGP_CC_BUSY,3b +4: j 4b +SYM_CODE_END(mcck_int_handler) + +SYM_CODE_START(restart_int_handler) + ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 + stg %r15,__LC_SAVE_AREA_RESTART + TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 + jz 0f + lctlg %c0,%c15,__LC_CREGS_SAVE_AREA +0: larl %r15,daton_psw + lpswe 0(%r15) # turn dat on, keep irqs off +.Ldaton: + lg %r15,__LC_RESTART_STACK + xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) + mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART + mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW + xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) + lg %r1,__LC_RESTART_FN # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA + lgf %r3,__LC_RESTART_SOURCE + ltgr %r3,%r3 # test source cpu address + jm 1f # negative -> skip source stop +0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu + brc 10,0b # wait for status stored +1: basr %r14,%r1 # call function + stap __SF_EMPTY(%r15) # store cpu address + llgh %r3,__SF_EMPTY(%r15) +2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu + brc 2,2b +3: j 3b +SYM_CODE_END(restart_int_handler) + + .section .kprobes.text, "ax" + +#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK) +/* + * The synchronous or the asynchronous stack overflowed. We are dead. + * No need to properly save the registers, we are going to panic anyway. + * Setup a pt_regs so that show_trace can provide a good call trace. 
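For context, a rough C equivalent of the CHECK_STACK test (defined further above) that branches here; it assumes THREAD_SIZE and CONFIG_STACK_GUARD are powers of two, in which case the tml instruction detects that the stack pointer has dropped into the lowest CONFIG_STACK_GUARD bytes of its THREAD_SIZE-aligned stack area:

	/* sp is the content of %r15 */
	if (!(sp & (THREAD_SIZE - CONFIG_STACK_GUARD)))
		kernel_stack_overflow(regs);	/* as the code below ends up doing */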
+ */ +SYM_CODE_START(stack_overflow) + lg %r15,__LC_NODAT_STACK # change to panic stack + la %r11,STACK_FRAME_OVERHEAD(%r15) + stmg %r0,%r7,__PT_R0(%r11) + stmg %r8,%r9,__PT_PSW(%r11) + mvc __PT_R8(64,%r11),0(%r14) + stg %r10,__PT_ORIG_GPR2(%r11) # store last break to orig_gpr2 + xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) + lgr %r2,%r11 # pass pointer to pt_regs + jg kernel_stack_overflow +SYM_CODE_END(stack_overflow) +#endif + + .section .data, "aw" + .balign 4 +SYM_DATA_LOCAL(stop_lock, .long 0) +SYM_DATA_LOCAL(this_cpu, .short 0) + .balign 8 +SYM_DATA_START_LOCAL(daton_psw) + .quad PSW_KERNEL_BITS + .quad .Ldaton +SYM_DATA_END(daton_psw) + + .section .rodata, "a" +#define SYSCALL(esame,emu) .quad __s390x_ ## esame +SYM_DATA_START(sys_call_table) +#include "asm/syscall_table.h" +SYM_DATA_END(sys_call_table) +#undef SYSCALL + +#ifdef CONFIG_COMPAT + +#define SYSCALL(esame,emu) .quad __s390_ ## emu +SYM_DATA_START(sys_call_table_emu) +#include "asm/syscall_table.h" +SYM_DATA_END(sys_call_table_emu) +#undef SYSCALL +#endif diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h new file mode 100644 index 0000000000..9f41853f36 --- /dev/null +++ b/arch/s390/kernel/entry.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ENTRY_H +#define _ENTRY_H + +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/signal.h> +#include <asm/extable.h> +#include <asm/ptrace.h> +#include <asm/idle.h> + +extern void *restart_stack; + +void system_call(void); +void pgm_check_handler(void); +void ext_int_handler(void); +void io_int_handler(void); +void mcck_int_handler(void); +void restart_int_handler(void); +void early_pgm_check_handler(void); + +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs); +void __do_pgm_check(struct pt_regs *regs); +void __do_syscall(struct pt_regs *regs, int per_trap); +void __do_early_pgm_check(struct pt_regs *regs); + +void do_protection_exception(struct pt_regs *regs); +void do_dat_exception(struct pt_regs *regs); +void do_secure_storage_access(struct pt_regs *regs); +void do_non_secure_storage_access(struct pt_regs *regs); +void do_secure_storage_violation(struct pt_regs *regs); +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); +void kernel_stack_overflow(struct pt_regs * regs); +void handle_signal32(struct ksignal *ksig, sigset_t *oldset, + struct pt_regs *regs); + +void do_io_irq(struct pt_regs *regs); +void do_ext_irq(struct pt_regs *regs); +void do_restart(void *arg); +void __init startup_init(void); +void die(struct pt_regs *regs, const char *str); +int setup_profiling_timer(unsigned int multiplier); +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip); + +struct s390_mmap_arg_struct; +struct fadvise64_64_args; +struct old_sigaction; + +long sys_rt_sigreturn(void); +long sys_sigreturn(void); + +long sys_s390_personality(unsigned int personality); +long sys_s390_runtime_instr(int command, int signum); +long sys_s390_guarded_storage(int command, struct gs_cb __user *); +long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); +long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); +long sys_s390_sthyi(unsigned long function_code, void __user *buffer, u64 __user *return_code, unsigned long flags); + +DECLARE_PER_CPU(u64, mt_cycles[8]); + +unsigned long stack_alloc(void); +void stack_free(unsigned long stack); + +extern char kprobes_insn_page[]; + +extern char _samode31[], _eamode31[]; +extern char 
_stext_amode31[], _etext_amode31[]; +extern struct exception_table_entry _start_amode31_ex_table[]; +extern struct exception_table_entry _stop_amode31_ex_table[]; + +#define __amode31_data __section(".amode31.data") +#define __amode31_ref __section(".amode31.refs") +extern long _start_amode31_refs[], _end_amode31_refs[]; + +#endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c new file mode 100644 index 0000000000..4666b29ac8 --- /dev/null +++ b/arch/s390/kernel/fpu.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * In-kernel vector facility support functions + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <asm/fpu/types.h> +#include <asm/fpu/api.h> +#include <asm/vx-insn.h> + +void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags) +{ + /* + * Limit the save to the FPU/vector registers already + * in use by the previous context + */ + flags &= state->mask; + + if (flags & KERNEL_FPC) + /* Save floating point control */ + asm volatile("stfpc %0" : "=Q" (state->fpc)); + + if (!MACHINE_HAS_VX) { + if (flags & KERNEL_VXR_V0V7) { + /* Save floating-point registers */ + asm volatile("std 0,%0" : "=Q" (state->fprs[0])); + asm volatile("std 1,%0" : "=Q" (state->fprs[1])); + asm volatile("std 2,%0" : "=Q" (state->fprs[2])); + asm volatile("std 3,%0" : "=Q" (state->fprs[3])); + asm volatile("std 4,%0" : "=Q" (state->fprs[4])); + asm volatile("std 5,%0" : "=Q" (state->fprs[5])); + asm volatile("std 6,%0" : "=Q" (state->fprs[6])); + asm volatile("std 7,%0" : "=Q" (state->fprs[7])); + asm volatile("std 8,%0" : "=Q" (state->fprs[8])); + asm volatile("std 9,%0" : "=Q" (state->fprs[9])); + asm volatile("std 10,%0" : "=Q" (state->fprs[10])); + asm volatile("std 11,%0" : "=Q" (state->fprs[11])); + asm volatile("std 12,%0" : "=Q" (state->fprs[12])); + asm volatile("std 13,%0" : "=Q" (state->fprs[13])); + asm volatile("std 14,%0" : "=Q" (state->fprs[14])); + asm volatile("std 15,%0" : "=Q" (state->fprs[15])); + } + return; + } + + /* Test and save vector registers */ + asm volatile ( + /* + * Test if any vector register must be saved and, if so, + * test if all register can be saved. + */ + " la 1,%[vxrs]\n" /* load save area */ + " tmll %[m],30\n" /* KERNEL_VXR */ + " jz 7f\n" /* no work -> done */ + " jo 5f\n" /* -> save V0..V31 */ + /* + * Test for special case KERNEL_FPU_MID only. 
In this + * case a vstm V8..V23 is the best instruction + */ + " chi %[m],12\n" /* KERNEL_VXR_MID */ + " jne 0f\n" /* -> save V8..V23 */ + " VSTM 8,23,128,1\n" /* vstm %v8,%v23,128(%r1) */ + " j 7f\n" + /* Test and save the first half of 16 vector registers */ + "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */ + " jz 3f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> save V0..V15 */ + " brc 2,1f\n" /* 10 -> save V8..V15 */ + " VSTM 0,7,0,1\n" /* vstm %v0,%v7,0(%r1) */ + " j 3f\n" + "1: VSTM 8,15,128,1\n" /* vstm %v8,%v15,128(%r1) */ + " j 3f\n" + "2: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */ + /* Test and save the second half of 16 vector registers */ + "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */ + " jz 7f\n" + " jo 6f\n" /* 11 -> save V16..V31 */ + " brc 2,4f\n" /* 10 -> save V24..V31 */ + " VSTM 16,23,256,1\n" /* vstm %v16,%v23,256(%r1) */ + " j 7f\n" + "4: VSTM 24,31,384,1\n" /* vstm %v24,%v31,384(%r1) */ + " j 7f\n" + "5: VSTM 0,15,0,1\n" /* vstm %v0,%v15,0(%r1) */ + "6: VSTM 16,31,256,1\n" /* vstm %v16,%v31,256(%r1) */ + "7:" + : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) + : [m] "d" (flags) + : "1", "cc"); +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(struct kernel_fpu *state, u32 flags) +{ + /* + * Limit the restore to the FPU/vector registers of the + * previous context that have been overwritte by the + * current context + */ + flags &= state->mask; + + if (flags & KERNEL_FPC) + /* Restore floating-point controls */ + asm volatile("lfpc %0" : : "Q" (state->fpc)); + + if (!MACHINE_HAS_VX) { + if (flags & KERNEL_VXR_V0V7) { + /* Restore floating-point registers */ + asm volatile("ld 0,%0" : : "Q" (state->fprs[0])); + asm volatile("ld 1,%0" : : "Q" (state->fprs[1])); + asm volatile("ld 2,%0" : : "Q" (state->fprs[2])); + asm volatile("ld 3,%0" : : "Q" (state->fprs[3])); + asm volatile("ld 4,%0" : : "Q" (state->fprs[4])); + asm volatile("ld 5,%0" : : "Q" (state->fprs[5])); + asm volatile("ld 6,%0" : : "Q" (state->fprs[6])); + asm volatile("ld 7,%0" : : "Q" (state->fprs[7])); + asm volatile("ld 8,%0" : : "Q" (state->fprs[8])); + asm volatile("ld 9,%0" : : "Q" (state->fprs[9])); + asm volatile("ld 10,%0" : : "Q" (state->fprs[10])); + asm volatile("ld 11,%0" : : "Q" (state->fprs[11])); + asm volatile("ld 12,%0" : : "Q" (state->fprs[12])); + asm volatile("ld 13,%0" : : "Q" (state->fprs[13])); + asm volatile("ld 14,%0" : : "Q" (state->fprs[14])); + asm volatile("ld 15,%0" : : "Q" (state->fprs[15])); + } + return; + } + + /* Test and restore (load) vector registers */ + asm volatile ( + /* + * Test if any vector register must be loaded and, if so, + * test if all registers can be loaded at once. + */ + " la 1,%[vxrs]\n" /* load restore area */ + " tmll %[m],30\n" /* KERNEL_VXR */ + " jz 7f\n" /* no work -> done */ + " jo 5f\n" /* -> restore V0..V31 */ + /* + * Test for special case KERNEL_FPU_MID only. 
In this + * case a vlm V8..V23 is the best instruction + */ + " chi %[m],12\n" /* KERNEL_VXR_MID */ + " jne 0f\n" /* -> restore V8..V23 */ + " VLM 8,23,128,1\n" /* vlm %v8,%v23,128(%r1) */ + " j 7f\n" + /* Test and restore the first half of 16 vector registers */ + "0: tmll %[m],6\n" /* KERNEL_VXR_LOW */ + " jz 3f\n" /* -> KERNEL_VXR_HIGH */ + " jo 2f\n" /* 11 -> restore V0..V15 */ + " brc 2,1f\n" /* 10 -> restore V8..V15 */ + " VLM 0,7,0,1\n" /* vlm %v0,%v7,0(%r1) */ + " j 3f\n" + "1: VLM 8,15,128,1\n" /* vlm %v8,%v15,128(%r1) */ + " j 3f\n" + "2: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */ + /* Test and restore the second half of 16 vector registers */ + "3: tmll %[m],24\n" /* KERNEL_VXR_HIGH */ + " jz 7f\n" + " jo 6f\n" /* 11 -> restore V16..V31 */ + " brc 2,4f\n" /* 10 -> restore V24..V31 */ + " VLM 16,23,256,1\n" /* vlm %v16,%v23,256(%r1) */ + " j 7f\n" + "4: VLM 24,31,384,1\n" /* vlm %v24,%v31,384(%r1) */ + " j 7f\n" + "5: VLM 0,15,0,1\n" /* vlm %v0,%v15,0(%r1) */ + "6: VLM 16,31,256,1\n" /* vlm %v16,%v31,256(%r1) */ + "7:" + : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs) + : [m] "d" (flags) + : "1", "cc"); +} +EXPORT_SYMBOL(__kernel_fpu_end); + +void __load_fpu_regs(void) +{ + struct fpu *state = ¤t->thread.fpu; + unsigned long *regs = current->thread.fpu.regs; + + asm volatile("lfpc %0" : : "Q" (state->fpc)); + if (likely(MACHINE_HAS_VX)) { + asm volatile("lgr 1,%0\n" + "VLM 0,15,0,1\n" + "VLM 16,31,256,1\n" + : + : "d" (regs) + : "1", "cc", "memory"); + } else { + asm volatile("ld 0,%0" : : "Q" (regs[0])); + asm volatile("ld 1,%0" : : "Q" (regs[1])); + asm volatile("ld 2,%0" : : "Q" (regs[2])); + asm volatile("ld 3,%0" : : "Q" (regs[3])); + asm volatile("ld 4,%0" : : "Q" (regs[4])); + asm volatile("ld 5,%0" : : "Q" (regs[5])); + asm volatile("ld 6,%0" : : "Q" (regs[6])); + asm volatile("ld 7,%0" : : "Q" (regs[7])); + asm volatile("ld 8,%0" : : "Q" (regs[8])); + asm volatile("ld 9,%0" : : "Q" (regs[9])); + asm volatile("ld 10,%0" : : "Q" (regs[10])); + asm volatile("ld 11,%0" : : "Q" (regs[11])); + asm volatile("ld 12,%0" : : "Q" (regs[12])); + asm volatile("ld 13,%0" : : "Q" (regs[13])); + asm volatile("ld 14,%0" : : "Q" (regs[14])); + asm volatile("ld 15,%0" : : "Q" (regs[15])); + } + clear_cpu_flag(CIF_FPU); +} +EXPORT_SYMBOL(__load_fpu_regs); + +void load_fpu_regs(void) +{ + raw_local_irq_disable(); + __load_fpu_regs(); + raw_local_irq_enable(); +} +EXPORT_SYMBOL(load_fpu_regs); + +void save_fpu_regs(void) +{ + unsigned long flags, *regs; + struct fpu *state; + + local_irq_save(flags); + + if (test_cpu_flag(CIF_FPU)) + goto out; + + state = ¤t->thread.fpu; + regs = current->thread.fpu.regs; + + asm volatile("stfpc %0" : "=Q" (state->fpc)); + if (likely(MACHINE_HAS_VX)) { + asm volatile("lgr 1,%0\n" + "VSTM 0,15,0,1\n" + "VSTM 16,31,256,1\n" + : + : "d" (regs) + : "1", "cc", "memory"); + } else { + asm volatile("std 0,%0" : "=Q" (regs[0])); + asm volatile("std 1,%0" : "=Q" (regs[1])); + asm volatile("std 2,%0" : "=Q" (regs[2])); + asm volatile("std 3,%0" : "=Q" (regs[3])); + asm volatile("std 4,%0" : "=Q" (regs[4])); + asm volatile("std 5,%0" : "=Q" (regs[5])); + asm volatile("std 6,%0" : "=Q" (regs[6])); + asm volatile("std 7,%0" : "=Q" (regs[7])); + asm volatile("std 8,%0" : "=Q" (regs[8])); + asm volatile("std 9,%0" : "=Q" (regs[9])); + asm volatile("std 10,%0" : "=Q" (regs[10])); + asm volatile("std 11,%0" : "=Q" (regs[11])); + asm volatile("std 12,%0" : "=Q" (regs[12])); + asm volatile("std 13,%0" : "=Q" (regs[13])); + asm volatile("std 14,%0" : "=Q" (regs[14])); + asm 
volatile("std 15,%0" : "=Q" (regs[15])); + } + set_cpu_flag(CIF_FPU); +out: + local_irq_restore(flags); +} +EXPORT_SYMBOL(save_fpu_regs); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c new file mode 100644 index 0000000000..c46381ea04 --- /dev/null +++ b/arch/s390/kernel/ftrace.c @@ -0,0 +1,340 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Dynamic function tracer architecture backend. + * + * Copyright IBM Corp. 2009,2014 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/moduleloader.h> +#include <linux/hardirq.h> +#include <linux/uaccess.h> +#include <linux/ftrace.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/kprobes.h> +#include <trace/syscall.h> +#include <asm/asm-offsets.h> +#include <asm/text-patching.h> +#include <asm/cacheflush.h> +#include <asm/ftrace.lds.h> +#include <asm/nospec-branch.h> +#include <asm/set_memory.h> +#include "entry.h" +#include "ftrace.h" + +/* + * To generate function prologue either gcc's hotpatch feature (since gcc 4.8) + * or a combination of -pg -mrecord-mcount -mnop-mcount -mfentry flags + * (since gcc 9 / clang 10) is used. + * In both cases the original and also the disabled function prologue contains + * only a single six byte instruction and looks like this: + * > brcl 0,0 # offset 0 + * To enable ftrace the code gets patched like above and afterwards looks + * like this: + * > brasl %r0,ftrace_caller # offset 0 + * + * The instruction will be patched by ftrace_make_call / ftrace_make_nop. + * The ftrace function gets called with a non-standard C function call ABI + * where r0 contains the return address. It is also expected that the called + * function only clobbers r0 and r1, but restores r2-r15. + * For module code we can't directly jump to ftrace caller, but need a + * trampoline (ftrace_plt), which clobbers also r1. 
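To make the encoding described in the comment above concrete, here is a small self-contained sketch (not part of the patch) of how the 6-byte brcl/brasl used as the function prologue encodes its target; the structure mirrors struct ftrace_insn defined just below, and the displacement is counted in halfwords relative to the instruction address, giving a reach of +-4 GiB:

#include <stdint.h>

struct s390_brcl_insn {		/* same layout as struct ftrace_insn */
	uint16_t opc;		/* 0xc004 = brcl 0,...  0xc005 = brasl %r0,... */
	int32_t  disp;		/* signed displacement in halfwords */
} __attribute__((packed));

static inline unsigned long brcl_target(const struct s390_brcl_insn *insn)
{
	return (unsigned long)insn + (long)insn->disp * 2;
}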
+ */ + +void *ftrace_func __read_mostly = ftrace_stub; +struct ftrace_insn { + u16 opc; + s32 disp; +} __packed; + +#ifdef CONFIG_MODULES +static char *ftrace_plt; +#endif /* CONFIG_MODULES */ + +static const char *ftrace_shared_hotpatch_trampoline(const char **end) +{ + const char *tstart, *tend; + + tstart = ftrace_shared_hotpatch_trampoline_br; + tend = ftrace_shared_hotpatch_trampoline_br_end; +#ifdef CONFIG_EXPOLINE + if (!nospec_disable) { + tstart = ftrace_shared_hotpatch_trampoline_exrl; + tend = ftrace_shared_hotpatch_trampoline_exrl_end; + } +#endif /* CONFIG_EXPOLINE */ + if (end) + *end = tend; + return tstart; +} + +bool ftrace_need_init_nop(void) +{ + return true; +} + +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + static struct ftrace_hotpatch_trampoline *next_vmlinux_trampoline = + __ftrace_hotpatch_trampolines_start; + static const char orig[6] = { 0xc0, 0x04, 0x00, 0x00, 0x00, 0x00 }; + static struct ftrace_hotpatch_trampoline *trampoline; + struct ftrace_hotpatch_trampoline **next_trampoline; + struct ftrace_hotpatch_trampoline *trampolines_end; + struct ftrace_hotpatch_trampoline tmp; + struct ftrace_insn *insn; + const char *shared; + s32 disp; + + BUILD_BUG_ON(sizeof(struct ftrace_hotpatch_trampoline) != + SIZEOF_FTRACE_HOTPATCH_TRAMPOLINE); + + next_trampoline = &next_vmlinux_trampoline; + trampolines_end = __ftrace_hotpatch_trampolines_end; + shared = ftrace_shared_hotpatch_trampoline(NULL); +#ifdef CONFIG_MODULES + if (mod) { + next_trampoline = &mod->arch.next_trampoline; + trampolines_end = mod->arch.trampolines_end; + shared = ftrace_plt; + } +#endif + + if (WARN_ON_ONCE(*next_trampoline >= trampolines_end)) + return -ENOMEM; + trampoline = (*next_trampoline)++; + + /* Check for the compiler-generated fentry nop (brcl 0, .). */ + if (WARN_ON_ONCE(memcmp((const void *)rec->ip, &orig, sizeof(orig)))) + return -EINVAL; + + /* Generate the trampoline. */ + tmp.brasl_opc = 0xc015; /* brasl %r1, shared */ + tmp.brasl_disp = (shared - (const char *)&trampoline->brasl_opc) / 2; + tmp.interceptor = FTRACE_ADDR; + tmp.rest_of_intercepted_function = rec->ip + sizeof(struct ftrace_insn); + s390_kernel_write(trampoline, &tmp, sizeof(tmp)); + + /* Generate a jump to the trampoline. 
*/ + disp = ((char *)trampoline - (char *)rec->ip) / 2; + insn = (struct ftrace_insn *)rec->ip; + s390_kernel_write(&insn->disp, &disp, sizeof(disp)); + + return 0; +} + +static struct ftrace_hotpatch_trampoline *ftrace_get_trampoline(struct dyn_ftrace *rec) +{ + struct ftrace_hotpatch_trampoline *trampoline; + struct ftrace_insn insn; + s64 disp; + u16 opc; + + if (copy_from_kernel_nofault(&insn, (void *)rec->ip, sizeof(insn))) + return ERR_PTR(-EFAULT); + disp = (s64)insn.disp * 2; + trampoline = (void *)(rec->ip + disp); + if (get_kernel_nofault(opc, &trampoline->brasl_opc)) + return ERR_PTR(-EFAULT); + if (opc != 0xc015) + return ERR_PTR(-EINVAL); + return trampoline; +} + +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + struct ftrace_hotpatch_trampoline *trampoline; + u64 old; + + trampoline = ftrace_get_trampoline(rec); + if (IS_ERR(trampoline)) + return PTR_ERR(trampoline); + if (get_kernel_nofault(old, &trampoline->interceptor)) + return -EFAULT; + if (old != old_addr) + return -EINVAL; + s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); + return 0; +} + +static int ftrace_patch_branch_mask(void *addr, u16 expected, bool enable) +{ + u16 old; + u8 op; + + if (get_kernel_nofault(old, addr)) + return -EFAULT; + if (old != expected) + return -EINVAL; + /* set mask field to all ones or zeroes */ + op = enable ? 0xf4 : 0x04; + s390_kernel_write((char *)addr + 1, &op, sizeof(op)); + return 0; +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + /* Expect brcl 0xf,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc0f4, false); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + struct ftrace_hotpatch_trampoline *trampoline; + + trampoline = ftrace_get_trampoline(rec); + if (IS_ERR(trampoline)) + return PTR_ERR(trampoline); + s390_kernel_write(&trampoline->interceptor, &addr, sizeof(addr)); + /* Expect brcl 0x0,... */ + return ftrace_patch_branch_mask((void *)rec->ip, 0xc004, true); +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + ftrace_func = func; + return 0; +} + +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + +void ftrace_arch_code_modify_post_process(void) +{ + /* + * Flush any pre-fetched instructions on all + * CPUs to make the new code visible. + */ + text_poke_sync_lock(); +} + +#ifdef CONFIG_MODULES + +static int __init ftrace_plt_init(void) +{ + const char *start, *end; + + ftrace_plt = module_alloc(PAGE_SIZE); + if (!ftrace_plt) + panic("cannot allocate ftrace plt\n"); + + start = ftrace_shared_hotpatch_trampoline(&end); + memcpy(ftrace_plt, start, end - start); + set_memory_rox((unsigned long)ftrace_plt, 1); + return 0; +} +device_initcall(ftrace_plt_init); + +#endif /* CONFIG_MODULES */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * Hook the return address and push it in the stack of return addresses + * in current thread info. + */ +unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp, + unsigned long ip) +{ + if (unlikely(ftrace_graph_is_dead())) + goto out; + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + goto out; + ip -= MCOUNT_INSN_SIZE; + if (!function_graph_enter(ra, ip, 0, (void *) sp)) + ra = (unsigned long) return_to_handler; +out: + return ra; +} +NOKPROBE_SYMBOL(prepare_ftrace_return); + +/* + * Patch the kernel code at ftrace_graph_caller location. The instruction + * there is branch relative on condition. 
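For illustration (not part of the patch), the single-byte write that ftrace_patch_branch_mask() above performs: the branch mask is the high nibble of the second instruction byte, both for brc (0xa7x4) and for brcl (0xc0x4), so rewriting that one byte flips the instruction between a never-taken branch (effectively a nop) and an always-taken branch:

static void set_branch_mask(unsigned char *insn, int enable)
{
	/* brcl 0,...  = c0 04 dd dd dd dd   (disabled, falls through)
	 * brcl 15,... = c0 f4 dd dd dd dd   (enabled, branches unconditionally)
	 */
	insn[1] = enable ? 0xf4 : 0x04;
}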
To enable the ftrace graph code + * block, we simply patch the mask field of the instruction to zero and + * turn the instruction into a nop. + * To disable the ftrace graph code the mask field will be patched to + * all ones, which turns the instruction into an unconditional branch. + */ +int ftrace_enable_ftrace_graph_caller(void) +{ + int rc; + + /* Expect brc 0xf,... */ + rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa7f4, false); + if (rc) + return rc; + text_poke_sync_lock(); + return 0; +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + int rc; + + /* Expect brc 0x0,... */ + rc = ftrace_patch_branch_mask(ftrace_graph_caller, 0xa704, true); + if (rc) + return rc; + text_poke_sync_lock(); + return 0; +} + +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_KPROBES_ON_FTRACE +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct ftrace_regs *fregs) +{ + struct kprobe_ctlblk *kcb; + struct pt_regs *regs; + struct kprobe *p; + int bit; + + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) + return; + + regs = ftrace_get_regs(fregs); + p = get_kprobe((kprobe_opcode_t *)ip); + if (!regs || unlikely(!p) || kprobe_disabled(p)) + goto out; + + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + goto out; + } + + __this_cpu_write(current_kprobe, p); + + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + instruction_pointer_set(regs, ip); + + if (!p->pre_handler || !p->pre_handler(p, regs)) { + + instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE); + + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + } + __this_cpu_write(current_kprobe, NULL); +out: + ftrace_test_recursion_unlock(bit); +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + return 0; +} +#endif diff --git a/arch/s390/kernel/ftrace.h b/arch/s390/kernel/ftrace.h new file mode 100644 index 0000000000..7f75a96164 --- /dev/null +++ b/arch/s390/kernel/ftrace.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _FTRACE_H +#define _FTRACE_H + +#include <asm/types.h> + +struct ftrace_hotpatch_trampoline { + u16 brasl_opc; + s32 brasl_disp; + s16: 16; + u64 rest_of_intercepted_function; + u64 interceptor; +} __packed; + +extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_start[]; +extern struct ftrace_hotpatch_trampoline __ftrace_hotpatch_trampolines_end[]; +extern const char ftrace_shared_hotpatch_trampoline_br[]; +extern const char ftrace_shared_hotpatch_trampoline_br_end[]; +extern const char ftrace_shared_hotpatch_trampoline_exrl[]; +extern const char ftrace_shared_hotpatch_trampoline_exrl_end[]; +extern const char ftrace_plt_template[]; +extern const char ftrace_plt_template_end[]; + +#endif /* _FTRACE_H */ diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c new file mode 100644 index 0000000000..d14dd1c2e5 --- /dev/null +++ b/arch/s390/kernel/guarded_storage.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 
2016 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <asm/guarded_storage.h> +#include "entry.h" + +void guarded_storage_release(struct task_struct *tsk) +{ + kfree(tsk->thread.gs_cb); + kfree(tsk->thread.gs_bc_cb); +} + +static int gs_enable(void) +{ + struct gs_cb *gs_cb; + + if (!current->thread.gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + gs_cb->gsd = 25; + preempt_disable(); + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + preempt_enable(); + } + return 0; +} + +static int gs_disable(void) +{ + if (current->thread.gs_cb) { + preempt_disable(); + kfree(current->thread.gs_cb); + current->thread.gs_cb = NULL; + __ctl_clear_bit(2, 4); + preempt_enable(); + } + return 0; +} + +static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + if (!gs_cb) { + gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL); + if (!gs_cb) + return -ENOMEM; + current->thread.gs_bc_cb = gs_cb; + } + if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb))) + return -EFAULT; + return 0; +} + +static int gs_clear_bc_cb(void) +{ + struct gs_cb *gs_cb; + + gs_cb = current->thread.gs_bc_cb; + current->thread.gs_bc_cb = NULL; + kfree(gs_cb); + return 0; +} + +void gs_load_bc_cb(struct pt_regs *regs) +{ + struct gs_cb *gs_cb; + + preempt_disable(); + clear_thread_flag(TIF_GUARDED_STORAGE); + gs_cb = current->thread.gs_bc_cb; + if (gs_cb) { + kfree(current->thread.gs_cb); + current->thread.gs_bc_cb = NULL; + __ctl_set_bit(2, 4); + load_gs_cb(gs_cb); + current->thread.gs_cb = gs_cb; + } + preempt_enable(); +} + +static int gs_broadcast(void) +{ + struct task_struct *sibling; + + read_lock(&tasklist_lock); + for_each_thread(current, sibling) { + if (!sibling->thread.gs_bc_cb) + continue; + if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE)) + kick_process(sibling); + } + read_unlock(&tasklist_lock); + return 0; +} + +SYSCALL_DEFINE2(s390_guarded_storage, int, command, + struct gs_cb __user *, gs_cb) +{ + if (!MACHINE_HAS_GS) + return -EOPNOTSUPP; + switch (command) { + case GS_ENABLE: + return gs_enable(); + case GS_DISABLE: + return gs_disable(); + case GS_SET_BC_CB: + return gs_set_bc_cb(gs_cb); + case GS_CLEAR_BC_CB: + return gs_clear_bc_cb(); + case GS_BROADCAST: + return gs_broadcast(); + default: + return -EINVAL; + } +} diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S new file mode 100644 index 0000000000..45413b04ef --- /dev/null +++ b/arch/s390/kernel/head64.S @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 1999, 2010 + * + * Author(s): Hartmut Penner <hp@de.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Rob van der Heij <rvdhei@iae.nl> + * + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm/ptrace.h> + +__HEAD +SYM_CODE_START(startup_continue) + larl %r1,tod_clock_base + mvc 0(16,%r1),__LC_BOOT_CLOCK +# +# Setup stack +# + larl %r14,init_task + stg %r14,__LC_CURRENT + larl %r15,init_thread_union+STACK_INIT_OFFSET + stg %r15,__LC_KERNEL_STACK + brasl %r14,sclp_early_adjust_va # allow sclp_early_printk + brasl %r14,startup_init # s390 specific early init + brasl %r14,start_kernel # common init code +# +# We returned from start_kernel ?!? 
PANIK +# + basr %r13,0 + lpswe dw_psw-.(%r13) # load disabled wait psw +SYM_CODE_END(startup_continue) + + .balign 16 +SYM_DATA_LOCAL(dw_psw, .quad 0x0002000180000000,0x0000000000000000) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c new file mode 100644 index 0000000000..e7239aaf42 --- /dev/null +++ b/arch/s390/kernel/idle.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Idle functions for s390. + * + * Copyright IBM Corp. 2014 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <trace/events/power.h> +#include <asm/cpu_mf.h> +#include <asm/cputime.h> +#include <asm/nmi.h> +#include <asm/smp.h> +#include "entry.h" + +static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); + +void account_idle_time_irq(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + unsigned long idle_time; + u64 cycles_new[8]; + int i; + + if (smp_cpu_mtid) { + stcctm(MT_DIAG, smp_cpu_mtid, cycles_new); + for (i = 0; i < smp_cpu_mtid; i++) + this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); + } + + idle_time = S390_lowcore.int_clock - idle->clock_idle_enter; + + S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock; + S390_lowcore.last_update_clock = S390_lowcore.int_clock; + + S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter; + S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer; + + /* Account time spent with enabled wait psw loaded as idle time. */ + WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); + WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1); + account_idle_time(cputime_to_nsecs(idle_time)); +} + +void noinstr arch_cpu_idle(void) +{ + struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + unsigned long psw_mask; + + /* Wait for external, I/O or machine check interrupt. */ + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + clear_cpu_flag(CIF_NOHZ_DELAY); + + /* psw_idle() returns with interrupts disabled. 
*/ + psw_idle(idle, psw_mask); +} + +static ssize_t show_idle_count(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + + return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count)); +} +DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); + +static ssize_t show_idle_time(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); + + return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12); +} +DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); + +void arch_cpu_idle_enter(void) +{ +} + +void arch_cpu_idle_exit(void) +{ +} + +void __noreturn arch_cpu_idle_dead(void) +{ + cpu_die(); +} diff --git a/arch/s390/kernel/ima_arch.c b/arch/s390/kernel/ima_arch.c new file mode 100644 index 0000000000..f3c3e6e1c5 --- /dev/null +++ b/arch/s390/kernel/ima_arch.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/ima.h> +#include <asm/boot_data.h> + +bool arch_ima_get_secureboot(void) +{ + return ipl_secure_flag; +} + +const char * const *arch_get_ima_policy(void) +{ + return NULL; +} diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c new file mode 100644 index 0000000000..8d0b95c173 --- /dev/null +++ b/arch/s390/kernel/ipl.c @@ -0,0 +1,2522 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ipl/reipl/dump support for Linux on s390. + * + * Copyright IBM Corp. 2005, 2012 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + * Volker Sameske <sameske@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/kstrtox.h> +#include <linux/panic_notifier.h> +#include <linux/reboot.h> +#include <linux/ctype.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/crash_dump.h> +#include <linux/debug_locks.h> +#include <asm/asm-extable.h> +#include <asm/diag.h> +#include <asm/ipl.h> +#include <asm/smp.h> +#include <asm/setup.h> +#include <asm/cpcmd.h> +#include <asm/ebcdic.h> +#include <asm/sclp.h> +#include <asm/checksum.h> +#include <asm/debug.h> +#include <asm/abs_lowcore.h> +#include <asm/os_info.h> +#include <asm/sections.h> +#include <asm/boot_data.h> +#include "entry.h" + +#define IPL_PARM_BLOCK_VERSION 0 + +#define IPL_UNKNOWN_STR "unknown" +#define IPL_CCW_STR "ccw" +#define IPL_ECKD_STR "eckd" +#define IPL_ECKD_DUMP_STR "eckd_dump" +#define IPL_FCP_STR "fcp" +#define IPL_FCP_DUMP_STR "fcp_dump" +#define IPL_NVME_STR "nvme" +#define IPL_NVME_DUMP_STR "nvme_dump" +#define IPL_NSS_STR "nss" + +#define DUMP_CCW_STR "ccw" +#define DUMP_ECKD_STR "eckd" +#define DUMP_FCP_STR "fcp" +#define DUMP_NVME_STR "nvme" +#define DUMP_NONE_STR "none" + +/* + * Five shutdown trigger types are supported: + * - panic + * - halt + * - power off + * - reipl + * - restart + */ +#define ON_PANIC_STR "on_panic" +#define ON_HALT_STR "on_halt" +#define ON_POFF_STR "on_poff" +#define ON_REIPL_STR "on_reboot" +#define ON_RESTART_STR "on_restart" + +struct shutdown_action; +struct shutdown_trigger { + char *name; + struct shutdown_action *action; +}; + +/* + * The following shutdown action types are supported: + */ +#define SHUTDOWN_ACTION_IPL_STR "ipl" +#define SHUTDOWN_ACTION_REIPL_STR "reipl" +#define SHUTDOWN_ACTION_DUMP_STR "dump" +#define SHUTDOWN_ACTION_VMCMD_STR "vmcmd" +#define SHUTDOWN_ACTION_STOP_STR "stop" +#define SHUTDOWN_ACTION_DUMP_REIPL_STR "dump_reipl" + +struct shutdown_action { + char *name; + void (*fn)
(struct shutdown_trigger *trigger); + int (*init) (void); + int init_rc; +}; + +static char *ipl_type_str(enum ipl_type type) +{ + switch (type) { + case IPL_TYPE_CCW: + return IPL_CCW_STR; + case IPL_TYPE_ECKD: + return IPL_ECKD_STR; + case IPL_TYPE_ECKD_DUMP: + return IPL_ECKD_DUMP_STR; + case IPL_TYPE_FCP: + return IPL_FCP_STR; + case IPL_TYPE_FCP_DUMP: + return IPL_FCP_DUMP_STR; + case IPL_TYPE_NSS: + return IPL_NSS_STR; + case IPL_TYPE_NVME: + return IPL_NVME_STR; + case IPL_TYPE_NVME_DUMP: + return IPL_NVME_DUMP_STR; + case IPL_TYPE_UNKNOWN: + default: + return IPL_UNKNOWN_STR; + } +} + +enum dump_type { + DUMP_TYPE_NONE = 1, + DUMP_TYPE_CCW = 2, + DUMP_TYPE_FCP = 4, + DUMP_TYPE_NVME = 8, + DUMP_TYPE_ECKD = 16, +}; + +static char *dump_type_str(enum dump_type type) +{ + switch (type) { + case DUMP_TYPE_NONE: + return DUMP_NONE_STR; + case DUMP_TYPE_CCW: + return DUMP_CCW_STR; + case DUMP_TYPE_ECKD: + return DUMP_ECKD_STR; + case DUMP_TYPE_FCP: + return DUMP_FCP_STR; + case DUMP_TYPE_NVME: + return DUMP_NVME_STR; + default: + return NULL; + } +} + +int __bootdata_preserved(ipl_block_valid); +struct ipl_parameter_block __bootdata_preserved(ipl_block); +int __bootdata_preserved(ipl_secure_flag); + +unsigned long __bootdata_preserved(ipl_cert_list_addr); +unsigned long __bootdata_preserved(ipl_cert_list_size); + +unsigned long __bootdata(early_ipl_comp_list_addr); +unsigned long __bootdata(early_ipl_comp_list_size); + +static int reipl_capabilities = IPL_TYPE_UNKNOWN; + +static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN; +static struct ipl_parameter_block *reipl_block_fcp; +static struct ipl_parameter_block *reipl_block_nvme; +static struct ipl_parameter_block *reipl_block_ccw; +static struct ipl_parameter_block *reipl_block_eckd; +static struct ipl_parameter_block *reipl_block_nss; +static struct ipl_parameter_block *reipl_block_actual; + +static int dump_capabilities = DUMP_TYPE_NONE; +static enum dump_type dump_type = DUMP_TYPE_NONE; +static struct ipl_parameter_block *dump_block_fcp; +static struct ipl_parameter_block *dump_block_nvme; +static struct ipl_parameter_block *dump_block_ccw; +static struct ipl_parameter_block *dump_block_eckd; + +static struct sclp_ipl_info sclp_ipl_info; + +static bool reipl_nvme_clear; +static bool reipl_fcp_clear; +static bool reipl_ccw_clear; +static bool reipl_eckd_clear; + +static unsigned long os_info_flags; + +static inline int __diag308(unsigned long subcode, unsigned long addr) +{ + union register_pair r1; + + r1.even = addr; + r1.odd = 0; + asm volatile( + " diag %[r1],%[subcode],0x308\n" + "0: nopr %%r7\n" + EX_TABLE(0b,0b) + : [r1] "+&d" (r1.pair) + : [subcode] "d" (subcode) + : "cc", "memory"); + return r1.odd; +} + +int diag308(unsigned long subcode, void *addr) +{ + diag_stat_inc(DIAG_STAT_X308); + return __diag308(subcode, addr ? virt_to_phys(addr) : 0); +} +EXPORT_SYMBOL_GPL(diag308); + +/* SYSFS */ + +#define IPL_ATTR_SHOW_FN(_prefix, _name, _format, args...) 
\ +static ssize_t sys_##_prefix##_##_name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *page) \ +{ \ + return scnprintf(page, PAGE_SIZE, _format, ##args); \ +} + +#define IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk) \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + unsigned long long ssid, devno; \ + \ + if (sscanf(buf, "0.%llx.%llx\n", &ssid, &devno) != 2) \ + return -EINVAL; \ + \ + if (ssid > __MAX_SSID || devno > __MAX_SUBCHANNEL) \ + return -EINVAL; \ + \ + _ipl_blk.ssid = ssid; \ + _ipl_blk.devno = devno; \ + return len; \ +} + +#define DEFINE_IPL_CCW_ATTR_RW(_prefix, _name, _ipl_blk) \ +IPL_ATTR_SHOW_FN(_prefix, _name, "0.%x.%04x\n", \ + _ipl_blk.ssid, _ipl_blk.devno); \ +IPL_ATTR_CCW_STORE_FN(_prefix, _name, _ipl_blk); \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 0644, \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store) \ + +#define DEFINE_IPL_ATTR_RO(_prefix, _name, _format, _value) \ +IPL_ATTR_SHOW_FN(_prefix, _name, _format, _value) \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 0444, sys_##_prefix##_##_name##_show, NULL) + +#define DEFINE_IPL_ATTR_RW(_prefix, _name, _fmt_out, _fmt_in, _value) \ +IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, (unsigned long long) _value) \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + unsigned long long value; \ + if (sscanf(buf, _fmt_in, &value) != 1) \ + return -EINVAL; \ + _value = value; \ + return len; \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 0644, \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store) + +#define DEFINE_IPL_ATTR_STR_RW(_prefix, _name, _fmt_out, _fmt_in, _value)\ +IPL_ATTR_SHOW_FN(_prefix, _name, _fmt_out, _value) \ +static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + strscpy(_value, buf, sizeof(_value)); \ + strim(_value); \ + return len; \ +} \ +static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ + __ATTR(_name, 0644, \ + sys_##_prefix##_##_name##_show, \ + sys_##_prefix##_##_name##_store) + +/* + * ipl section + */ + +static __init enum ipl_type get_ipl_type(void) +{ + if (!ipl_block_valid) + return IPL_TYPE_UNKNOWN; + + switch (ipl_block.pb0_hdr.pbt) { + case IPL_PBT_CCW: + return IPL_TYPE_CCW; + case IPL_PBT_FCP: + if (ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) + return IPL_TYPE_FCP_DUMP; + else + return IPL_TYPE_FCP; + case IPL_PBT_NVME: + if (ipl_block.nvme.opt == IPL_PB0_NVME_OPT_DUMP) + return IPL_TYPE_NVME_DUMP; + else + return IPL_TYPE_NVME; + case IPL_PBT_ECKD: + if (ipl_block.eckd.opt == IPL_PB0_ECKD_OPT_DUMP) + return IPL_TYPE_ECKD_DUMP; + else + return IPL_TYPE_ECKD; + } + return IPL_TYPE_UNKNOWN; +} + +struct ipl_info ipl_info; +EXPORT_SYMBOL_GPL(ipl_info); + +static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(ipl_info.type)); +} + +static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); + +static ssize_t ipl_secure_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%i\n", !!ipl_secure_flag); +} + +static struct kobj_attribute sys_ipl_secure_attr = + __ATTR(secure, 0444, 
ipl_secure_show, NULL); + +static ssize_t ipl_has_secure_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%i\n", !!sclp.has_sipl); +} + +static struct kobj_attribute sys_ipl_has_secure_attr = + __ATTR(has_secure, 0444, ipl_has_secure_show, NULL); + +static ssize_t ipl_vm_parm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char parm[DIAG308_VMPARM_SIZE + 1] = {}; + + if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW)) + ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block); + return sprintf(page, "%s\n", parm); +} + +static struct kobj_attribute sys_ipl_vm_parm_attr = + __ATTR(parm, 0444, ipl_vm_parm_show, NULL); + +static ssize_t sys_ipl_device_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + switch (ipl_info.type) { + case IPL_TYPE_CCW: + return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid, + ipl_block.ccw.devno); + case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: + return sprintf(page, "0.%x.%04x\n", ipl_block.eckd.ssid, + ipl_block.eckd.devno); + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno); + case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: + return sprintf(page, "%08ux\n", ipl_block.nvme.fid); + default: + return 0; + } +} + +static struct kobj_attribute sys_ipl_device_attr = + __ATTR(device, 0444, sys_ipl_device_show, NULL); + +static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + return memory_read_from_buffer(buf, count, &off, &ipl_block, + ipl_block.hdr.len); +} +static struct bin_attribute ipl_parameter_attr = + __BIN_ATTR(binary_parameter, 0444, ipl_parameter_read, NULL, + PAGE_SIZE); + +static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = ipl_block.fcp.scp_data_len; + void *scp_data = &ipl_block.fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t ipl_nvme_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = ipl_block.nvme.scp_data_len; + void *scp_data = &ipl_block.nvme.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t ipl_eckd_scp_data_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + unsigned int size = ipl_block.eckd.scp_data_len; + void *scp_data = &ipl_block.eckd.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static struct bin_attribute ipl_scp_data_attr = + __BIN_ATTR(scp_data, 0444, ipl_scp_data_read, NULL, PAGE_SIZE); + +static struct bin_attribute ipl_nvme_scp_data_attr = + __BIN_ATTR(scp_data, 0444, ipl_nvme_scp_data_read, NULL, PAGE_SIZE); + +static struct bin_attribute ipl_eckd_scp_data_attr = + __BIN_ATTR(scp_data, 0444, ipl_eckd_scp_data_read, NULL, PAGE_SIZE); + +static struct bin_attribute *ipl_fcp_bin_attrs[] = { + &ipl_parameter_attr, + &ipl_scp_data_attr, + NULL, +}; + +static struct bin_attribute *ipl_nvme_bin_attrs[] = { + &ipl_parameter_attr, + &ipl_nvme_scp_data_attr, + NULL, +}; + +static struct bin_attribute *ipl_eckd_bin_attrs[] = { + &ipl_parameter_attr, + &ipl_eckd_scp_data_attr, + NULL, +}; + +/* FCP ipl device attributes */ + +DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, 
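/*
 * Each DEFINE_IPL_ATTR_RO(ipl_fcp, <name>, ...) below expands, via
 * IPL_ATTR_SHOW_FN(), into a sys_ipl_fcp_<name>_show() helper plus a
 * read-only kobj_attribute named <name>; since ipl_fcp_attr_group has no
 * .name, the files show up directly under the "ipl" kset, e.g.
 * /sys/firmware/ipl/wwpn.
 */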
"0x%016llx\n", + (unsigned long long)ipl_block.fcp.wwpn); +DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n", + (unsigned long long)ipl_block.fcp.lun); +DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", + (unsigned long long)ipl_block.fcp.bootprog); +DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", + (unsigned long long)ipl_block.fcp.br_lba); + +/* NVMe ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_nvme, fid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.fid); +DEFINE_IPL_ATTR_RO(ipl_nvme, nsid, "0x%08llx\n", + (unsigned long long)ipl_block.nvme.nsid); +DEFINE_IPL_ATTR_RO(ipl_nvme, bootprog, "%lld\n", + (unsigned long long)ipl_block.nvme.bootprog); +DEFINE_IPL_ATTR_RO(ipl_nvme, br_lba, "%lld\n", + (unsigned long long)ipl_block.nvme.br_lba); + +/* ECKD ipl device attributes */ +DEFINE_IPL_ATTR_RO(ipl_eckd, bootprog, "%lld\n", + (unsigned long long)ipl_block.eckd.bootprog); + +#define IPL_ATTR_BR_CHR_SHOW_FN(_name, _ipb) \ +static ssize_t eckd_##_name##_br_chr_show(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + char *buf) \ +{ \ + struct ipl_pb0_eckd *ipb = &(_ipb); \ + \ + if (!ipb->br_chr.cyl && \ + !ipb->br_chr.head && \ + !ipb->br_chr.record) \ + return sprintf(buf, "auto\n"); \ + \ + return sprintf(buf, "0x%x,0x%x,0x%x\n", \ + ipb->br_chr.cyl, \ + ipb->br_chr.head, \ + ipb->br_chr.record); \ +} + +#define IPL_ATTR_BR_CHR_STORE_FN(_name, _ipb) \ +static ssize_t eckd_##_name##_br_chr_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + struct ipl_pb0_eckd *ipb = &(_ipb); \ + unsigned long args[3] = { 0 }; \ + char *p, *p1, *tmp = NULL; \ + int i, rc; \ + \ + if (!strncmp(buf, "auto", 4)) \ + goto out; \ + \ + tmp = kstrdup(buf, GFP_KERNEL); \ + p = tmp; \ + for (i = 0; i < 3; i++) { \ + p1 = strsep(&p, ", "); \ + if (!p1) { \ + rc = -EINVAL; \ + goto err; \ + } \ + rc = kstrtoul(p1, 0, args + i); \ + if (rc) \ + goto err; \ + } \ + \ + rc = -EINVAL; \ + if (i != 3) \ + goto err; \ + \ + if ((args[0] || args[1]) && !args[2]) \ + goto err; \ + \ + if (args[0] > UINT_MAX || args[1] > 255 || args[2] > 255) \ + goto err; \ + \ +out: \ + ipb->br_chr.cyl = args[0]; \ + ipb->br_chr.head = args[1]; \ + ipb->br_chr.record = args[2]; \ + rc = len; \ +err: \ + kfree(tmp); \ + return rc; \ +} + +IPL_ATTR_BR_CHR_SHOW_FN(ipl, ipl_block.eckd); +static struct kobj_attribute sys_ipl_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_ipl_br_chr_show, NULL); + +IPL_ATTR_BR_CHR_SHOW_FN(reipl, reipl_block_eckd->eckd); +IPL_ATTR_BR_CHR_STORE_FN(reipl, reipl_block_eckd->eckd); + +static struct kobj_attribute sys_reipl_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_reipl_br_chr_show, eckd_reipl_br_chr_store); + +static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char loadparm[LOADPARM_LEN + 1] = {}; + + if (!sclp_ipl_info.is_valid) + return sprintf(page, "#unknown#\n"); + memcpy(loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + strim(loadparm); + return sprintf(page, "%s\n", loadparm); +} + +static struct kobj_attribute sys_ipl_ccw_loadparm_attr = + __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); + +static struct attribute *ipl_fcp_attrs[] = { + &sys_ipl_device_attr.attr, + &sys_ipl_fcp_wwpn_attr.attr, + &sys_ipl_fcp_lun_attr.attr, + &sys_ipl_fcp_bootprog_attr.attr, + &sys_ipl_fcp_br_lba_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_fcp_attr_group = { + .attrs = ipl_fcp_attrs, + .bin_attrs = ipl_fcp_bin_attrs, 
+}; + +static struct attribute *ipl_nvme_attrs[] = { + &sys_ipl_nvme_fid_attr.attr, + &sys_ipl_nvme_nsid_attr.attr, + &sys_ipl_nvme_bootprog_attr.attr, + &sys_ipl_nvme_br_lba_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_nvme_attr_group = { + .attrs = ipl_nvme_attrs, + .bin_attrs = ipl_nvme_bin_attrs, +}; + +static struct attribute *ipl_eckd_attrs[] = { + &sys_ipl_eckd_bootprog_attr.attr, + &sys_ipl_eckd_br_chr_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_device_attr.attr, + NULL, +}; + +static struct attribute_group ipl_eckd_attr_group = { + .attrs = ipl_eckd_attrs, + .bin_attrs = ipl_eckd_bin_attrs, +}; + +/* CCW ipl device attributes */ + +static struct attribute *ipl_ccw_attrs_vm[] = { + &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + &sys_ipl_vm_parm_attr.attr, + NULL, +}; + +static struct attribute *ipl_ccw_attrs_lpar[] = { + &sys_ipl_device_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group ipl_ccw_attr_group_vm = { + .attrs = ipl_ccw_attrs_vm, +}; + +static struct attribute_group ipl_ccw_attr_group_lpar = { + .attrs = ipl_ccw_attrs_lpar +}; + +static struct attribute *ipl_common_attrs[] = { + &sys_ipl_type_attr.attr, + &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, + NULL, +}; + +static struct attribute_group ipl_common_attr_group = { + .attrs = ipl_common_attrs, +}; + +static struct kset *ipl_kset; + +static void __ipl_run(void *unused) +{ + diag308(DIAG308_LOAD_CLEAR, NULL); +} + +static void ipl_run(struct shutdown_trigger *trigger) +{ + smp_call_ipl_cpu(__ipl_run, NULL); +} + +static int __init ipl_init(void) +{ + int rc; + + ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj); + if (!ipl_kset) { + rc = -ENOMEM; + goto out; + } + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group); + if (rc) + goto out; + switch (ipl_info.type) { + case IPL_TYPE_CCW: + if (MACHINE_IS_VM) + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_ccw_attr_group_vm); + else + rc = sysfs_create_group(&ipl_kset->kobj, + &ipl_ccw_attr_group_lpar); + break; + case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group); + break; + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group); + break; + case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); + break; + default: + break; + } +out: + if (rc) + panic("ipl_init failed: rc = %i\n", rc); + + return 0; +} + +static struct shutdown_action __refdata ipl_action = { + .name = SHUTDOWN_ACTION_IPL_STR, + .fn = ipl_run, + .init = ipl_init, +}; + +/* + * reipl shutdown action: Reboot Linux on shutdown. 
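 *
 * The attributes registered below appear under /sys/firmware/reipl. As an
 * illustration (the device number is made up), re-IPL from a CCW device
 * could be prepared with:
 *   echo 0.0.7e4a > /sys/firmware/reipl/ccw/device
 *   echo ccw > /sys/firmware/reipl/reipl_type
 * __reipl_run() then hands the selected parameter block to the machine via
 * diag308(DIAG308_SET, ...) before triggering the load.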
+ */ + +/* VM IPL PARM attributes */ +static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, + char *page) +{ + char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; + + ipl_block_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); + return sprintf(page, "%s\n", vmparm); +} + +static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb, + size_t vmparm_max, + const char *buf, size_t len) +{ + int i, ip_len; + + /* ignore trailing newline */ + ip_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + ip_len--; + + if (ip_len > vmparm_max) + return -EINVAL; + + /* parm is used to store kernel options, check for common chars */ + for (i = 0; i < ip_len; i++) + if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i]))) + return -EINVAL; + + memset(ipb->ccw.vm_parm, 0, DIAG308_VMPARM_SIZE); + ipb->ccw.vm_parm_len = ip_len; + if (ip_len > 0) { + ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; + memcpy(ipb->ccw.vm_parm, buf, ip_len); + ASCEBC(ipb->ccw.vm_parm, ip_len); + } else { + ipb->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_VP; + } + + return len; +} + +/* NSS wrapper */ +static ssize_t reipl_nss_vmparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_vmparm_show(reipl_block_nss, page); +} + +static ssize_t reipl_nss_vmparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len); +} + +/* CCW wrapper */ +static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return reipl_generic_vmparm_show(reipl_block_ccw, page); +} + +static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len); +} + +static struct kobj_attribute sys_reipl_nss_vmparm_attr = + __ATTR(parm, 0644, reipl_nss_vmparm_show, + reipl_nss_vmparm_store); +static struct kobj_attribute sys_reipl_ccw_vmparm_attr = + __ATTR(parm, 0644, reipl_ccw_vmparm_show, + reipl_ccw_vmparm_store); + +/* FCP reipl device attributes */ + +static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_fcp->fcp.scp_data_len; + void *scp_data = reipl_block_fcp->fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t scpdata_len = count; + size_t padding; + + + if (off) + return -EINVAL; + + memcpy(reipl_block_fcp->fcp.scp_data, buf, count); + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_fcp->fcp.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN + scpdata_len; + reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN + scpdata_len; + reipl_block_fcp->fcp.scp_data_len = scpdata_len; + + return count; +} +static struct bin_attribute sys_reipl_fcp_scp_data_attr = + __BIN_ATTR(scp_data, 0644, reipl_fcp_scpdata_read, + reipl_fcp_scpdata_write, DIAG308_SCPDATA_SIZE); + +static struct bin_attribute *reipl_fcp_bin_attrs[] = { + &sys_reipl_fcp_scp_data_attr, + NULL, +}; + +DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n", + reipl_block_fcp->fcp.wwpn); +DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n", + 
reipl_block_fcp->fcp.lun); +DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n", + reipl_block_fcp->fcp.bootprog); +DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n", + reipl_block_fcp->fcp.br_lba); +DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", + reipl_block_fcp->fcp.devno); + +static void reipl_get_ascii_loadparm(char *loadparm, + struct ipl_parameter_block *ibp) +{ + memcpy(loadparm, ibp->common.loadparm, LOADPARM_LEN); + EBCASC(loadparm, LOADPARM_LEN); + loadparm[LOADPARM_LEN] = 0; + strim(loadparm); +} + +static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb, + char *page) +{ + char buf[LOADPARM_LEN + 1]; + + reipl_get_ascii_loadparm(buf, ipb); + return sprintf(page, "%s\n", buf); +} + +static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, + const char *buf, size_t len) +{ + int i, lp_len; + + /* ignore trailing newline */ + lp_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + lp_len--; + /* loadparm can have max 8 characters and must not start with a blank */ + if ((lp_len > LOADPARM_LEN) || ((lp_len > 0) && (buf[0] == ' '))) + return -EINVAL; + /* loadparm can only contain "a-z,A-Z,0-9,SP,." */ + for (i = 0; i < lp_len; i++) { + if (isalpha(buf[i]) || isdigit(buf[i]) || (buf[i] == ' ') || + (buf[i] == '.')) + continue; + return -EINVAL; + } + /* initialize loadparm with blanks */ + memset(ipb->common.loadparm, ' ', LOADPARM_LEN); + /* copy and convert to ebcdic */ + memcpy(ipb->common.loadparm, buf, lp_len); + ASCEBC(ipb->common.loadparm, LOADPARM_LEN); + ipb->common.flags |= IPL_PB0_FLAG_LOADPARM; + return len; +} + +#define DEFINE_GENERIC_LOADPARM(name) \ +static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *page) \ +{ \ + return reipl_generic_loadparm_show(reipl_block_##name, page); \ +} \ +static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + const char *buf, size_t len) \ +{ \ + return reipl_generic_loadparm_store(reipl_block_##name, buf, len); \ +} \ +static struct kobj_attribute sys_reipl_##name##_loadparm_attr = \ + __ATTR(loadparm, 0644, reipl_##name##_loadparm_show, \ + reipl_##name##_loadparm_store) + +DEFINE_GENERIC_LOADPARM(fcp); +DEFINE_GENERIC_LOADPARM(nvme); +DEFINE_GENERIC_LOADPARM(ccw); +DEFINE_GENERIC_LOADPARM(nss); +DEFINE_GENERIC_LOADPARM(eckd); + +static ssize_t reipl_fcp_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_fcp_clear); +} + +static ssize_t reipl_fcp_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (kstrtobool(buf, &reipl_fcp_clear) < 0) + return -EINVAL; + return len; +} + +static struct attribute *reipl_fcp_attrs[] = { + &sys_reipl_fcp_device_attr.attr, + &sys_reipl_fcp_wwpn_attr.attr, + &sys_reipl_fcp_lun_attr.attr, + &sys_reipl_fcp_bootprog_attr.attr, + &sys_reipl_fcp_br_lba_attr.attr, + &sys_reipl_fcp_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_fcp_attr_group = { + .attrs = reipl_fcp_attrs, + .bin_attrs = reipl_fcp_bin_attrs, +}; + +static struct kobj_attribute sys_reipl_fcp_clear_attr = + __ATTR(clear, 0644, reipl_fcp_clear_show, reipl_fcp_clear_store); + +/* NVME reipl device attributes */ + +static ssize_t reipl_nvme_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_nvme->nvme.scp_data_len; + void *scp_data = 
reipl_block_nvme->nvme.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_nvme_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t scpdata_len = count; + size_t padding; + + if (off) + return -EINVAL; + + memcpy(reipl_block_nvme->nvme.scp_data, buf, count); + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_nvme->nvme.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_nvme->hdr.len = IPL_BP_FCP_LEN + scpdata_len; + reipl_block_nvme->nvme.len = IPL_BP0_FCP_LEN + scpdata_len; + reipl_block_nvme->nvme.scp_data_len = scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_nvme_scp_data_attr = + __BIN_ATTR(scp_data, 0644, reipl_nvme_scpdata_read, + reipl_nvme_scpdata_write, DIAG308_SCPDATA_SIZE); + +static struct bin_attribute *reipl_nvme_bin_attrs[] = { + &sys_reipl_nvme_scp_data_attr, + NULL, +}; + +DEFINE_IPL_ATTR_RW(reipl_nvme, fid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(reipl_nvme, nsid, "0x%08llx\n", "%llx\n", + reipl_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n", + reipl_block_nvme->nvme.br_lba); + +static struct attribute *reipl_nvme_attrs[] = { + &sys_reipl_nvme_fid_attr.attr, + &sys_reipl_nvme_nsid_attr.attr, + &sys_reipl_nvme_bootprog_attr.attr, + &sys_reipl_nvme_br_lba_attr.attr, + &sys_reipl_nvme_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_nvme_attr_group = { + .attrs = reipl_nvme_attrs, + .bin_attrs = reipl_nvme_bin_attrs +}; + +static ssize_t reipl_nvme_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_nvme_clear); +} + +static ssize_t reipl_nvme_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (kstrtobool(buf, &reipl_nvme_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_nvme_clear_attr = + __ATTR(clear, 0644, reipl_nvme_clear_show, reipl_nvme_clear_store); + +/* CCW reipl device attributes */ +DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); + +static ssize_t reipl_ccw_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_ccw_clear); +} + +static ssize_t reipl_ccw_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (kstrtobool(buf, &reipl_ccw_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_ccw_clear_attr = + __ATTR(clear, 0644, reipl_ccw_clear_show, reipl_ccw_clear_store); + +static struct attribute *reipl_ccw_attrs_vm[] = { + &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, + &sys_reipl_ccw_vmparm_attr.attr, + &sys_reipl_ccw_clear_attr.attr, + NULL, +}; + +static struct attribute *reipl_ccw_attrs_lpar[] = { + &sys_reipl_ccw_device_attr.attr, + &sys_reipl_ccw_loadparm_attr.attr, + &sys_reipl_ccw_clear_attr.attr, + NULL, +}; + +static struct attribute_group reipl_ccw_attr_group_vm = { + .name = IPL_CCW_STR, + .attrs = reipl_ccw_attrs_vm, +}; + +static struct attribute_group reipl_ccw_attr_group_lpar = { + .name = IPL_CCW_STR, + .attrs = reipl_ccw_attrs_lpar, +}; + +/* ECKD reipl device attributes */ + +static ssize_t 
reipl_eckd_scpdata_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_eckd->eckd.scp_data_len; + void *scp_data = reipl_block_eckd->eckd.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_eckd_scpdata_write(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t scpdata_len = count; + size_t padding; + + if (off) + return -EINVAL; + + memcpy(reipl_block_eckd->eckd.scp_data, buf, count); + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_eckd->eckd.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN + scpdata_len; + reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN + scpdata_len; + reipl_block_eckd->eckd.scp_data_len = scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_eckd_scp_data_attr = + __BIN_ATTR(scp_data, 0644, reipl_eckd_scpdata_read, + reipl_eckd_scpdata_write, DIAG308_SCPDATA_SIZE); + +static struct bin_attribute *reipl_eckd_bin_attrs[] = { + &sys_reipl_eckd_scp_data_attr, + NULL, +}; + +DEFINE_IPL_CCW_ATTR_RW(reipl_eckd, device, reipl_block_eckd->eckd); +DEFINE_IPL_ATTR_RW(reipl_eckd, bootprog, "%lld\n", "%lld\n", + reipl_block_eckd->eckd.bootprog); + +static struct attribute *reipl_eckd_attrs[] = { + &sys_reipl_eckd_device_attr.attr, + &sys_reipl_eckd_bootprog_attr.attr, + &sys_reipl_eckd_br_chr_attr.attr, + &sys_reipl_eckd_loadparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_eckd_attr_group = { + .attrs = reipl_eckd_attrs, + .bin_attrs = reipl_eckd_bin_attrs +}; + +static ssize_t reipl_eckd_clear_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%u\n", reipl_eckd_clear); +} + +static ssize_t reipl_eckd_clear_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + if (kstrtobool(buf, &reipl_eckd_clear) < 0) + return -EINVAL; + return len; +} + +static struct kobj_attribute sys_reipl_eckd_clear_attr = + __ATTR(clear, 0644, reipl_eckd_clear_show, reipl_eckd_clear_store); + +/* NSS reipl device attributes */ +static void reipl_get_ascii_nss_name(char *dst, + struct ipl_parameter_block *ipb) +{ + memcpy(dst, ipb->ccw.nss_name, NSS_NAME_SIZE); + EBCASC(dst, NSS_NAME_SIZE); + dst[NSS_NAME_SIZE] = 0; +} + +static ssize_t reipl_nss_name_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + char nss_name[NSS_NAME_SIZE + 1] = {}; + + reipl_get_ascii_nss_name(nss_name, reipl_block_nss); + return sprintf(page, "%s\n", nss_name); +} + +static ssize_t reipl_nss_name_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int nss_len; + + /* ignore trailing newline */ + nss_len = len; + if ((len > 0) && (buf[len - 1] == '\n')) + nss_len--; + + if (nss_len > NSS_NAME_SIZE) + return -EINVAL; + + memset(reipl_block_nss->ccw.nss_name, 0x40, NSS_NAME_SIZE); + if (nss_len > 0) { + reipl_block_nss->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_NSS; + memcpy(reipl_block_nss->ccw.nss_name, buf, nss_len); + ASCEBC(reipl_block_nss->ccw.nss_name, nss_len); + EBC_TOUPPER(reipl_block_nss->ccw.nss_name, nss_len); + } else { + reipl_block_nss->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_NSS; + } + + return len; +} + +static struct kobj_attribute sys_reipl_nss_name_attr = + __ATTR(name, 0644, reipl_nss_name_show, + reipl_nss_name_store); + 
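The NSS attributes above are only registered when Linux runs as a z/VM guest (see reipl_nss_init() further down). A minimal user-space sketch that selects an NSS for the next re-IPL, assuming sysfs is mounted at /sys and using a made-up NSS name, could look like this:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    /* Write a string to a sysfs attribute, returning 0 on success. */
    static int write_sysfs(const char *path, const char *val)
    {
    	int fd = open(path, O_WRONLY);
    	ssize_t n;

    	if (fd < 0)
    		return -1;
    	n = write(fd, val, strlen(val));
    	close(fd);
    	return n == (ssize_t)strlen(val) ? 0 : -1;
    }

    int main(void)
    {
    	/* "LINUX1" is a hypothetical NSS name; only meaningful under z/VM. */
    	if (write_sysfs("/sys/firmware/reipl/nss/name", "LINUX1\n") ||
    	    write_sysfs("/sys/firmware/reipl/reipl_type", "nss\n")) {
    		perror("configuring reipl via sysfs");
    		return 1;
    	}
    	return 0;
    }

reipl_nss_name_store() then pads the name with EBCDIC blanks, converts it to uppercase EBCDIC and sets IPL_PB0_CCW_VM_FLAG_NSS, so __reipl_run() performs a clearing load of that NSS on the next reboot trigger.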
+static struct attribute *reipl_nss_attrs[] = { + &sys_reipl_nss_name_attr.attr, + &sys_reipl_nss_loadparm_attr.attr, + &sys_reipl_nss_vmparm_attr.attr, + NULL, +}; + +static struct attribute_group reipl_nss_attr_group = { + .name = IPL_NSS_STR, + .attrs = reipl_nss_attrs, +}; + +void set_os_info_reipl_block(void) +{ + os_info_entry_add(OS_INFO_REIPL_BLOCK, reipl_block_actual, + reipl_block_actual->hdr.len); +} + +/* reipl type */ + +static int reipl_set_type(enum ipl_type type) +{ + if (!(reipl_capabilities & type)) + return -EINVAL; + + switch(type) { + case IPL_TYPE_CCW: + reipl_block_actual = reipl_block_ccw; + break; + case IPL_TYPE_ECKD: + reipl_block_actual = reipl_block_eckd; + break; + case IPL_TYPE_FCP: + reipl_block_actual = reipl_block_fcp; + break; + case IPL_TYPE_NVME: + reipl_block_actual = reipl_block_nvme; + break; + case IPL_TYPE_NSS: + reipl_block_actual = reipl_block_nss; + break; + default: + break; + } + reipl_type = type; + return 0; +} + +static ssize_t reipl_type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", ipl_type_str(reipl_type)); +} + +static ssize_t reipl_type_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int rc = -EINVAL; + + if (strncmp(buf, IPL_CCW_STR, strlen(IPL_CCW_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_CCW); + else if (strncmp(buf, IPL_ECKD_STR, strlen(IPL_ECKD_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_ECKD); + else if (strncmp(buf, IPL_FCP_STR, strlen(IPL_FCP_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_FCP); + else if (strncmp(buf, IPL_NVME_STR, strlen(IPL_NVME_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NVME); + else if (strncmp(buf, IPL_NSS_STR, strlen(IPL_NSS_STR)) == 0) + rc = reipl_set_type(IPL_TYPE_NSS); + return (rc != 0) ? 
rc : len; +} + +static struct kobj_attribute reipl_type_attr = + __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store); + +static struct kset *reipl_kset; +static struct kset *reipl_fcp_kset; +static struct kset *reipl_nvme_kset; +static struct kset *reipl_eckd_kset; + +static void __reipl_run(void *unused) +{ + switch (reipl_type) { + case IPL_TYPE_CCW: + diag308(DIAG308_SET, reipl_block_ccw); + if (reipl_ccw_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL_DUMP, NULL); + break; + case IPL_TYPE_ECKD: + diag308(DIAG308_SET, reipl_block_eckd); + if (reipl_eckd_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); + break; + case IPL_TYPE_FCP: + diag308(DIAG308_SET, reipl_block_fcp); + if (reipl_fcp_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); + break; + case IPL_TYPE_NVME: + diag308(DIAG308_SET, reipl_block_nvme); + if (reipl_nvme_clear) + diag308(DIAG308_LOAD_CLEAR, NULL); + else + diag308(DIAG308_LOAD_NORMAL, NULL); + break; + case IPL_TYPE_NSS: + diag308(DIAG308_SET, reipl_block_nss); + diag308(DIAG308_LOAD_CLEAR, NULL); + break; + case IPL_TYPE_UNKNOWN: + diag308(DIAG308_LOAD_CLEAR, NULL); + break; + case IPL_TYPE_FCP_DUMP: + case IPL_TYPE_NVME_DUMP: + case IPL_TYPE_ECKD_DUMP: + break; + } + disabled_wait(); +} + +static void reipl_run(struct shutdown_trigger *trigger) +{ + smp_call_ipl_cpu(__reipl_run, NULL); +} + +static void reipl_block_ccw_init(struct ipl_parameter_block *ipb) +{ + ipb->hdr.len = IPL_BP_CCW_LEN; + ipb->hdr.version = IPL_PARM_BLOCK_VERSION; + ipb->pb0_hdr.len = IPL_BP0_CCW_LEN; + ipb->pb0_hdr.pbt = IPL_PBT_CCW; +} + +static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb) +{ + /* LOADPARM */ + /* check if read scp info worked and set loadparm */ + if (sclp_ipl_info.is_valid) + memcpy(ipb->ccw.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN); + else + /* read scp info failed: set empty loadparm (EBCDIC blanks) */ + memset(ipb->ccw.loadparm, 0x40, LOADPARM_LEN); + ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM; + + /* VM PARM */ + if (MACHINE_IS_VM && ipl_block_valid && + (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) { + + ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP; + ipb->ccw.vm_parm_len = ipl_block.ccw.vm_parm_len; + memcpy(ipb->ccw.vm_parm, + ipl_block.ccw.vm_parm, DIAG308_VMPARM_SIZE); + } +} + +static int __init reipl_nss_init(void) +{ + int rc; + + if (!MACHINE_IS_VM) + return 0; + + reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nss) + return -ENOMEM; + + rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group); + if (rc) + return rc; + + reipl_block_ccw_init(reipl_block_nss); + reipl_capabilities |= IPL_TYPE_NSS; + return 0; +} + +static int __init reipl_ccw_init(void) +{ + int rc; + + reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_ccw) + return -ENOMEM; + + rc = sysfs_create_group(&reipl_kset->kobj, + MACHINE_IS_VM ? 
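/*
 * Under z/VM the CCW group additionally exposes the "parm" file (the VM
 * parameter string handled by reipl_ccw_vmparm_store() above); on LPAR
 * only the device, loadparm and clear files are offered.
 */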
&reipl_ccw_attr_group_vm + : &reipl_ccw_attr_group_lpar); + if (rc) + return rc; + + reipl_block_ccw_init(reipl_block_ccw); + if (ipl_info.type == IPL_TYPE_CCW) { + reipl_block_ccw->ccw.ssid = ipl_block.ccw.ssid; + reipl_block_ccw->ccw.devno = ipl_block.ccw.devno; + reipl_block_ccw_fill_parms(reipl_block_ccw); + } + + reipl_capabilities |= IPL_TYPE_CCW; + return 0; +} + +static int __init reipl_fcp_init(void) +{ + int rc; + + reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_fcp) + return -ENOMEM; + + /* sysfs: create fcp kset for mixing attr group and bin attrs */ + reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL, + &reipl_kset->kobj); + if (!reipl_fcp_kset) { + free_page((unsigned long) reipl_block_fcp); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_fcp_kset->kobj, + &sys_reipl_fcp_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_fcp_clear = true; + } + + if (ipl_info.type == IPL_TYPE_FCP) { + memcpy(reipl_block_fcp, &ipl_block, sizeof(ipl_block)); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * is invalid in the SCSI IPL parameter block, so take it + * always from sclp_ipl_info. + */ + memcpy(reipl_block_fcp->fcp.loadparm, sclp_ipl_info.loadparm, + LOADPARM_LEN); + } else { + reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN; + reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN; + reipl_block_fcp->fcp.pbt = IPL_PBT_FCP; + reipl_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_FCP; + return 0; + +out2: + sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); +out1: + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; +} + +static int __init reipl_nvme_init(void) +{ + int rc; + + reipl_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!reipl_block_nvme) + return -ENOMEM; + + /* sysfs: create kset for mixing attr group and bin attrs */ + reipl_nvme_kset = kset_create_and_add(IPL_NVME_STR, NULL, + &reipl_kset->kobj); + if (!reipl_nvme_kset) { + free_page((unsigned long) reipl_block_nvme); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_nvme_kset->kobj, + &sys_reipl_nvme_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_nvme_clear = true; + } + + if (ipl_info.type == IPL_TYPE_NVME) { + memcpy(reipl_block_nvme, &ipl_block, sizeof(ipl_block)); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * is invalid in the IPL parameter block, so take it + * always from sclp_ipl_info. 
+ */ + memcpy(reipl_block_nvme->nvme.loadparm, sclp_ipl_info.loadparm, + LOADPARM_LEN); + } else { + reipl_block_nvme->hdr.len = IPL_BP_NVME_LEN; + reipl_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_nvme->nvme.len = IPL_BP0_NVME_LEN; + reipl_block_nvme->nvme.pbt = IPL_PBT_NVME; + reipl_block_nvme->nvme.opt = IPL_PB0_NVME_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_NVME; + return 0; + +out2: + sysfs_remove_group(&reipl_nvme_kset->kobj, &reipl_nvme_attr_group); +out1: + kset_unregister(reipl_nvme_kset); + free_page((unsigned long) reipl_block_nvme); + return rc; +} + +static int __init reipl_eckd_init(void) +{ + int rc; + + if (!sclp.has_sipl_eckd) + return 0; + + reipl_block_eckd = (void *)get_zeroed_page(GFP_KERNEL); + if (!reipl_block_eckd) + return -ENOMEM; + + /* sysfs: create kset for mixing attr group and bin attrs */ + reipl_eckd_kset = kset_create_and_add(IPL_ECKD_STR, NULL, + &reipl_kset->kobj); + if (!reipl_eckd_kset) { + free_page((unsigned long)reipl_block_eckd); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group); + if (rc) + goto out1; + + if (test_facility(141)) { + rc = sysfs_create_file(&reipl_eckd_kset->kobj, + &sys_reipl_eckd_clear_attr.attr); + if (rc) + goto out2; + } else { + reipl_eckd_clear = true; + } + + if (ipl_info.type == IPL_TYPE_ECKD) { + memcpy(reipl_block_eckd, &ipl_block, sizeof(ipl_block)); + } else { + reipl_block_eckd->hdr.len = IPL_BP_ECKD_LEN; + reipl_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION; + reipl_block_eckd->eckd.len = IPL_BP0_ECKD_LEN; + reipl_block_eckd->eckd.pbt = IPL_PBT_ECKD; + reipl_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_IPL; + } + reipl_capabilities |= IPL_TYPE_ECKD; + return 0; + +out2: + sysfs_remove_group(&reipl_eckd_kset->kobj, &reipl_eckd_attr_group); +out1: + kset_unregister(reipl_eckd_kset); + free_page((unsigned long)reipl_block_eckd); + return rc; +} + +static int __init reipl_type_init(void) +{ + enum ipl_type reipl_type = ipl_info.type; + struct ipl_parameter_block *reipl_block; + unsigned long size; + + reipl_block = os_info_old_entry(OS_INFO_REIPL_BLOCK, &size); + if (!reipl_block) + goto out; + /* + * If we have an OS info reipl block, this will be used + */ + if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) { + memcpy(reipl_block_fcp, reipl_block, size); + reipl_type = IPL_TYPE_FCP; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_NVME) { + memcpy(reipl_block_nvme, reipl_block, size); + reipl_type = IPL_TYPE_NVME; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) { + memcpy(reipl_block_ccw, reipl_block, size); + reipl_type = IPL_TYPE_CCW; + } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_ECKD) { + memcpy(reipl_block_eckd, reipl_block, size); + reipl_type = IPL_TYPE_ECKD; + } +out: + return reipl_set_type(reipl_type); +} + +static int __init reipl_init(void) +{ + int rc; + + reipl_kset = kset_create_and_add("reipl", NULL, firmware_kobj); + if (!reipl_kset) + return -ENOMEM; + rc = sysfs_create_file(&reipl_kset->kobj, &reipl_type_attr.attr); + if (rc) { + kset_unregister(reipl_kset); + return rc; + } + rc = reipl_ccw_init(); + if (rc) + return rc; + rc = reipl_eckd_init(); + if (rc) + return rc; + rc = reipl_fcp_init(); + if (rc) + return rc; + rc = reipl_nvme_init(); + if (rc) + return rc; + rc = reipl_nss_init(); + if (rc) + return rc; + return reipl_type_init(); +} + +static struct shutdown_action __refdata reipl_action = { + .name = SHUTDOWN_ACTION_REIPL_STR, + .fn = reipl_run, + .init = reipl_init, +}; + +/* + * dump shutdown action: Dump Linux on 
shutdown. + */ + +/* FCP dump device attributes */ + +DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n", + dump_block_fcp->fcp.wwpn); +DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n", + dump_block_fcp->fcp.lun); +DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n", + dump_block_fcp->fcp.bootprog); +DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n", + dump_block_fcp->fcp.br_lba); +DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n", + dump_block_fcp->fcp.devno); + +static struct attribute *dump_fcp_attrs[] = { + &sys_dump_fcp_device_attr.attr, + &sys_dump_fcp_wwpn_attr.attr, + &sys_dump_fcp_lun_attr.attr, + &sys_dump_fcp_bootprog_attr.attr, + &sys_dump_fcp_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group dump_fcp_attr_group = { + .name = IPL_FCP_STR, + .attrs = dump_fcp_attrs, +}; + +/* NVME dump device attributes */ +DEFINE_IPL_ATTR_RW(dump_nvme, fid, "0x%08llx\n", "%llx\n", + dump_block_nvme->nvme.fid); +DEFINE_IPL_ATTR_RW(dump_nvme, nsid, "0x%08llx\n", "%llx\n", + dump_block_nvme->nvme.nsid); +DEFINE_IPL_ATTR_RW(dump_nvme, bootprog, "%lld\n", "%llx\n", + dump_block_nvme->nvme.bootprog); +DEFINE_IPL_ATTR_RW(dump_nvme, br_lba, "%lld\n", "%llx\n", + dump_block_nvme->nvme.br_lba); + +static struct attribute *dump_nvme_attrs[] = { + &sys_dump_nvme_fid_attr.attr, + &sys_dump_nvme_nsid_attr.attr, + &sys_dump_nvme_bootprog_attr.attr, + &sys_dump_nvme_br_lba_attr.attr, + NULL, +}; + +static struct attribute_group dump_nvme_attr_group = { + .name = IPL_NVME_STR, + .attrs = dump_nvme_attrs, +}; + +/* ECKD dump device attributes */ +DEFINE_IPL_CCW_ATTR_RW(dump_eckd, device, dump_block_eckd->eckd); +DEFINE_IPL_ATTR_RW(dump_eckd, bootprog, "%lld\n", "%llx\n", + dump_block_eckd->eckd.bootprog); + +IPL_ATTR_BR_CHR_SHOW_FN(dump, dump_block_eckd->eckd); +IPL_ATTR_BR_CHR_STORE_FN(dump, dump_block_eckd->eckd); + +static struct kobj_attribute sys_dump_eckd_br_chr_attr = + __ATTR(br_chr, 0644, eckd_dump_br_chr_show, eckd_dump_br_chr_store); + +static struct attribute *dump_eckd_attrs[] = { + &sys_dump_eckd_device_attr.attr, + &sys_dump_eckd_bootprog_attr.attr, + &sys_dump_eckd_br_chr_attr.attr, + NULL, +}; + +static struct attribute_group dump_eckd_attr_group = { + .name = IPL_ECKD_STR, + .attrs = dump_eckd_attrs, +}; + +/* CCW dump device attributes */ +DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw); + +static struct attribute *dump_ccw_attrs[] = { + &sys_dump_ccw_device_attr.attr, + NULL, +}; + +static struct attribute_group dump_ccw_attr_group = { + .name = IPL_CCW_STR, + .attrs = dump_ccw_attrs, +}; + +/* dump type */ + +static int dump_set_type(enum dump_type type) +{ + if (!(dump_capabilities & type)) + return -EINVAL; + dump_type = type; + return 0; +} + +static ssize_t dump_type_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", dump_type_str(dump_type)); +} + +static ssize_t dump_type_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int rc = -EINVAL; + + if (strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_NONE); + else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_CCW); + else if (strncmp(buf, DUMP_ECKD_STR, strlen(DUMP_ECKD_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_ECKD); + else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0) + rc = dump_set_type(DUMP_TYPE_FCP); + else if (strncmp(buf, DUMP_NVME_STR, strlen(DUMP_NVME_STR)) == 0) + rc = 
dump_set_type(DUMP_TYPE_NVME); + return (rc != 0) ? rc : len; +} + +static struct kobj_attribute dump_type_attr = + __ATTR(dump_type, 0644, dump_type_show, dump_type_store); + +static struct kset *dump_kset; + +static void diag308_dump(void *dump_block) +{ + diag308(DIAG308_SET, dump_block); + while (1) { + if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302) + break; + udelay(USEC_PER_SEC); + } +} + +static void __dump_run(void *unused) +{ + switch (dump_type) { + case DUMP_TYPE_CCW: + diag308_dump(dump_block_ccw); + break; + case DUMP_TYPE_ECKD: + diag308_dump(dump_block_eckd); + break; + case DUMP_TYPE_FCP: + diag308_dump(dump_block_fcp); + break; + case DUMP_TYPE_NVME: + diag308_dump(dump_block_nvme); + break; + default: + break; + } +} + +static void dump_run(struct shutdown_trigger *trigger) +{ + if (dump_type == DUMP_TYPE_NONE) + return; + smp_send_stop(); + smp_call_ipl_cpu(__dump_run, NULL); +} + +static int __init dump_ccw_init(void) +{ + int rc; + + dump_block_ccw = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_ccw) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_ccw_attr_group); + if (rc) { + free_page((unsigned long)dump_block_ccw); + return rc; + } + dump_block_ccw->hdr.len = IPL_BP_CCW_LEN; + dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_ccw->ccw.len = IPL_BP0_CCW_LEN; + dump_block_ccw->ccw.pbt = IPL_PBT_CCW; + dump_capabilities |= DUMP_TYPE_CCW; + return 0; +} + +static int __init dump_fcp_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump) + return 0; /* LDIPL DUMP is not installed */ + dump_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_fcp) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_fcp_attr_group); + if (rc) { + free_page((unsigned long)dump_block_fcp); + return rc; + } + dump_block_fcp->hdr.len = IPL_BP_FCP_LEN; + dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_fcp->fcp.len = IPL_BP0_FCP_LEN; + dump_block_fcp->fcp.pbt = IPL_PBT_FCP; + dump_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_FCP; + return 0; +} + +static int __init dump_nvme_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump) + return 0; /* LDIPL DUMP is not installed */ + dump_block_nvme = (void *) get_zeroed_page(GFP_KERNEL); + if (!dump_block_nvme) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_nvme_attr_group); + if (rc) { + free_page((unsigned long)dump_block_nvme); + return rc; + } + dump_block_nvme->hdr.len = IPL_BP_NVME_LEN; + dump_block_nvme->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_nvme->fcp.len = IPL_BP0_NVME_LEN; + dump_block_nvme->fcp.pbt = IPL_PBT_NVME; + dump_block_nvme->fcp.opt = IPL_PB0_NVME_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_NVME; + return 0; +} + +static int __init dump_eckd_init(void) +{ + int rc; + + if (!sclp_ipl_info.has_dump || !sclp.has_sipl_eckd) + return 0; /* LDIPL DUMP is not installed */ + dump_block_eckd = (void *)get_zeroed_page(GFP_KERNEL); + if (!dump_block_eckd) + return -ENOMEM; + rc = sysfs_create_group(&dump_kset->kobj, &dump_eckd_attr_group); + if (rc) { + free_page((unsigned long)dump_block_eckd); + return rc; + } + dump_block_eckd->hdr.len = IPL_BP_ECKD_LEN; + dump_block_eckd->hdr.version = IPL_PARM_BLOCK_VERSION; + dump_block_eckd->eckd.len = IPL_BP0_ECKD_LEN; + dump_block_eckd->eckd.pbt = IPL_PBT_ECKD; + dump_block_eckd->eckd.opt = IPL_PB0_ECKD_OPT_DUMP; + dump_capabilities |= DUMP_TYPE_ECKD; + return 0; +} + +static int __init dump_init(void) +{ + int rc; + + dump_kset = 
kset_create_and_add("dump", NULL, firmware_kobj); + if (!dump_kset) + return -ENOMEM; + rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr); + if (rc) { + kset_unregister(dump_kset); + return rc; + } + rc = dump_ccw_init(); + if (rc) + return rc; + rc = dump_eckd_init(); + if (rc) + return rc; + rc = dump_fcp_init(); + if (rc) + return rc; + rc = dump_nvme_init(); + if (rc) + return rc; + dump_set_type(DUMP_TYPE_NONE); + return 0; +} + +static struct shutdown_action __refdata dump_action = { + .name = SHUTDOWN_ACTION_DUMP_STR, + .fn = dump_run, + .init = dump_init, +}; + +static void dump_reipl_run(struct shutdown_trigger *trigger) +{ + struct lowcore *abs_lc; + unsigned int csum; + + /* + * Set REIPL_CLEAR flag in os_info flags entry indicating + * 'clear' sysfs attribute has been set on the panicked system + * for specified reipl type. + * Always set for IPL_TYPE_NSS and IPL_TYPE_UNKNOWN. + */ + if ((reipl_type == IPL_TYPE_CCW && reipl_ccw_clear) || + (reipl_type == IPL_TYPE_ECKD && reipl_eckd_clear) || + (reipl_type == IPL_TYPE_FCP && reipl_fcp_clear) || + (reipl_type == IPL_TYPE_NVME && reipl_nvme_clear) || + reipl_type == IPL_TYPE_NSS || + reipl_type == IPL_TYPE_UNKNOWN) + os_info_flags |= OS_INFO_FLAG_REIPL_CLEAR; + os_info_entry_add(OS_INFO_FLAGS_ENTRY, &os_info_flags, sizeof(os_info_flags)); + csum = (__force unsigned int) + csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); + abs_lc = get_abs_lowcore(); + abs_lc->ipib = __pa(reipl_block_actual); + abs_lc->ipib_checksum = csum; + put_abs_lowcore(abs_lc); + dump_run(trigger); +} + +static struct shutdown_action __refdata dump_reipl_action = { + .name = SHUTDOWN_ACTION_DUMP_REIPL_STR, + .fn = dump_reipl_run, +}; + +/* + * vmcmd shutdown action: Trigger vm command on shutdown. 
+ */ + +static char vmcmd_on_reboot[128]; +static char vmcmd_on_panic[128]; +static char vmcmd_on_halt[128]; +static char vmcmd_on_poff[128]; +static char vmcmd_on_restart[128]; + +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff); +DEFINE_IPL_ATTR_STR_RW(vmcmd, on_restart, "%s\n", "%s\n", vmcmd_on_restart); + +static struct attribute *vmcmd_attrs[] = { + &sys_vmcmd_on_reboot_attr.attr, + &sys_vmcmd_on_panic_attr.attr, + &sys_vmcmd_on_halt_attr.attr, + &sys_vmcmd_on_poff_attr.attr, + &sys_vmcmd_on_restart_attr.attr, + NULL, +}; + +static struct attribute_group vmcmd_attr_group = { + .attrs = vmcmd_attrs, +}; + +static struct kset *vmcmd_kset; + +static void vmcmd_run(struct shutdown_trigger *trigger) +{ + char *cmd; + + if (strcmp(trigger->name, ON_REIPL_STR) == 0) + cmd = vmcmd_on_reboot; + else if (strcmp(trigger->name, ON_PANIC_STR) == 0) + cmd = vmcmd_on_panic; + else if (strcmp(trigger->name, ON_HALT_STR) == 0) + cmd = vmcmd_on_halt; + else if (strcmp(trigger->name, ON_POFF_STR) == 0) + cmd = vmcmd_on_poff; + else if (strcmp(trigger->name, ON_RESTART_STR) == 0) + cmd = vmcmd_on_restart; + else + return; + + if (strlen(cmd) == 0) + return; + __cpcmd(cmd, NULL, 0, NULL); +} + +static int vmcmd_init(void) +{ + if (!MACHINE_IS_VM) + return -EOPNOTSUPP; + vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj); + if (!vmcmd_kset) + return -ENOMEM; + return sysfs_create_group(&vmcmd_kset->kobj, &vmcmd_attr_group); +} + +static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR, + vmcmd_run, vmcmd_init}; + +/* + * stop shutdown action: Stop Linux on shutdown. 
+ */ + +static void stop_run(struct shutdown_trigger *trigger) +{ + if (strcmp(trigger->name, ON_PANIC_STR) == 0 || + strcmp(trigger->name, ON_RESTART_STR) == 0) + disabled_wait(); + smp_stop_cpu(); +} + +static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR, + stop_run, NULL}; + +/* action list */ + +static struct shutdown_action *shutdown_actions_list[] = { + &ipl_action, &reipl_action, &dump_reipl_action, &dump_action, + &vmcmd_action, &stop_action}; +#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *)) + +/* + * Trigger section + */ + +static struct kset *shutdown_actions_kset; + +static int set_trigger(const char *buf, struct shutdown_trigger *trigger, + size_t len) +{ + int i; + + for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { + if (sysfs_streq(buf, shutdown_actions_list[i]->name)) { + if (shutdown_actions_list[i]->init_rc) { + return shutdown_actions_list[i]->init_rc; + } else { + trigger->action = shutdown_actions_list[i]; + return len; + } + } + } + return -EINVAL; +} + +/* on reipl */ + +static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR, + &reipl_action}; + +static ssize_t on_reboot_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_reboot_trigger.action->name); +} + +static ssize_t on_reboot_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_reboot_trigger, len); +} +static struct kobj_attribute on_reboot_attr = __ATTR_RW(on_reboot); + +static void do_machine_restart(char *__unused) +{ + smp_send_stop(); + on_reboot_trigger.action->fn(&on_reboot_trigger); + reipl_run(NULL); +} +void (*_machine_restart)(char *command) = do_machine_restart; + +/* on panic */ + +static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action}; + +static ssize_t on_panic_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_panic_trigger.action->name); +} + +static ssize_t on_panic_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_panic_trigger, len); +} +static struct kobj_attribute on_panic_attr = __ATTR_RW(on_panic); + +static void do_panic(void) +{ + lgr_info_log(); + on_panic_trigger.action->fn(&on_panic_trigger); + stop_run(&on_panic_trigger); +} + +/* on restart */ + +static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, + &stop_action}; + +static ssize_t on_restart_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_restart_trigger.action->name); +} + +static ssize_t on_restart_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_restart_trigger, len); +} +static struct kobj_attribute on_restart_attr = __ATTR_RW(on_restart); + +static void __do_restart(void *ignore) +{ + smp_send_stop(); +#ifdef CONFIG_CRASH_DUMP + crash_kexec(NULL); +#endif + on_restart_trigger.action->fn(&on_restart_trigger); + stop_run(&on_restart_trigger); +} + +void do_restart(void *arg) +{ + tracing_off(); + debug_locks_off(); + lgr_info_log(); + smp_call_online_cpu(__do_restart, arg); +} + +/* on halt */ + +static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action}; + +static ssize_t on_halt_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_halt_trigger.action->name); +} + +static 
ssize_t on_halt_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_halt_trigger, len); +} +static struct kobj_attribute on_halt_attr = __ATTR_RW(on_halt); + +static void do_machine_halt(void) +{ + smp_send_stop(); + on_halt_trigger.action->fn(&on_halt_trigger); + stop_run(&on_halt_trigger); +} +void (*_machine_halt)(void) = do_machine_halt; + +/* on power off */ + +static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action}; + +static ssize_t on_poff_show(struct kobject *kobj, + struct kobj_attribute *attr, char *page) +{ + return sprintf(page, "%s\n", on_poff_trigger.action->name); +} + +static ssize_t on_poff_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t len) +{ + return set_trigger(buf, &on_poff_trigger, len); +} +static struct kobj_attribute on_poff_attr = __ATTR_RW(on_poff); + +static void do_machine_power_off(void) +{ + smp_send_stop(); + on_poff_trigger.action->fn(&on_poff_trigger); + stop_run(&on_poff_trigger); +} +void (*_machine_power_off)(void) = do_machine_power_off; + +static struct attribute *shutdown_action_attrs[] = { + &on_restart_attr.attr, + &on_reboot_attr.attr, + &on_panic_attr.attr, + &on_halt_attr.attr, + &on_poff_attr.attr, + NULL, +}; + +static struct attribute_group shutdown_action_attr_group = { + .attrs = shutdown_action_attrs, +}; + +static void __init shutdown_triggers_init(void) +{ + shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL, + firmware_kobj); + if (!shutdown_actions_kset) + goto fail; + if (sysfs_create_group(&shutdown_actions_kset->kobj, + &shutdown_action_attr_group)) + goto fail; + return; +fail: + panic("shutdown_triggers_init failed\n"); +} + +static void __init shutdown_actions_init(void) +{ + int i; + + for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) { + if (!shutdown_actions_list[i]->init) + continue; + shutdown_actions_list[i]->init_rc = + shutdown_actions_list[i]->init(); + } +} + +static int __init s390_ipl_init(void) +{ + char str[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}; + + sclp_early_get_ipl_info(&sclp_ipl_info); + /* + * Fix loadparm: There are systems where the (SCSI) LOADPARM + * returned by read SCP info is invalid (contains EBCDIC blanks) + * when the system has been booted via diag308. In that case we use + * the value from diag308, if available. + * + * There are also systems where diag308 store does not work in + * case the system is booted from HMC. Fortunately in this case + * READ SCP info provides the correct value. 
+ */ + if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid) + memcpy(sclp_ipl_info.loadparm, ipl_block.ccw.loadparm, LOADPARM_LEN); + shutdown_actions_init(); + shutdown_triggers_init(); + return 0; +} + +__initcall(s390_ipl_init); + +static void __init strncpy_skip_quote(char *dst, char *src, int n) +{ + int sx, dx; + + dx = 0; + for (sx = 0; src[sx] != 0; sx++) { + if (src[sx] == '"') + continue; + dst[dx++] = src[sx]; + if (dx >= n) + break; + } +} + +static int __init vmcmd_on_reboot_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_reboot, str, 127); + vmcmd_on_reboot[127] = 0; + on_reboot_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmreboot=", vmcmd_on_reboot_setup); + +static int __init vmcmd_on_panic_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_panic, str, 127); + vmcmd_on_panic[127] = 0; + on_panic_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmpanic=", vmcmd_on_panic_setup); + +static int __init vmcmd_on_halt_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_halt, str, 127); + vmcmd_on_halt[127] = 0; + on_halt_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmhalt=", vmcmd_on_halt_setup); + +static int __init vmcmd_on_poff_setup(char *str) +{ + if (!MACHINE_IS_VM) + return 1; + strncpy_skip_quote(vmcmd_on_poff, str, 127); + vmcmd_on_poff[127] = 0; + on_poff_trigger.action = &vmcmd_action; + return 1; +} +__setup("vmpoff=", vmcmd_on_poff_setup); + +static int on_panic_notify(struct notifier_block *self, + unsigned long event, void *data) +{ + do_panic(); + return NOTIFY_OK; +} + +static struct notifier_block on_panic_nb = { + .notifier_call = on_panic_notify, + .priority = INT_MIN, +}; + +void __init setup_ipl(void) +{ + BUILD_BUG_ON(sizeof(struct ipl_parameter_block) != PAGE_SIZE); + + ipl_info.type = get_ipl_type(); + switch (ipl_info.type) { + case IPL_TYPE_CCW: + ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid; + ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno; + break; + case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: + ipl_info.data.eckd.dev_id.ssid = ipl_block.eckd.ssid; + ipl_info.data.eckd.dev_id.devno = ipl_block.eckd.devno; + break; + case IPL_TYPE_FCP: + case IPL_TYPE_FCP_DUMP: + ipl_info.data.fcp.dev_id.ssid = 0; + ipl_info.data.fcp.dev_id.devno = ipl_block.fcp.devno; + ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn; + ipl_info.data.fcp.lun = ipl_block.fcp.lun; + break; + case IPL_TYPE_NVME: + case IPL_TYPE_NVME_DUMP: + ipl_info.data.nvme.fid = ipl_block.nvme.fid; + ipl_info.data.nvme.nsid = ipl_block.nvme.nsid; + break; + case IPL_TYPE_NSS: + case IPL_TYPE_UNKNOWN: + /* We have no info to copy */ + break; + } + atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb); +} + +void s390_reset_system(void) +{ + /* Disable prefixing */ + set_prefix(0); + + /* Disable lowcore protection */ + __ctl_clear_bit(0, 28); + diag_amode31_ops.diag308_reset(); +} + +#ifdef CONFIG_KEXEC_FILE + +int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, + unsigned char flags, unsigned short cert) +{ + struct ipl_report_component *comp; + + comp = vzalloc(sizeof(*comp)); + if (!comp) + return -ENOMEM; + list_add_tail(&comp->list, &report->components); + + comp->entry.addr = kbuf->mem; + comp->entry.len = kbuf->memsz; + comp->entry.flags = flags; + comp->entry.certificate_index = cert; + + report->size += sizeof(comp->entry); + + return 0; +} + +int ipl_report_add_certificate(struct 
ipl_report *report, void *key, + unsigned long addr, unsigned long len) +{ + struct ipl_report_certificate *cert; + + cert = vzalloc(sizeof(*cert)); + if (!cert) + return -ENOMEM; + list_add_tail(&cert->list, &report->certificates); + + cert->entry.addr = addr; + cert->entry.len = len; + cert->key = key; + + report->size += sizeof(cert->entry); + report->size += cert->entry.len; + + return 0; +} + +struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib) +{ + struct ipl_report *report; + + report = vzalloc(sizeof(*report)); + if (!report) + return ERR_PTR(-ENOMEM); + + report->ipib = ipib; + INIT_LIST_HEAD(&report->components); + INIT_LIST_HEAD(&report->certificates); + + report->size = ALIGN(ipib->hdr.len, 8); + report->size += sizeof(struct ipl_rl_hdr); + report->size += sizeof(struct ipl_rb_components); + report->size += sizeof(struct ipl_rb_certificates); + + return report; +} + +void *ipl_report_finish(struct ipl_report *report) +{ + struct ipl_report_certificate *cert; + struct ipl_report_component *comp; + struct ipl_rb_certificates *certs; + struct ipl_parameter_block *ipib; + struct ipl_rb_components *comps; + struct ipl_rl_hdr *rl_hdr; + void *buf, *ptr; + + buf = vzalloc(report->size); + if (!buf) + goto out; + ptr = buf; + + memcpy(ptr, report->ipib, report->ipib->hdr.len); + ipib = ptr; + if (ipl_secure_flag) + ipib->hdr.flags |= IPL_PL_FLAG_SIPL; + ipib->hdr.flags |= IPL_PL_FLAG_IPLSR; + ptr += report->ipib->hdr.len; + ptr = PTR_ALIGN(ptr, 8); + + rl_hdr = ptr; + ptr += sizeof(*rl_hdr); + + comps = ptr; + comps->rbt = IPL_RBT_COMPONENTS; + ptr += sizeof(*comps); + list_for_each_entry(comp, &report->components, list) { + memcpy(ptr, &comp->entry, sizeof(comp->entry)); + ptr += sizeof(comp->entry); + } + comps->len = ptr - (void *)comps; + + certs = ptr; + certs->rbt = IPL_RBT_CERTIFICATES; + ptr += sizeof(*certs); + list_for_each_entry(cert, &report->certificates, list) { + memcpy(ptr, &cert->entry, sizeof(cert->entry)); + ptr += sizeof(cert->entry); + } + certs->len = ptr - (void *)certs; + rl_hdr->len = ptr - (void *)rl_hdr; + + list_for_each_entry(cert, &report->certificates, list) { + memcpy(ptr, cert->key, cert->entry.len); + ptr += cert->entry.len; + } + + BUG_ON(ptr > buf + report->size); +out: + return buf; +} + +int ipl_report_free(struct ipl_report *report) +{ + struct ipl_report_component *comp, *ncomp; + struct ipl_report_certificate *cert, *ncert; + + list_for_each_entry_safe(comp, ncomp, &report->components, list) + vfree(comp); + + list_for_each_entry_safe(cert, ncert, &report->certificates, list) + vfree(cert); + + vfree(report); + + return 0; +} + +#endif diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c new file mode 100644 index 0000000000..b5245fadcf --- /dev/null +++ b/arch/s390/kernel/ipl_vmparm.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/minmax.h> +#include <linux/string.h> +#include <asm/ebcdic.h> +#include <asm/ipl.h> + +/* VM IPL PARM routines */ +size_t ipl_block_get_ascii_vmparm(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + int i; + size_t len; + char has_lowercase = 0; + + len = 0; + if ((ipb->ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP) && + (ipb->ccw.vm_parm_len > 0)) { + + len = min_t(size_t, size - 1, ipb->ccw.vm_parm_len); + memcpy(dest, ipb->ccw.vm_parm, len); + /* If at least one character is lowercase, we assume mixed + * case; otherwise we convert everything to lowercase. 
+ */ + for (i = 0; i < len; i++) + if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */ + (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */ + (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */ + has_lowercase = 1; + break; + } + if (!has_lowercase) + EBC_TOLOWER(dest, len); + EBCASC(dest, len); + } + dest[len] = 0; + + return len; +} diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c new file mode 100644 index 0000000000..b020ff17d2 --- /dev/null +++ b/arch/s390/kernel/irq.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2004, 2011 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Thomas Spatzier <tspat@de.ibm.com>, + * + * This file contains interrupt related functions. + */ + +#include <linux/kernel_stat.h> +#include <linux/interrupt.h> +#include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/profile.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/ftrace.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/irq.h> +#include <linux/entry-common.h> +#include <asm/irq_regs.h> +#include <asm/cputime.h> +#include <asm/lowcore.h> +#include <asm/irq.h> +#include <asm/hw_irq.h> +#include <asm/stacktrace.h> +#include <asm/softirq_stack.h> +#include "entry.h" + +DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat); +EXPORT_PER_CPU_SYMBOL_GPL(irq_stat); + +struct irq_class { + int irq; + char *name; + char *desc; +}; + +/* + * The list of "main" irq classes on s390. This is the list of interrupts + * that appear both in /proc/stat ("intr" line) and /proc/interrupts. + * Historically only external and I/O interrupts have been part of /proc/stat. + * We can't add the split external and I/O sub classes since the first field + * in the "intr" line in /proc/stat is supposed to be the sum of all other + * fields. + * Since the external and I/O interrupt fields are already sums we would end + * up with having a sum which accounts each interrupt twice. + */ +static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = { + {.irq = EXT_INTERRUPT, .name = "EXT"}, + {.irq = IO_INTERRUPT, .name = "I/O"}, + {.irq = THIN_INTERRUPT, .name = "AIO"}, +}; + +/* + * The list of split external and I/O interrupts that appear only in + * /proc/interrupts. + * In addition this list contains non external / I/O events like NMIs. 
+ */ +static const struct irq_class irqclass_sub_desc[] = { + {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"}, + {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"}, + {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"}, + {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"}, + {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"}, + {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"}, + {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"}, + {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"}, + {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"}, + {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"}, + {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"}, + {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"}, + {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"}, + {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"}, + {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"}, + {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"}, + {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, + {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, + {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, + {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, + {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, + {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, + {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, + {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"}, + {.irq = IRQIO_QAI, .name = "QAI", .desc = "[AIO] QDIO Adapter Interrupt"}, + {.irq = IRQIO_APB, .name = "APB", .desc = "[AIO] AP Bus"}, + {.irq = IRQIO_PCF, .name = "PCF", .desc = "[AIO] PCI Floating Interrupt"}, + {.irq = IRQIO_PCD, .name = "PCD", .desc = "[AIO] PCI Directed Interrupt"}, + {.irq = IRQIO_MSI, .name = "MSI", .desc = "[AIO] MSI Interrupt"}, + {.irq = IRQIO_VAI, .name = "VAI", .desc = "[AIO] Virtual I/O Devices AI"}, + {.irq = IRQIO_GAL, .name = "GAL", .desc = "[AIO] GIB Alert"}, + {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"}, + {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"}, +}; + +static void do_IRQ(struct pt_regs *regs, int irq) +{ + if (tod_after_eq(S390_lowcore.int_clock, + S390_lowcore.clock_comparator)) + /* Serve timer interrupts first. 
*/ + clock_comparator_work(); + generic_handle_irq(irq); +} + +static int on_async_stack(void) +{ + unsigned long frame = current_frame_address(); + + return ((S390_lowcore.async_stack ^ frame) & ~(THREAD_SIZE - 1)) == 0; +} + +static void do_irq_async(struct pt_regs *regs, int irq) +{ + if (on_async_stack()) { + do_IRQ(regs, irq); + } else { + call_on_stack(2, S390_lowcore.async_stack, void, do_IRQ, + struct pt_regs *, regs, int, irq); + } +} + +static int irq_pending(struct pt_regs *regs) +{ + int cc; + + asm volatile("tpi 0\n" + "ipm %0" : "=d" (cc) : : "cc"); + return cc >> 28; +} + +void noinstr do_io_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + bool from_idle; + + irq_enter_rcu(); + + if (user_mode(regs)) { + update_timer_sys(); + if (static_branch_likely(&cpu_has_bear)) + current->thread.last_break = regs->last_break; + } + + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); + if (from_idle) + account_idle_time_irq(); + + do { + regs->tpi_info = S390_lowcore.tpi_info; + if (S390_lowcore.tpi_info.adapter_IO) + do_irq_async(regs, THIN_INTERRUPT); + else + do_irq_async(regs, IO_INTERRUPT); + } while (MACHINE_IS_LPAR && irq_pending(regs)); + + irq_exit_rcu(); + + set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); +} + +void noinstr do_ext_irq(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + struct pt_regs *old_regs = set_irq_regs(regs); + bool from_idle; + + irq_enter_rcu(); + + if (user_mode(regs)) { + update_timer_sys(); + if (static_branch_likely(&cpu_has_bear)) + current->thread.last_break = regs->last_break; + } + + regs->int_code = S390_lowcore.ext_int_code_addr; + regs->int_parm = S390_lowcore.ext_params; + regs->int_parm_long = S390_lowcore.ext_params2; + + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); + if (from_idle) + account_idle_time_irq(); + + do_irq_async(regs, EXT_INTERRUPT); + + irq_exit_rcu(); + set_irq_regs(old_regs); + irqentry_exit(regs, state); + + if (from_idle) + regs->psw.mask &= ~(PSW_MASK_EXT | PSW_MASK_IO | PSW_MASK_WAIT); +} + +static void show_msi_interrupt(struct seq_file *p, int irq) +{ + struct irq_desc *desc; + unsigned long flags; + int cpu; + + rcu_read_lock(); + desc = irq_to_desc(irq); + if (!desc) + goto out; + + raw_spin_lock_irqsave(&desc->lock, flags); + seq_printf(p, "%3d: ", irq); + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", irq_desc_kstat_cpu(desc, cpu)); + + if (desc->irq_data.chip) + seq_printf(p, " %8s", desc->irq_data.chip->name); + + if (desc->action) + seq_printf(p, " %s", desc->action->name); + + seq_putc(p, '\n'); + raw_spin_unlock_irqrestore(&desc->lock, flags); +out: + rcu_read_unlock(); +} + +/* + * show_interrupts is needed by /proc/interrupts. 
+ */ +int show_interrupts(struct seq_file *p, void *v) +{ + int index = *(loff_t *) v; + int cpu, irq; + + cpus_read_lock(); + if (index == 0) { + seq_puts(p, " "); + for_each_online_cpu(cpu) + seq_printf(p, "CPU%-8d", cpu); + seq_putc(p, '\n'); + } + if (index < NR_IRQS_BASE) { + seq_printf(p, "%s: ", irqclass_main_desc[index].name); + irq = irqclass_main_desc[index].irq; + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu)); + seq_putc(p, '\n'); + goto out; + } + if (index < nr_irqs) { + show_msi_interrupt(p, index); + goto out; + } + for (index = 0; index < NR_ARCH_IRQS; index++) { + seq_printf(p, "%s: ", irqclass_sub_desc[index].name); + irq = irqclass_sub_desc[index].irq; + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", + per_cpu(irq_stat, cpu).irqs[irq]); + if (irqclass_sub_desc[index].desc) + seq_printf(p, " %s", irqclass_sub_desc[index].desc); + seq_putc(p, '\n'); + } +out: + cpus_read_unlock(); + return 0; +} + +unsigned int arch_dynirq_lower_bound(unsigned int from) +{ + return from < NR_IRQS_BASE ? NR_IRQS_BASE : from; +} + +/* + * ext_int_hash[index] is the list head for all external interrupts that hash + * to this index. + */ +static struct hlist_head ext_int_hash[32] ____cacheline_aligned; + +struct ext_int_info { + ext_int_handler_t handler; + struct hlist_node entry; + struct rcu_head rcu; + u16 code; +}; + +/* ext_int_hash_lock protects the handler lists for external interrupts */ +static DEFINE_SPINLOCK(ext_int_hash_lock); + +static inline int ext_hash(u16 code) +{ + BUILD_BUG_ON(!is_power_of_2(ARRAY_SIZE(ext_int_hash))); + + return (code + (code >> 9)) & (ARRAY_SIZE(ext_int_hash) - 1); +} + +int register_external_irq(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + unsigned long flags; + int index; + + p = kmalloc(sizeof(*p), GFP_ATOMIC); + if (!p) + return -ENOMEM; + p->code = code; + p->handler = handler; + index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + hlist_add_head_rcu(&p->entry, &ext_int_hash[index]); + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(register_external_irq); + +int unregister_external_irq(u16 code, ext_int_handler_t handler) +{ + struct ext_int_info *p; + unsigned long flags; + int index = ext_hash(code); + + spin_lock_irqsave(&ext_int_hash_lock, flags); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (p->code == code && p->handler == handler) { + hlist_del_rcu(&p->entry); + kfree_rcu(p, rcu); + } + } + spin_unlock_irqrestore(&ext_int_hash_lock, flags); + return 0; +} +EXPORT_SYMBOL(unregister_external_irq); + +static irqreturn_t do_ext_interrupt(int irq, void *dummy) +{ + struct pt_regs *regs = get_irq_regs(); + struct ext_code ext_code; + struct ext_int_info *p; + int index; + + ext_code.int_code = regs->int_code; + if (ext_code.code != EXT_IRQ_CLK_COMP) + set_cpu_flag(CIF_NOHZ_DELAY); + + index = ext_hash(ext_code.code); + rcu_read_lock(); + hlist_for_each_entry_rcu(p, &ext_int_hash[index], entry) { + if (unlikely(p->code != ext_code.code)) + continue; + p->handler(ext_code, regs->int_parm, regs->int_parm_long); + } + rcu_read_unlock(); + return IRQ_HANDLED; +} + +static void __init init_ext_interrupts(void) +{ + int idx; + + for (idx = 0; idx < ARRAY_SIZE(ext_int_hash); idx++) + INIT_HLIST_HEAD(&ext_int_hash[idx]); + + irq_set_chip_and_handler(EXT_INTERRUPT, + &dummy_irq_chip, handle_percpu_irq); + if (request_irq(EXT_INTERRUPT, do_ext_interrupt, 0, "EXT", NULL)) + panic("Failed to register EXT interrupt\n"); +} + +void 
__init init_IRQ(void) +{ + BUILD_BUG_ON(ARRAY_SIZE(irqclass_sub_desc) != NR_ARCH_IRQS); + init_cio_interrupts(); + init_airq_interrupts(); + init_ext_interrupts(); +} + +static DEFINE_SPINLOCK(irq_subclass_lock); +static unsigned char irq_subclass_refcount[64]; + +void irq_subclass_register(enum irq_subclass subclass) +{ + spin_lock(&irq_subclass_lock); + if (!irq_subclass_refcount[subclass]) + ctl_set_bit(0, subclass); + irq_subclass_refcount[subclass]++; + spin_unlock(&irq_subclass_lock); +} +EXPORT_SYMBOL(irq_subclass_register); + +void irq_subclass_unregister(enum irq_subclass subclass) +{ + spin_lock(&irq_subclass_lock); + irq_subclass_refcount[subclass]--; + if (!irq_subclass_refcount[subclass]) + ctl_clear_bit(0, subclass); + spin_unlock(&irq_subclass_lock); +} +EXPORT_SYMBOL(irq_subclass_unregister); diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c new file mode 100644 index 0000000000..e808bb8bc0 --- /dev/null +++ b/arch/s390/kernel/jump_label.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Jump label s390 support + * + * Copyright IBM Corp. 2011 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ +#include <linux/uaccess.h> +#include <linux/jump_label.h> +#include <linux/module.h> +#include <asm/text-patching.h> +#include <asm/ipl.h> + +struct insn { + u16 opcode; + s32 offset; +} __packed; + +static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn) +{ + /* brcl 0,offset */ + insn->opcode = 0xc004; + insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1; +} + +static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn) +{ + /* brcl 15,offset */ + insn->opcode = 0xc0f4; + insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1; +} + +static void jump_label_bug(struct jump_entry *entry, struct insn *expected, + struct insn *new) +{ + unsigned char *ipc = (unsigned char *)jump_entry_code(entry); + unsigned char *ipe = (unsigned char *)expected; + unsigned char *ipn = (unsigned char *)new; + + pr_emerg("Jump label code mismatch at %pS [%px]\n", ipc, ipc); + pr_emerg("Found: %6ph\n", ipc); + pr_emerg("Expected: %6ph\n", ipe); + pr_emerg("New: %6ph\n", ipn); + panic("Corrupted kernel text"); +} + +static void jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + void *code = (void *)jump_entry_code(entry); + struct insn old, new; + + if (type == JUMP_LABEL_JMP) { + jump_label_make_nop(entry, &old); + jump_label_make_branch(entry, &new); + } else { + jump_label_make_branch(entry, &old); + jump_label_make_nop(entry, &new); + } + if (memcmp(code, &old, sizeof(old))) + jump_label_bug(entry, &old, &new); + s390_kernel_write(code, &new, sizeof(new)); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + jump_label_transform(entry, type); + text_poke_sync(); +} + +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) +{ + jump_label_transform(entry, type); + return true; +} + +void arch_jump_label_transform_apply(void) +{ + text_poke_sync(); +} diff --git a/arch/s390/kernel/kdebugfs.c b/arch/s390/kernel/kdebugfs.c new file mode 100644 index 0000000000..33130c7daf --- /dev/null +++ b/arch/s390/kernel/kdebugfs.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/init.h> + +struct dentry *arch_debugfs_dir; +EXPORT_SYMBOL(arch_debugfs_dir); + +static int __init arch_kdebugfs_init(void) +{ + 
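+	/* All arch-specific debugfs entries hang off a common "s390" directory created here under the debugfs root. */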
arch_debugfs_dir = debugfs_create_dir("s390", NULL); + return 0; +} +postcore_initcall(arch_kdebugfs_init); diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c new file mode 100644 index 0000000000..9da6fa30c4 --- /dev/null +++ b/arch/s390/kernel/kexec_elf.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ELF loader for kexec_file_load system call. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <asm/ipl.h> +#include <asm/setup.h> + +static int kexec_file_add_kernel_elf(struct kimage *image, + struct s390_load_data *data) +{ + struct kexec_buf buf; + const Elf_Ehdr *ehdr; + const Elf_Phdr *phdr; + Elf_Addr entry; + void *kernel; + int i, ret; + + kernel = image->kernel_buf; + ehdr = (Elf_Ehdr *)kernel; + buf.image = image; + if (image->type == KEXEC_TYPE_CRASH) + entry = STARTUP_KDUMP_OFFSET; + else + entry = ehdr->e_entry; + + phdr = (void *)ehdr + ehdr->e_phoff; + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type != PT_LOAD) + continue; + + buf.buffer = kernel + phdr->p_offset; + buf.bufsz = phdr->p_filesz; + + buf.mem = ALIGN(phdr->p_paddr, phdr->p_align); + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + buf.memsz = phdr->p_memsz; + data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz; + + if (entry - phdr->p_paddr < phdr->p_memsz) { + data->kernel_buf = buf.buffer; + data->kernel_mem = buf.mem; + data->parm = buf.buffer + PARMAREA; + } + + ipl_report_add_component(data->report, &buf, + IPL_RB_COMPONENT_FLAG_SIGNED | + IPL_RB_COMPONENT_FLAG_VERIFIED, + IPL_RB_CERT_UNKNOWN); + ret = kexec_add_buffer(&buf); + if (ret) + return ret; + } + + return data->memsz ? 0 : -EINVAL; +} + +static void *s390_elf_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + const Elf_Ehdr *ehdr; + const Elf_Phdr *phdr; + size_t size; + int i; + + /* image->fobs->probe already checked for valid ELF magic number. */ + ehdr = (Elf_Ehdr *)kernel; + + if (ehdr->e_type != ET_EXEC || + ehdr->e_ident[EI_CLASS] != ELFCLASS64 || + !elf_check_arch(ehdr)) + return ERR_PTR(-EINVAL); + + if (!ehdr->e_phnum || ehdr->e_phentsize != sizeof(Elf_Phdr)) + return ERR_PTR(-EINVAL); + + size = ehdr->e_ehsize + ehdr->e_phoff; + size += ehdr->e_phentsize * ehdr->e_phnum; + if (size > kernel_len) + return ERR_PTR(-EINVAL); + + phdr = (void *)ehdr + ehdr->e_phoff; + size = ALIGN(size, phdr->p_align); + for (i = 0; i < ehdr->e_phnum; i++, phdr++) { + if (phdr->p_type == PT_INTERP) + return ERR_PTR(-EINVAL); + + if (phdr->p_offset > kernel_len) + return ERR_PTR(-EINVAL); + + size += ALIGN(phdr->p_filesz, phdr->p_align); + } + + if (size > kernel_len) + return ERR_PTR(-EINVAL); + + return kexec_file_add_components(image, kexec_file_add_kernel_elf); +} + +static int s390_elf_probe(const char *buf, unsigned long len) +{ + const Elf_Ehdr *ehdr; + + if (len < sizeof(Elf_Ehdr)) + return -ENOEXEC; + + ehdr = (Elf_Ehdr *)buf; + + /* Only check the ELF magic number here and do proper validity check + * in the loader. Any check here that fails would send the erroneous + * ELF file to the image loader that does not care what it gets. + * (Most likely) causing behavior not intended by the user. 
+ */ + if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0) + return -ENOEXEC; + + return 0; +} + +const struct kexec_file_ops s390_kexec_elf_ops = { + .probe = s390_elf_probe, + .load = s390_elf_load, +#ifdef CONFIG_KEXEC_SIG + .verify_sig = s390_verify_sig, +#endif /* CONFIG_KEXEC_SIG */ +}; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c new file mode 100644 index 0000000000..af23eff577 --- /dev/null +++ b/arch/s390/kernel/kexec_image.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Image loader for kexec_file_load system call. + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kexec.h> +#include <asm/ipl.h> +#include <asm/setup.h> + +static int kexec_file_add_kernel_image(struct kimage *image, + struct s390_load_data *data) +{ + struct kexec_buf buf; + + buf.image = image; + + buf.buffer = image->kernel_buf; + buf.bufsz = image->kernel_buf_len; + + buf.mem = 0; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + buf.memsz = buf.bufsz; + + data->kernel_buf = image->kernel_buf; + data->kernel_mem = buf.mem; + data->parm = image->kernel_buf + PARMAREA; + data->memsz += buf.memsz; + + ipl_report_add_component(data->report, &buf, + IPL_RB_COMPONENT_FLAG_SIGNED | + IPL_RB_COMPONENT_FLAG_VERIFIED, + IPL_RB_CERT_UNKNOWN); + return kexec_add_buffer(&buf); +} + +static void *s390_image_load(struct kimage *image, + char *kernel, unsigned long kernel_len, + char *initrd, unsigned long initrd_len, + char *cmdline, unsigned long cmdline_len) +{ + return kexec_file_add_components(image, kexec_file_add_kernel_image); +} + +static int s390_image_probe(const char *buf, unsigned long len) +{ + /* Can't reliably tell if an image is valid. Therefore give the + * user whatever he wants. + */ + return 0; +} + +const struct kexec_file_ops s390_kexec_image_ops = { + .probe = s390_image_probe, + .load = s390_image_load, +#ifdef CONFIG_KEXEC_SIG + .verify_sig = s390_verify_sig, +#endif /* CONFIG_KEXEC_SIG */ +}; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c new file mode 100644 index 0000000000..d4b863ed0a --- /dev/null +++ b/arch/s390/kernel/kprobes.c @@ -0,0 +1,525 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Kernel Probes (KProbes) + * + * Copyright IBM Corp. 2002, 2006 + * + * s390 port, used ppc64 as template. 
Mike Grundy <grundym@us.ibm.com> + */ + +#define pr_fmt(fmt) "kprobes: " fmt + +#include <linux/moduleloader.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/preempt.h> +#include <linux/stop_machine.h> +#include <linux/kdebug.h> +#include <linux/uaccess.h> +#include <linux/extable.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/hardirq.h> +#include <linux/ftrace.h> +#include <asm/set_memory.h> +#include <asm/sections.h> +#include <asm/dis.h> +#include "kprobes.h" +#include "entry.h" + +DEFINE_PER_CPU(struct kprobe *, current_kprobe); +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +struct kretprobe_blackpoint kretprobe_blacklist[] = { }; + +static int insn_page_in_use; + +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (!page) + return NULL; + set_memory_rox((unsigned long)page, 1); + return page; +} + +static void *alloc_s390_insn_page(void) +{ + if (xchg(&insn_page_in_use, 1) == 1) + return NULL; + return &kprobes_insn_page; +} + +static void free_s390_insn_page(void *page) +{ + xchg(&insn_page_in_use, 0); +} + +struct kprobe_insn_cache kprobe_s390_insn_slots = { + .mutex = __MUTEX_INITIALIZER(kprobe_s390_insn_slots.mutex), + .alloc = alloc_s390_insn_page, + .free = free_s390_insn_page, + .pages = LIST_HEAD_INIT(kprobe_s390_insn_slots.pages), + .insn_size = MAX_INSN_SIZE, +}; + +static void copy_instruction(struct kprobe *p) +{ + kprobe_opcode_t insn[MAX_INSN_SIZE]; + s64 disp, new_disp; + u64 addr, new_addr; + unsigned int len; + + len = insn_length(*p->addr >> 8); + memcpy(&insn, p->addr, len); + p->opcode = insn[0]; + if (probe_is_insn_relative_long(&insn[0])) { + /* + * For pc-relative instructions in RIL-b or RIL-c format patch + * the RI2 displacement field. We have already made sure that + * the insn slot for the patched instruction is within the same + * 2GB area as the original instruction (either kernel image or + * module area). Therefore the new displacement will always fit. + */ + disp = *(s32 *)&insn[1]; + addr = (u64)(unsigned long)p->addr; + new_addr = (u64)(unsigned long)p->ainsn.insn; + new_disp = ((addr + (disp * 2)) - new_addr) / 2; + *(s32 *)&insn[1] = new_disp; + } + s390_kernel_write(p->ainsn.insn, &insn, len); +} +NOKPROBE_SYMBOL(copy_instruction); + +static int s390_get_insn_slot(struct kprobe *p) +{ + /* + * Get an insn slot that is within the same 2GB area like the original + * instruction. That way instructions with a 32bit signed displacement + * field can be patched and executed within the insn slot. + */ + p->ainsn.insn = NULL; + if (is_kernel((unsigned long)p->addr)) + p->ainsn.insn = get_s390_insn_slot(); + else if (is_module_addr(p->addr)) + p->ainsn.insn = get_insn_slot(); + return p->ainsn.insn ? 
0 : -ENOMEM; +} +NOKPROBE_SYMBOL(s390_get_insn_slot); + +static void s390_free_insn_slot(struct kprobe *p) +{ + if (!p->ainsn.insn) + return; + if (is_kernel((unsigned long)p->addr)) + free_s390_insn_slot(p->ainsn.insn, 0); + else + free_insn_slot(p->ainsn.insn, 0); + p->ainsn.insn = NULL; +} +NOKPROBE_SYMBOL(s390_free_insn_slot); + +/* Check if paddr is at an instruction boundary */ +static bool can_probe(unsigned long paddr) +{ + unsigned long addr, offset = 0; + kprobe_opcode_t insn; + struct kprobe *kp; + + if (paddr & 0x01) + return false; + + if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) + return false; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(insn))) + return false; + + if (insn >> 8 == 0) { + if (insn != BREAKPOINT_INSTRUCTION) { + /* + * Note that QEMU inserts opcode 0x0000 to implement + * software breakpoints for guests. Since the size of + * the original instruction is unknown, stop following + * instructions and prevent setting a kprobe. + */ + return false; + } + /* + * Check if the instruction has been modified by another + * kprobe, in which case the original instruction is + * decoded. + */ + kp = get_kprobe((void *)addr); + if (!kp) { + /* not a kprobe */ + return false; + } + insn = kp->opcode; + } + addr += insn_length(insn >> 8); + } + return addr == paddr; +} + +int arch_prepare_kprobe(struct kprobe *p) +{ + if (!can_probe((unsigned long)p->addr)) + return -EINVAL; + /* Make sure the probe isn't going on a difficult instruction */ + if (probe_is_prohibited_opcode(p->addr)) + return -EINVAL; + if (s390_get_insn_slot(p)) + return -ENOMEM; + copy_instruction(p); + return 0; +} +NOKPROBE_SYMBOL(arch_prepare_kprobe); + +struct swap_insn_args { + struct kprobe *p; + unsigned int arm_kprobe : 1; +}; + +static int swap_instruction(void *data) +{ + struct swap_insn_args *args = data; + struct kprobe *p = args->p; + u16 opc; + + opc = args->arm_kprobe ? 
BREAKPOINT_INSTRUCTION : p->opcode; + s390_kernel_write(p->addr, &opc, sizeof(opc)); + return 0; +} +NOKPROBE_SYMBOL(swap_instruction); + +void arch_arm_kprobe(struct kprobe *p) +{ + struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; + + stop_machine_cpuslocked(swap_instruction, &args, NULL); +} +NOKPROBE_SYMBOL(arch_arm_kprobe); + +void arch_disarm_kprobe(struct kprobe *p) +{ + struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; + + stop_machine_cpuslocked(swap_instruction, &args, NULL); +} +NOKPROBE_SYMBOL(arch_disarm_kprobe); + +void arch_remove_kprobe(struct kprobe *p) +{ + s390_free_insn_slot(p); +} +NOKPROBE_SYMBOL(arch_remove_kprobe); + +static void enable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) +{ + struct per_regs per_kprobe; + + /* Set up the PER control registers %cr9-%cr11 */ + per_kprobe.control = PER_EVENT_IFETCH; + per_kprobe.start = ip; + per_kprobe.end = ip; + + /* Save control regs and psw mask */ + __ctl_store(kcb->kprobe_saved_ctl, 9, 11); + kcb->kprobe_saved_imask = regs->psw.mask & + (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT); + + /* Set PER control regs, turns on single step for the given address */ + __ctl_load(per_kprobe, 9, 11); + regs->psw.mask |= PSW_MASK_PER; + regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT); + regs->psw.addr = ip; +} +NOKPROBE_SYMBOL(enable_singlestep); + +static void disable_singlestep(struct kprobe_ctlblk *kcb, + struct pt_regs *regs, + unsigned long ip) +{ + /* Restore control regs and psw mask, set new psw address */ + __ctl_load(kcb->kprobe_saved_ctl, 9, 11); + regs->psw.mask &= ~PSW_MASK_PER; + regs->psw.mask |= kcb->kprobe_saved_imask; + regs->psw.addr = ip; +} +NOKPROBE_SYMBOL(disable_singlestep); + +/* + * Activate a kprobe by storing its pointer to current_kprobe. The + * previous kprobe is stored in kcb->prev_kprobe. A stack of up to + * two kprobes can be active, see KPROBE_REENTER. + */ +static void push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p) +{ + kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe); + kcb->prev_kprobe.status = kcb->kprobe_status; + __this_cpu_write(current_kprobe, p); +} +NOKPROBE_SYMBOL(push_kprobe); + +/* + * Deactivate a kprobe by backing up to the previous state. If the + * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL, + * for any other state prev_kprobe.kp will be NULL. + */ +static void pop_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->prev_kprobe.kp = NULL; +} +NOKPROBE_SYMBOL(pop_kprobe); + +static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + default: + /* + * A kprobe on the code path to single step an instruction + * is a BUG. The code path resides in the .kprobes.text + * section and is executed with interrupts disabled. + */ + pr_err("Failed to recover from reentered kprobes.\n"); + dump_kprobe(p); + BUG(); + } +} +NOKPROBE_SYMBOL(kprobe_reenter_check); + +static int kprobe_handler(struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb; + struct kprobe *p; + + /* + * We want to disable preemption for the entire duration of kprobe + * processing. That includes the calls to the pre/post handlers + * and single stepping the kprobe instruction. 
+ */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + p = get_kprobe((void *)(regs->psw.addr - 2)); + + if (p) { + if (kprobe_running()) { + /* + * We have hit a kprobe while another is still + * active. This can happen in the pre and post + * handler. Single step the instruction of the + * new probe but do not call any handler function + * of this secondary kprobe. + * push_kprobe and pop_kprobe saves and restores + * the currently active kprobe. + */ + kprobe_reenter_check(kcb, p); + push_kprobe(kcb, p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + /* + * If we have no pre-handler or it returned 0, we + * continue with single stepping. If we have a + * pre-handler and it returned non-zero, it prepped + * for changing execution path, so get out doing + * nothing more here. + */ + push_kprobe(kcb, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (p->pre_handler && p->pre_handler(p, regs)) { + pop_kprobe(kcb); + preempt_enable_no_resched(); + return 1; + } + kcb->kprobe_status = KPROBE_HIT_SS; + } + enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn); + return 1; + } /* else: + * No kprobe at this address and no active kprobe. The trap has + * not been caused by a kprobe breakpoint. The race of breakpoint + * vs. kprobe remove does not exist because on s390 as we use + * stop_machine to arm/disarm the breakpoints. + */ + preempt_enable_no_resched(); + return 0; +} +NOKPROBE_SYMBOL(kprobe_handler); + +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "breakpoint" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn. + */ +static void resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long ip = regs->psw.addr; + int fixup = probe_get_fixup_type(p->ainsn.insn); + + if (fixup & FIXUP_PSW_NORMAL) + ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; + + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = insn_length(p->ainsn.insn[0] >> 8); + if (ip - (unsigned long) p->ainsn.insn == ilen) + ip = (unsigned long) p->addr + ilen; + } + + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (p->ainsn.insn[0] & 0xf0) >> 4; + regs->gprs[reg] += (unsigned long) p->addr - + (unsigned long) p->ainsn.insn; + } + + disable_singlestep(kcb, regs, ip); +} +NOKPROBE_SYMBOL(resume_execution); + +static int post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); + + if (!p) + return 0; + + resume_execution(p, regs); + if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + pop_kprobe(kcb); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, psw mask + * will have PER set, in which case, continue the remaining processing + * of do_single_step, as if this is not a probe hit. 
+ */ + if (regs->psw.mask & PSW_MASK_PER) + return 0; + + return 1; +} +NOKPROBE_SYMBOL(post_kprobe_handler); + +static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe *p = kprobe_running(); + + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the nip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + disable_singlestep(kcb, regs, (unsigned long) p->addr); + pop_kprobe(kcb); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; + } + return 0; +} +NOKPROBE_SYMBOL(kprobe_trap_handler); + +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + int ret; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + ret = kprobe_trap_handler(regs, trapnr); + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + return ret; +} +NOKPROBE_SYMBOL(kprobe_fault_handler); + +/* + * Wrapper routine to for handling exceptions. + */ +int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *) data; + struct pt_regs *regs = args->regs; + int ret = NOTIFY_DONE; + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_disable(); + + switch (val) { + case DIE_BPT: + if (kprobe_handler(regs)) + ret = NOTIFY_STOP; + break; + case DIE_SSTEP: + if (post_kprobe_handler(regs)) + ret = NOTIFY_STOP; + break; + case DIE_TRAP: + if (!preemptible() && kprobe_running() && + kprobe_trap_handler(regs, args->trapnr)) + ret = NOTIFY_STOP; + break; + default: + break; + } + + if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT)) + local_irq_restore(regs->psw.mask & ~PSW_MASK_PER); + + return ret; +} +NOKPROBE_SYMBOL(kprobe_exceptions_notify); + +int __init arch_init_kprobes(void) +{ + return 0; +} + +int arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} +NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/s390/kernel/kprobes.h b/arch/s390/kernel/kprobes.h new file mode 100644 index 0000000000..dc3ed5098e --- /dev/null +++ b/arch/s390/kernel/kprobes.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _ARCH_S390_KPROBES_H +#define _ARCH_S390_KPROBES_H + +#include <linux/kprobes.h> + +DEFINE_INSN_CACHE_OPS(s390_insn); + +#endif diff --git a/arch/s390/kernel/kprobes_insn_page.S b/arch/s390/kernel/kprobes_insn_page.S new file mode 100644 index 0000000000..0fe4d725e9 --- /dev/null +++ b/arch/s390/kernel/kprobes_insn_page.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> + +/* + * insn_page is a special 4k aligned dummy function for kprobes. + * It will contain all kprobed instructions that are out-of-line executed. + * The page must be within the kernel image to guarantee that the + * out-of-line instructions are within 2GB distance of their original + * location. 
Using a dummy function ensures that the insn_page is within + * the text section of the kernel and mapped read-only/executable from + * the beginning on, thus avoiding to split large mappings if the page + * would be in the data section instead. + */ + .section .kprobes.text, "ax" + .balign 4096 +SYM_CODE_START(kprobes_insn_page) + .rept 2048 + .word 0x07fe + .endr +SYM_CODE_END(kprobes_insn_page) + .previous diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c new file mode 100644 index 0000000000..6652e54cf3 --- /dev/null +++ b/arch/s390/kernel/lgr.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Guest Relocation (LGR) detection + * + * Copyright IBM Corp. 2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/init.h> +#include <linux/export.h> +#include <linux/timer.h> +#include <linux/slab.h> +#include <asm/facility.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/debug.h> +#include <asm/ipl.h> + +#define LGR_TIMER_INTERVAL_SECS (30 * 60) +#define VM_LEVEL_MAX 2 /* Maximum is 8, but we only record two levels */ + +/* + * LGR info: Contains stfle and stsi data + */ +struct lgr_info { + /* Bit field with facility information: 4 DWORDs are stored */ + u64 stfle_fac_list[4]; + /* Level of system (1 = CEC, 2 = LPAR, 3 = z/VM */ + u32 level; + /* Level 1: CEC info (stsi 1.1.1) */ + char manufacturer[16]; + char type[4]; + char sequence[16]; + char plant[4]; + char model[16]; + /* Level 2: LPAR info (stsi 2.2.2) */ + u16 lpar_number; + char name[8]; + /* Level 3: VM info (stsi 3.2.2) */ + u8 vm_count; + struct { + char name[8]; + char cpi[16]; + } vm[VM_LEVEL_MAX]; +} __packed __aligned(8); + +/* + * LGR globals + */ +static char lgr_page[PAGE_SIZE] __aligned(PAGE_SIZE); +static struct lgr_info lgr_info_last; +static struct lgr_info lgr_info_cur; +static struct debug_info *lgr_dbf; + +/* + * Copy buffer and then convert it to ASCII + */ +static void cpascii(char *dst, char *src, int size) +{ + memcpy(dst, src, size); + EBCASC(dst, size); +} + +/* + * Fill LGR info with 1.1.1 stsi data + */ +static void lgr_stsi_1_1_1(struct lgr_info *lgr_info) +{ + struct sysinfo_1_1_1 *si = (void *) lgr_page; + + if (stsi(si, 1, 1, 1)) + return; + cpascii(lgr_info->manufacturer, si->manufacturer, + sizeof(si->manufacturer)); + cpascii(lgr_info->type, si->type, sizeof(si->type)); + cpascii(lgr_info->model, si->model, sizeof(si->model)); + cpascii(lgr_info->sequence, si->sequence, sizeof(si->sequence)); + cpascii(lgr_info->plant, si->plant, sizeof(si->plant)); +} + +/* + * Fill LGR info with 2.2.2 stsi data + */ +static void lgr_stsi_2_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_2_2_2 *si = (void *) lgr_page; + + if (stsi(si, 2, 2, 2)) + return; + cpascii(lgr_info->name, si->name, sizeof(si->name)); + lgr_info->lpar_number = si->lpar_number; +} + +/* + * Fill LGR info with 3.2.2 stsi data + */ +static void lgr_stsi_3_2_2(struct lgr_info *lgr_info) +{ + struct sysinfo_3_2_2 *si = (void *) lgr_page; + int i; + + if (stsi(si, 3, 2, 2)) + return; + for (i = 0; i < min_t(u8, si->count, VM_LEVEL_MAX); i++) { + cpascii(lgr_info->vm[i].name, si->vm[i].name, + sizeof(si->vm[i].name)); + cpascii(lgr_info->vm[i].cpi, si->vm[i].cpi, + sizeof(si->vm[i].cpi)); + } + lgr_info->vm_count = si->count; +} + +/* + * Fill LGR info with current data + */ +static void lgr_info_get(struct lgr_info *lgr_info) +{ + int level; + + memset(lgr_info, 0, sizeof(*lgr_info)); + stfle(lgr_info->stfle_fac_list, ARRAY_SIZE(lgr_info->stfle_fac_list)); + 
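+	/* With function code 0, stsi() stores no system-information block and just returns the current configuration level (1 = CEC, 2 = LPAR, 3 = z/VM), which selects the stsi blocks fetched below. */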
level = stsi(NULL, 0, 0, 0); + lgr_info->level = level; + if (level >= 1) + lgr_stsi_1_1_1(lgr_info); + if (level >= 2) + lgr_stsi_2_2_2(lgr_info); + if (level >= 3) + lgr_stsi_3_2_2(lgr_info); +} + +/* + * Check if LGR info has changed and if yes log new LGR info to s390dbf + */ +void lgr_info_log(void) +{ + static DEFINE_SPINLOCK(lgr_info_lock); + unsigned long flags; + + if (!spin_trylock_irqsave(&lgr_info_lock, flags)) + return; + lgr_info_get(&lgr_info_cur); + if (memcmp(&lgr_info_last, &lgr_info_cur, sizeof(lgr_info_cur)) != 0) { + debug_event(lgr_dbf, 1, &lgr_info_cur, sizeof(lgr_info_cur)); + lgr_info_last = lgr_info_cur; + } + spin_unlock_irqrestore(&lgr_info_lock, flags); +} +EXPORT_SYMBOL_GPL(lgr_info_log); + +static void lgr_timer_set(void); + +/* + * LGR timer callback + */ +static void lgr_timer_fn(struct timer_list *unused) +{ + lgr_info_log(); + lgr_timer_set(); +} + +static struct timer_list lgr_timer; + +/* + * Setup next LGR timer + */ +static void lgr_timer_set(void) +{ + mod_timer(&lgr_timer, jiffies + msecs_to_jiffies(LGR_TIMER_INTERVAL_SECS * MSEC_PER_SEC)); +} + +/* + * Initialize LGR: Add s390dbf, write initial lgr_info and setup timer + */ +static int __init lgr_init(void) +{ + lgr_dbf = debug_register("lgr", 1, 1, sizeof(struct lgr_info)); + if (!lgr_dbf) + return -ENOMEM; + debug_register_view(lgr_dbf, &debug_hex_ascii_view); + lgr_info_get(&lgr_info_last); + debug_event(lgr_dbf, 1, &lgr_info_last, sizeof(lgr_info_last)); + timer_setup(&lgr_timer, lgr_timer_fn, TIMER_DEFERRABLE); + lgr_timer_set(); + return 0; +} +device_initcall(lgr_init); diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c new file mode 100644 index 0000000000..ce65fc0167 --- /dev/null +++ b/arch/s390/kernel/machine_kexec.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2005, 2011 + * + * Author(s): Rolf Adelsberger, + * Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#include <linux/device.h> +#include <linux/mm.h> +#include <linux/kexec.h> +#include <linux/delay.h> +#include <linux/reboot.h> +#include <linux/ftrace.h> +#include <linux/debug_locks.h> +#include <asm/pfault.h> +#include <asm/cio.h> +#include <asm/setup.h> +#include <asm/smp.h> +#include <asm/ipl.h> +#include <asm/diag.h> +#include <asm/elf.h> +#include <asm/asm-offsets.h> +#include <asm/cacheflush.h> +#include <asm/abs_lowcore.h> +#include <asm/os_info.h> +#include <asm/set_memory.h> +#include <asm/stacktrace.h> +#include <asm/switch_to.h> +#include <asm/nmi.h> +#include <asm/sclp.h> + +typedef void (*relocate_kernel_t)(unsigned long, unsigned long, unsigned long); +typedef int (*purgatory_t)(int); + +extern const unsigned char relocate_kernel[]; +extern const unsigned long long relocate_kernel_len; + +#ifdef CONFIG_CRASH_DUMP + +/* + * Reset the system, copy boot CPU registers to absolute zero, + * and jump to the kdump image + */ +static void __do_machine_kdump(void *data) +{ + struct kimage *image = data; + purgatory_t purgatory; + unsigned long prefix; + + purgatory = (purgatory_t)image->start; + + /* store_status() saved the prefix register to lowcore */ + prefix = (unsigned long) S390_lowcore.prefixreg_save_area; + + /* Now do the reset */ + s390_reset_system(); + + /* + * Copy dump CPU store status info to absolute zero. 
+ * This need to be done *after* s390_reset_system set the + * prefix register of this CPU to zero + */ + memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA), + phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512); + + call_nodat(1, int, purgatory, int, 1); + + /* Die if kdump returns */ + disabled_wait(); +} + +/* + * Start kdump: create a LGR log entry, store status of all CPUs and + * branch to __do_machine_kdump. + */ +static noinline void __machine_kdump(void *image) +{ + struct mcesa *mcesa; + union ctlreg2 cr2_old, cr2_new; + int this_cpu, cpu; + + lgr_info_log(); + /* Get status of the other CPUs */ + this_cpu = smp_find_processor_id(stap()); + for_each_online_cpu(cpu) { + if (cpu == this_cpu) + continue; + if (smp_store_status(cpu)) + continue; + } + /* Store status of the boot CPU */ + mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); + if (MACHINE_HAS_VX) + save_vx_regs((__vector128 *) mcesa->vector_save_area); + if (MACHINE_HAS_GS) { + __ctl_store(cr2_old.val, 2, 2); + cr2_new = cr2_old; + cr2_new.gse = 1; + __ctl_load(cr2_new.val, 2, 2); + save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area); + __ctl_load(cr2_old.val, 2, 2); + } + /* + * To create a good backchain for this CPU in the dump store_status + * is passed the address of a function. The address is saved into + * the PSW save area of the boot CPU and the function is invoked as + * a tail call of store_status. The backchain in the dump will look + * like this: + * restart_int_handler -> __machine_kexec -> __do_machine_kdump + * The call to store_status() will not return. + */ + store_status(__do_machine_kdump, image); +} + +#endif /* CONFIG_CRASH_DUMP */ + +/* + * Check if kdump checksums are valid: We call purgatory with parameter "0" + */ +static bool kdump_csum_valid(struct kimage *image) +{ +#ifdef CONFIG_CRASH_DUMP + purgatory_t purgatory = (purgatory_t)image->start; + int rc; + + rc = call_nodat(1, int, purgatory, int, 0); + return rc == 0; +#else + return false; +#endif +} + +#ifdef CONFIG_CRASH_DUMP + +void crash_free_reserved_phys_range(unsigned long begin, unsigned long end) +{ + unsigned long addr, size; + + for (addr = begin; addr < end; addr += PAGE_SIZE) + free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT)); + size = begin - crashk_res.start; + if (size) + os_info_crashkernel_add(crashk_res.start, size); + else + os_info_crashkernel_add(0, 0); +} + +static void crash_protect_pages(int protect) +{ + unsigned long size; + + if (!crashk_res.end) + return; + size = resource_size(&crashk_res); + if (protect) + set_memory_ro(crashk_res.start, size >> PAGE_SHIFT); + else + set_memory_rw(crashk_res.start, size >> PAGE_SHIFT); +} + +void arch_kexec_protect_crashkres(void) +{ + crash_protect_pages(1); +} + +void arch_kexec_unprotect_crashkres(void) +{ + crash_protect_pages(0); +} + +#endif + +/* + * Give back memory to hypervisor before new kdump is loaded + */ +static int machine_kexec_prepare_kdump(void) +{ +#ifdef CONFIG_CRASH_DUMP + if (MACHINE_IS_VM) + diag10_range(PFN_DOWN(crashk_res.start), + PFN_DOWN(crashk_res.end - crashk_res.start + 1)); + return 0; +#else + return -EINVAL; +#endif +} + +int machine_kexec_prepare(struct kimage *image) +{ + void *reboot_code_buffer; + + if (image->type == KEXEC_TYPE_CRASH) + return machine_kexec_prepare_kdump(); + + /* We don't support anything but the default image type for now. 
*/ + if (image->type != KEXEC_TYPE_DEFAULT) + return -EINVAL; + + /* Get the destination where the assembler code should be copied to.*/ + reboot_code_buffer = page_to_virt(image->control_code_page); + + /* Then copy it */ + memcpy(reboot_code_buffer, relocate_kernel, relocate_kernel_len); + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +void arch_crash_save_vmcoreinfo(void) +{ + struct lowcore *abs_lc; + + VMCOREINFO_SYMBOL(lowcore_ptr); + VMCOREINFO_SYMBOL(high_memory); + VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); + vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31); + vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31); + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + abs_lc = get_abs_lowcore(); + abs_lc->vmcore_info = paddr_vmcoreinfo_note(); + put_abs_lowcore(abs_lc); +} + +void machine_shutdown(void) +{ +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + set_os_info_reipl_block(); +} + +/* + * Do normal kexec + */ +static void __do_machine_kexec(void *data) +{ + unsigned long data_mover, entry, diag308_subcode; + struct kimage *image = data; + + data_mover = page_to_phys(image->control_code_page); + entry = virt_to_phys(&image->head); + diag308_subcode = DIAG308_CLEAR_RESET; + if (sclp.has_iplcc) + diag308_subcode |= DIAG308_FLAG_EI; + s390_reset_system(); + + call_nodat(3, void, (relocate_kernel_t)data_mover, + unsigned long, entry, + unsigned long, image->start, + unsigned long, diag308_subcode); + + /* Die if kexec returns */ + disabled_wait(); +} + +/* + * Reset system and call either kdump or normal kexec + */ +static void __machine_kexec(void *data) +{ + pfault_fini(); + tracing_off(); + debug_locks_off(); +#ifdef CONFIG_CRASH_DUMP + if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) + __machine_kdump(data); +#endif + __do_machine_kexec(data); +} + +/* + * Do either kdump or normal kexec. In case of kdump we first ask + * purgatory, if kdump checksums are valid. + */ +void machine_kexec(struct kimage *image) +{ + if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image)) + return; + tracer_disable(); + smp_send_stop(); + smp_call_ipl_cpu(__machine_kexec, image); +} diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c new file mode 100644 index 0000000000..8d207b82d9 --- /dev/null +++ b/arch/s390/kernel/machine_kexec_file.c @@ -0,0 +1,379 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * s390 code for kexec_file_load system call + * + * Copyright IBM Corp. 2018 + * + * Author(s): Philipp Rudo <prudo@linux.vnet.ibm.com> + */ + +#define pr_fmt(fmt) "kexec: " fmt + +#include <linux/elf.h> +#include <linux/errno.h> +#include <linux/kexec.h> +#include <linux/module_signature.h> +#include <linux/verification.h> +#include <linux/vmalloc.h> +#include <asm/boot_data.h> +#include <asm/ipl.h> +#include <asm/setup.h> + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &s390_kexec_elf_ops, + &s390_kexec_image_ops, + NULL, +}; + +#ifdef CONFIG_KEXEC_SIG +int s390_verify_sig(const char *kernel, unsigned long kernel_len) +{ + const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1; + struct module_signature *ms; + unsigned long sig_len; + int ret; + + /* Skip signature verification when not secure IPLed. 
*/ + if (!ipl_secure_flag) + return 0; + + if (marker_len > kernel_len) + return -EKEYREJECTED; + + if (memcmp(kernel + kernel_len - marker_len, MODULE_SIG_STRING, + marker_len)) + return -EKEYREJECTED; + kernel_len -= marker_len; + + ms = (void *)kernel + kernel_len - sizeof(*ms); + kernel_len -= sizeof(*ms); + + sig_len = be32_to_cpu(ms->sig_len); + if (sig_len >= kernel_len) + return -EKEYREJECTED; + kernel_len -= sig_len; + + if (ms->id_type != PKEY_ID_PKCS7) + return -EKEYREJECTED; + + if (ms->algo != 0 || + ms->hash != 0 || + ms->signer_len != 0 || + ms->key_id_len != 0 || + ms->__pad[0] != 0 || + ms->__pad[1] != 0 || + ms->__pad[2] != 0) { + return -EBADMSG; + } + + ret = verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) + ret = verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + return ret; +} +#endif /* CONFIG_KEXEC_SIG */ + +static int kexec_file_update_purgatory(struct kimage *image, + struct s390_load_data *data) +{ + u64 entry, type; + int ret; + + if (image->type == KEXEC_TYPE_CRASH) { + entry = STARTUP_KDUMP_OFFSET; + type = KEXEC_TYPE_CRASH; + } else { + entry = STARTUP_NORMAL_OFFSET; + type = KEXEC_TYPE_DEFAULT; + } + + ret = kexec_purgatory_get_set_symbol(image, "kernel_entry", &entry, + sizeof(entry), false); + if (ret) + return ret; + + ret = kexec_purgatory_get_set_symbol(image, "kernel_type", &type, + sizeof(type), false); + if (ret) + return ret; + + if (image->type == KEXEC_TYPE_CRASH) { + u64 crash_size; + + ret = kexec_purgatory_get_set_symbol(image, "crash_start", + &crashk_res.start, + sizeof(crashk_res.start), + false); + if (ret) + return ret; + + crash_size = crashk_res.end - crashk_res.start + 1; + ret = kexec_purgatory_get_set_symbol(image, "crash_size", + &crash_size, + sizeof(crash_size), + false); + } + return ret; +} + +static int kexec_file_add_purgatory(struct kimage *image, + struct s390_load_data *data) +{ + struct kexec_buf buf; + int ret; + + buf.image = image; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + + ret = kexec_load_purgatory(image, &buf); + if (ret) + return ret; + data->memsz += buf.memsz; + + return kexec_file_update_purgatory(image, data); +} + +static int kexec_file_add_initrd(struct kimage *image, + struct s390_load_data *data) +{ + struct kexec_buf buf; + int ret; + + buf.image = image; + + buf.buffer = image->initrd_buf; + buf.bufsz = image->initrd_buf_len; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + buf.memsz = buf.bufsz; + + data->parm->initrd_start = data->memsz; + data->parm->initrd_size = buf.memsz; + data->memsz += buf.memsz; + + ret = kexec_add_buffer(&buf); + if (ret) + return ret; + + return ipl_report_add_component(data->report, &buf, 0, 0); +} + +static int kexec_file_add_ipl_report(struct kimage *image, + struct s390_load_data *data) +{ + __u32 *lc_ipl_parmblock_ptr; + unsigned int len, ncerts; + struct kexec_buf buf; + unsigned long addr; + void *ptr, *end; + int ret; + + buf.image = image; + + data->memsz = ALIGN(data->memsz, PAGE_SIZE); + buf.mem = data->memsz; + + ptr = __va(ipl_cert_list_addr); + end = ptr + ipl_cert_list_size; + ncerts = 0; + 
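+	/*
+	 * Walk the certificate list twice: each entry is a length field
+	 * followed by the certificate data. The first pass only counts the
+	 * entries, so that the load address of the certificate bodies
+	 * (placed behind the report and its per-certificate entries) can be
+	 * computed before the second pass adds them to the IPL report.
+	 */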
while (ptr < end) { + ncerts++; + len = *(unsigned int *)ptr; + ptr += sizeof(len); + ptr += len; + } + + addr = data->memsz + data->report->size; + addr += ncerts * sizeof(struct ipl_rb_certificate_entry); + ptr = __va(ipl_cert_list_addr); + while (ptr < end) { + len = *(unsigned int *)ptr; + ptr += sizeof(len); + ipl_report_add_certificate(data->report, ptr, addr, len); + addr += len; + ptr += len; + } + + ret = -ENOMEM; + buf.buffer = ipl_report_finish(data->report); + if (!buf.buffer) + goto out; + buf.bufsz = data->report->size; + buf.memsz = buf.bufsz; + image->arch.ipl_buf = buf.buffer; + + data->memsz += buf.memsz; + + lc_ipl_parmblock_ptr = + data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); + *lc_ipl_parmblock_ptr = (__u32)buf.mem; + + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + + ret = kexec_add_buffer(&buf); +out: + return ret; +} + +void *kexec_file_add_components(struct kimage *image, + int (*add_kernel)(struct kimage *image, + struct s390_load_data *data)) +{ + unsigned long max_command_line_size = LEGACY_COMMAND_LINE_SIZE; + struct s390_load_data data = {0}; + unsigned long minsize; + int ret; + + data.report = ipl_report_init(&ipl_block); + if (IS_ERR(data.report)) + return data.report; + + ret = add_kernel(image, &data); + if (ret) + goto out; + + ret = -EINVAL; + minsize = PARMAREA + offsetof(struct parmarea, command_line); + if (image->kernel_buf_len < minsize) + goto out; + + if (data.parm->max_command_line_size) + max_command_line_size = data.parm->max_command_line_size; + + if (minsize + max_command_line_size < minsize) + goto out; + + if (image->kernel_buf_len < minsize + max_command_line_size) + goto out; + + if (image->cmdline_buf_len >= max_command_line_size) + goto out; + + memcpy(data.parm->command_line, image->cmdline_buf, + image->cmdline_buf_len); + + if (image->type == KEXEC_TYPE_CRASH) { + data.parm->oldmem_base = crashk_res.start; + data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1; + } + + if (image->initrd_buf) { + ret = kexec_file_add_initrd(image, &data); + if (ret) + goto out; + } + + ret = kexec_file_add_purgatory(image, &data); + if (ret) + goto out; + + if (data.kernel_mem == 0) { + unsigned long restart_psw = 0x0008000080000000UL; + restart_psw += image->start; + memcpy(data.kernel_buf, &restart_psw, sizeof(restart_psw)); + image->start = 0; + } + + ret = kexec_file_add_ipl_report(image, &data); +out: + ipl_report_free(data.report); + return ERR_PTR(ret); +} + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; + Elf_Rela *relas; + int i, r_type; + int ret; + + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + if (sym->st_shndx == SHN_UNDEF) { + 
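+			/*
+			 * An undefined symbol cannot be resolved at this
+			 * point, so reject the purgatory image rather than
+			 * applying a bogus relocation.
+			 */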
pr_err("Undefined symbol: %s\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx == SHN_COMMON) { + pr_err("symbol '%s' in common section\n", name); + return -ENOEXEC; + } + + if (sym->st_shndx >= pi->ehdr->e_shnum && + sym->st_shndx != SHN_ABS) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + val = sym->st_value; + if (sym->st_shndx != SHN_ABS) + val += pi->sechdrs[sym->st_shndx].sh_addr; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + r_type = ELF64_R_TYPE(relas[i].r_info); + + if (r_type == R_390_PLT32DBL) + r_type = R_390_PC32DBL; + + ret = arch_kexec_do_relocs(r_type, loc, val, addr); + if (ret) { + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } + } + return 0; +} + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->arch.ipl_buf); + image->arch.ipl_buf = NULL; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c new file mode 100644 index 0000000000..b7182cec48 --- /dev/null +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/elf.h> +#include <asm/kexec.h> + +int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, + unsigned long addr) +{ + switch (r_type) { + case R_390_NONE: + break; + case R_390_8: /* Direct 8 bit. */ + *(u8 *)loc = val; + break; + case R_390_12: /* Direct 12 bit. */ + *(u16 *)loc &= 0xf000; + *(u16 *)loc |= val & 0xfff; + break; + case R_390_16: /* Direct 16 bit. */ + *(u16 *)loc = val; + break; + case R_390_20: /* Direct 20 bit. */ + *(u32 *)loc &= 0xf00000ff; + *(u32 *)loc |= (val & 0xfff) << 16; /* DL */ + *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */ + break; + case R_390_32: /* Direct 32 bit. */ + *(u32 *)loc = val; + break; + case R_390_64: /* Direct 64 bit. */ + case R_390_GLOB_DAT: + case R_390_JMP_SLOT: + *(u64 *)loc = val; + break; + case R_390_PC16: /* PC relative 16 bit. */ + *(u16 *)loc = (val - addr); + break; + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + *(u16 *)loc = (val - addr) >> 1; + break; + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + *(u32 *)loc = (val - addr) >> 1; + break; + case R_390_PC32: /* PC relative 32 bit. */ + *(u32 *)loc = (val - addr); + break; + case R_390_PC64: /* PC relative 64 bit. */ + *(u64 *)loc = (val - addr); + break; + case R_390_RELATIVE: + *(unsigned long *) loc = val; + break; + default: + return 1; + } + return 0; +} diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S new file mode 100644 index 0000000000..ae4d4fd9af --- /dev/null +++ b/arch/s390/kernel/mcount.S @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 
2008, 2009 + * + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/ftrace.h> +#include <asm/nospec-insn.h> +#include <asm/ptrace.h> + +#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + +#define STACK_FRAME_SIZE_FREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE) +#define STACK_FREGS (STACK_FRAME_OVERHEAD) +#define STACK_FREGS_PTREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS) +#define STACK_FREGS_PTREGS_GPRS (STACK_FREGS_PTREGS + __PT_GPRS) +#define STACK_FREGS_PTREGS_PSW (STACK_FREGS_PTREGS + __PT_PSW) +#define STACK_FREGS_PTREGS_ORIG_GPR2 (STACK_FREGS_PTREGS + __PT_ORIG_GPR2) +#define STACK_FREGS_PTREGS_FLAGS (STACK_FREGS_PTREGS + __PT_FLAGS) + +/* packed stack: allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 + +#ifdef CONFIG_FUNCTION_TRACER + + GEN_BR_THUNK %r1 + GEN_BR_THUNK %r14 + + .section .kprobes.text, "ax" + +SYM_FUNC_START(ftrace_stub) + BR_EX %r14 +SYM_FUNC_END(ftrace_stub) + +SYM_CODE_START(ftrace_stub_direct_tramp) + lgr %r1, %r0 + BR_EX %r1 +SYM_CODE_END(ftrace_stub_direct_tramp) + + .macro ftrace_regs_entry, allregs=0 + stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller + + .if \allregs == 1 + # save psw mask + # don't put any instructions clobbering CC before this point + epsw %r1,%r14 + risbg %r14,%r1,0,31,32 + .endif + + lgr %r1,%r15 + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate ftrace_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE_FREGS + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + xc STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15) + + .if \allregs == 1 + stg %r14,(STACK_FREGS_PTREGS_PSW)(%r15) + mvghi STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS + .else + xc STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15) + .endif + + lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + .endm + +SYM_CODE_START(ftrace_regs_caller) + ftrace_regs_entry 1 + j ftrace_common +SYM_CODE_END(ftrace_regs_caller) + +SYM_CODE_START(ftrace_caller) + ftrace_regs_entry 0 + j ftrace_common +SYM_CODE_END(ftrace_caller) + +SYM_CODE_START(ftrace_common) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + aghik %r2,%r0,-MCOUNT_INSN_SIZE + lgrl %r4,function_trace_op + lgrl %r1,ftrace_func +#else + lgr %r2,%r0 + aghi %r2,-MCOUNT_INSN_SIZE + larl %r4,function_trace_op + lg %r4,0(%r4) + larl %r1,ftrace_func + lg %r1,0(%r1) +#endif + lgr %r3,%r14 + la %r5,STACK_FREGS(%r15) + BASR_EX %r14,%r1 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +# The j instruction gets runtime patched to a nop instruction. +# See ftrace_enable_ftrace_graph_caller. 
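+# Once the j below has been patched to a nop, the saved return address,
+# stack pointer and instruction address are passed to
+# prepare_ftrace_return(), and the (possibly rewritten) return address is
+# stored back so the traced function returns through return_to_handler.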
+SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL) + j .Lftrace_graph_caller_end + lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) + lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) +.Lftrace_graph_caller_end: +#endif + lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES + ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) + locgrz %r1,%r0 +#else + lg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) + ltgr %r1,%r1 + jnz 0f + lgr %r1,%r0 +#endif +0: lmg %r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) + BR_EX %r1 +SYM_CODE_END(ftrace_common) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +SYM_FUNC_START(return_to_handler) + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE) + stg %r1,__SF_BACKCHAIN(%r15) + la %r3,STACK_FRAME_OVERHEAD(%r15) + stg %r1,__FGRAPH_RET_FP(%r3) + stg %r2,__FGRAPH_RET_GPR2(%r3) + lgr %r2,%r3 + brasl %r14,ftrace_return_to_handler + aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) + BR_EX %r14 +SYM_FUNC_END(return_to_handler) + +#endif +#endif /* CONFIG_FUNCTION_TRACER */ + +SYM_CODE_START(ftrace_shared_hotpatch_trampoline_br) + lmg %r0,%r1,2(%r1) + br %r1 +SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_br_end, SYM_L_GLOBAL) +SYM_CODE_END(ftrace_shared_hotpatch_trampoline_br) + +#ifdef CONFIG_EXPOLINE +SYM_CODE_START(ftrace_shared_hotpatch_trampoline_exrl) + lmg %r0,%r1,2(%r1) + exrl %r0,0f + j . +0: br %r1 +SYM_INNER_LABEL(ftrace_shared_hotpatch_trampoline_exrl_end, SYM_L_GLOBAL) +SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl) +#endif /* CONFIG_EXPOLINE */ + +#ifdef CONFIG_RETHOOK + +SYM_CODE_START(arch_rethook_trampoline) + stg %r14,(__SF_GPRS+8*8)(%r15) + lay %r15,-STACK_FRAME_SIZE_PTREGS(%r15) + stmg %r0,%r14,STACK_PTREGS_GPRS(%r15) + + # store original stack pointer in backchain and pt_regs + lay %r7,STACK_FRAME_SIZE_PTREGS(%r15) + stg %r7,__SF_BACKCHAIN(%r15) + stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15) + + # store full psw + epsw %r2,%r3 + risbg %r3,%r2,0,31,32 + stg %r3,STACK_PTREGS_PSW(%r15) + larl %r1,arch_rethook_trampoline + stg %r1,STACK_PTREGS_PSW+8(%r15) + + lay %r2,STACK_PTREGS(%r15) + brasl %r14,arch_rethook_trampoline_callback + + mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15) + lmg %r0,%r15,STACK_PTREGS_GPRS(%r15) + lpswe __SF_EMPTY(%r15) +SYM_CODE_END(arch_rethook_trampoline) + +#endif /* CONFIG_RETHOOK */ diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c new file mode 100644 index 0000000000..42215f9404 --- /dev/null +++ b/arch/s390/kernel/module.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Kernel module help for s390. + * + * S390 version + * Copyright IBM Corp. 2002, 2003 + * Author(s): Arnd Bergmann (arndb@de.ibm.com) + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * based on i386 version + * Copyright (C) 2001 Rusty Russell. + */ +#include <linux/module.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/ftrace.h> +#include <linux/string.h> +#include <linux/kernel.h> +#include <linux/kasan.h> +#include <linux/moduleloader.h> +#include <linux/bug.h> +#include <linux/memory.h> +#include <asm/alternative.h> +#include <asm/nospec-branch.h> +#include <asm/facility.h> +#include <asm/ftrace.lds.h> +#include <asm/set_memory.h> +#include <asm/setup.h> + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(fmt , ...) 
+#endif + +#define PLT_ENTRY_SIZE 22 + +static unsigned long get_module_load_offset(void) +{ + static DEFINE_MUTEX(module_kaslr_mutex); + static unsigned long module_load_offset; + + if (!kaslr_enabled()) + return 0; + /* + * Calculate the module_load_offset the first time this code + * is called. Once calculated it stays the same until reboot. + */ + mutex_lock(&module_kaslr_mutex); + if (!module_load_offset) + module_load_offset = get_random_u32_inclusive(1, 1024) * PAGE_SIZE; + mutex_unlock(&module_kaslr_mutex); + return module_load_offset; +} + +void *module_alloc(unsigned long size) +{ + gfp_t gfp_mask = GFP_KERNEL; + void *p; + + if (PAGE_ALIGN(size) > MODULES_LEN) + return NULL; + p = __vmalloc_node_range(size, MODULE_ALIGN, + MODULES_VADDR + get_module_load_offset(), + MODULES_END, gfp_mask, PAGE_KERNEL, + VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, + NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { + vfree(p); + return NULL; + } + return p; +} + +#ifdef CONFIG_FUNCTION_TRACER +void module_arch_cleanup(struct module *mod) +{ + module_memfree(mod->arch.trampolines_start); +} +#endif + +void module_arch_freeing_init(struct module *mod) +{ + if (is_livepatch_module(mod) && + mod->state == MODULE_STATE_LIVE) + return; + + vfree(mod->arch.syminfo); + mod->arch.syminfo = NULL; +} + +static void check_rela(Elf_Rela *rela, struct module *me) +{ + struct mod_arch_syminfo *info; + + info = me->arch.syminfo + ELF_R_SYM (rela->r_info); + switch (ELF_R_TYPE (rela->r_info)) { + case R_390_GOT12: /* 12 bit GOT offset. */ + case R_390_GOT16: /* 16 bit GOT offset. */ + case R_390_GOT20: /* 20 bit GOT offset. */ + case R_390_GOT32: /* 32 bit GOT offset. */ + case R_390_GOT64: /* 64 bit GOT offset. */ + case R_390_GOTENT: /* 32 bit PC rel. to GOT entry shifted by 1. */ + case R_390_GOTPLT12: /* 12 bit offset to jump slot. */ + case R_390_GOTPLT16: /* 16 bit offset to jump slot. */ + case R_390_GOTPLT20: /* 20 bit offset to jump slot. */ + case R_390_GOTPLT32: /* 32 bit offset to jump slot. */ + case R_390_GOTPLT64: /* 64 bit offset to jump slot. */ + case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */ + if (info->got_offset == -1UL) { + info->got_offset = me->arch.got_size; + me->arch.got_size += sizeof(void*); + } + break; + case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ + case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ + case R_390_PLT32: /* 32 bit PC relative PLT address. */ + case R_390_PLT64: /* 64 bit PC relative PLT address. */ + case R_390_PLTOFF16: /* 16 bit offset from GOT to PLT. */ + case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ + case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ + if (info->plt_offset == -1UL) { + info->plt_offset = me->arch.plt_size; + me->arch.plt_size += PLT_ENTRY_SIZE; + } + break; + case R_390_COPY: + case R_390_GLOB_DAT: + case R_390_JMP_SLOT: + case R_390_RELATIVE: + /* Only needed if we want to support loading of + modules linked with -shared. */ + break; + } +} + +/* + * Account for GOT and PLT relocations. We can't add sections for + * got and plt but we can increase the core module size. + */ +int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *me) +{ + Elf_Shdr *symtab; + Elf_Sym *symbols; + Elf_Rela *rela; + char *strings; + int nrela, i, j; + struct module_memory *mod_mem; + + /* Find symbol table and string table. 
*/ + symtab = NULL; + for (i = 0; i < hdr->e_shnum; i++) + switch (sechdrs[i].sh_type) { + case SHT_SYMTAB: + symtab = sechdrs + i; + break; + } + if (!symtab) { + printk(KERN_ERR "module %s: no symbol table\n", me->name); + return -ENOEXEC; + } + + /* Allocate one syminfo structure per symbol. */ + me->arch.nsyms = symtab->sh_size / sizeof(Elf_Sym); + me->arch.syminfo = vmalloc(array_size(sizeof(struct mod_arch_syminfo), + me->arch.nsyms)); + if (!me->arch.syminfo) + return -ENOMEM; + symbols = (void *) hdr + symtab->sh_offset; + strings = (void *) hdr + sechdrs[symtab->sh_link].sh_offset; + for (i = 0; i < me->arch.nsyms; i++) { + if (symbols[i].st_shndx == SHN_UNDEF && + strcmp(strings + symbols[i].st_name, + "_GLOBAL_OFFSET_TABLE_") == 0) + /* "Define" it as absolute. */ + symbols[i].st_shndx = SHN_ABS; + me->arch.syminfo[i].got_offset = -1UL; + me->arch.syminfo[i].plt_offset = -1UL; + me->arch.syminfo[i].got_initialized = 0; + me->arch.syminfo[i].plt_initialized = 0; + } + + /* Search for got/plt relocations. */ + me->arch.got_size = me->arch.plt_size = 0; + for (i = 0; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type != SHT_RELA) + continue; + nrela = sechdrs[i].sh_size / sizeof(Elf_Rela); + rela = (void *) hdr + sechdrs[i].sh_offset; + for (j = 0; j < nrela; j++) + check_rela(rela + j, me); + } + + /* Increase core size by size of got & plt and set start + offsets for got and plt. */ + mod_mem = &me->mem[MOD_TEXT]; + mod_mem->size = ALIGN(mod_mem->size, 4); + me->arch.got_offset = mod_mem->size; + mod_mem->size += me->arch.got_size; + me->arch.plt_offset = mod_mem->size; + if (me->arch.plt_size) { + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) + me->arch.plt_size += PLT_ENTRY_SIZE; + mod_mem->size += me->arch.plt_size; + } + return 0; +} + +static int apply_rela_bits(Elf_Addr loc, Elf_Addr val, + int sign, int bits, int shift, + void *(*write)(void *dest, const void *src, size_t len)) +{ + unsigned long umax; + long min, max; + void *dest = (void *)loc; + + if (val & ((1UL << shift) - 1)) + return -ENOEXEC; + if (sign) { + val = (Elf_Addr)(((long) val) >> shift); + min = -(1L << (bits - 1)); + max = (1L << (bits - 1)) - 1; + if ((long) val < min || (long) val > max) + return -ENOEXEC; + } else { + val >>= shift; + umax = ((1UL << (bits - 1)) << 1) - 1; + if ((unsigned long) val > umax) + return -ENOEXEC; + } + + if (bits == 8) { + unsigned char tmp = val; + write(dest, &tmp, 1); + } else if (bits == 12) { + unsigned short tmp = (val & 0xfff) | + (*(unsigned short *) loc & 0xf000); + write(dest, &tmp, 2); + } else if (bits == 16) { + unsigned short tmp = val; + write(dest, &tmp, 2); + } else if (bits == 20) { + unsigned int tmp = (val & 0xfff) << 16 | + (val & 0xff000) >> 4 | (*(unsigned int *) loc & 0xf00000ff); + write(dest, &tmp, 4); + } else if (bits == 32) { + unsigned int tmp = val; + write(dest, &tmp, 4); + } else if (bits == 64) { + unsigned long tmp = val; + write(dest, &tmp, 8); + } + return 0; +} + +static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, + const char *strtab, struct module *me, + void *(*write)(void *dest, const void *src, size_t len)) +{ + struct mod_arch_syminfo *info; + Elf_Addr loc, val; + int r_type, r_sym; + int rc = -ENOEXEC; + + /* This is where to make the change */ + loc = base + rela->r_offset; + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. 
*/ + r_sym = ELF_R_SYM(rela->r_info); + r_type = ELF_R_TYPE(rela->r_info); + info = me->arch.syminfo + r_sym; + val = symtab[r_sym].st_value; + + switch (r_type) { + case R_390_NONE: /* No relocation. */ + rc = 0; + break; + case R_390_8: /* Direct 8 bit. */ + case R_390_12: /* Direct 12 bit. */ + case R_390_16: /* Direct 16 bit. */ + case R_390_20: /* Direct 20 bit. */ + case R_390_32: /* Direct 32 bit. */ + case R_390_64: /* Direct 64 bit. */ + val += rela->r_addend; + if (r_type == R_390_8) + rc = apply_rela_bits(loc, val, 0, 8, 0, write); + else if (r_type == R_390_12) + rc = apply_rela_bits(loc, val, 0, 12, 0, write); + else if (r_type == R_390_16) + rc = apply_rela_bits(loc, val, 0, 16, 0, write); + else if (r_type == R_390_20) + rc = apply_rela_bits(loc, val, 1, 20, 0, write); + else if (r_type == R_390_32) + rc = apply_rela_bits(loc, val, 0, 32, 0, write); + else if (r_type == R_390_64) + rc = apply_rela_bits(loc, val, 0, 64, 0, write); + break; + case R_390_PC16: /* PC relative 16 bit. */ + case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */ + case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */ + case R_390_PC32: /* PC relative 32 bit. */ + case R_390_PC64: /* PC relative 64 bit. */ + val += rela->r_addend - loc; + if (r_type == R_390_PC16) + rc = apply_rela_bits(loc, val, 1, 16, 0, write); + else if (r_type == R_390_PC16DBL) + rc = apply_rela_bits(loc, val, 1, 16, 1, write); + else if (r_type == R_390_PC32DBL) + rc = apply_rela_bits(loc, val, 1, 32, 1, write); + else if (r_type == R_390_PC32) + rc = apply_rela_bits(loc, val, 1, 32, 0, write); + else if (r_type == R_390_PC64) + rc = apply_rela_bits(loc, val, 1, 64, 0, write); + break; + case R_390_GOT12: /* 12 bit GOT offset. */ + case R_390_GOT16: /* 16 bit GOT offset. */ + case R_390_GOT20: /* 20 bit GOT offset. */ + case R_390_GOT32: /* 32 bit GOT offset. */ + case R_390_GOT64: /* 64 bit GOT offset. */ + case R_390_GOTENT: /* 32 bit PC rel. to GOT entry shifted by 1. */ + case R_390_GOTPLT12: /* 12 bit offset to jump slot. */ + case R_390_GOTPLT20: /* 20 bit offset to jump slot. */ + case R_390_GOTPLT16: /* 16 bit offset to jump slot. */ + case R_390_GOTPLT32: /* 32 bit offset to jump slot. */ + case R_390_GOTPLT64: /* 64 bit offset to jump slot. */ + case R_390_GOTPLTENT: /* 32 bit rel. offset to jump slot >> 1. */ + if (info->got_initialized == 0) { + Elf_Addr *gotent = me->mem[MOD_TEXT].base + + me->arch.got_offset + + info->got_offset; + + write(gotent, &val, sizeof(*gotent)); + info->got_initialized = 1; + } + val = info->got_offset + rela->r_addend; + if (r_type == R_390_GOT12 || + r_type == R_390_GOTPLT12) + rc = apply_rela_bits(loc, val, 0, 12, 0, write); + else if (r_type == R_390_GOT16 || + r_type == R_390_GOTPLT16) + rc = apply_rela_bits(loc, val, 0, 16, 0, write); + else if (r_type == R_390_GOT20 || + r_type == R_390_GOTPLT20) + rc = apply_rela_bits(loc, val, 1, 20, 0, write); + else if (r_type == R_390_GOT32 || + r_type == R_390_GOTPLT32) + rc = apply_rela_bits(loc, val, 0, 32, 0, write); + else if (r_type == R_390_GOT64 || + r_type == R_390_GOTPLT64) + rc = apply_rela_bits(loc, val, 0, 64, 0, write); + else if (r_type == R_390_GOTENT || + r_type == R_390_GOTPLTENT) { + val += (Elf_Addr)me->mem[MOD_TEXT].base + + me->arch.got_offset - loc; + rc = apply_rela_bits(loc, val, 1, 32, 1, write); + } + break; + case R_390_PLT16DBL: /* 16 bit PC rel. PLT shifted by 1. */ + case R_390_PLT32DBL: /* 32 bit PC rel. PLT shifted by 1. */ + case R_390_PLT32: /* 32 bit PC relative PLT address. 
*/ + case R_390_PLT64: /* 64 bit PC relative PLT address. */ + case R_390_PLTOFF16: /* 16 bit offset from GOT to PLT. */ + case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ + case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ + if (info->plt_initialized == 0) { + unsigned char insn[PLT_ENTRY_SIZE]; + char *plt_base; + char *ip; + + plt_base = me->mem[MOD_TEXT].base + me->arch.plt_offset; + ip = plt_base + info->plt_offset; + *(int *)insn = 0x0d10e310; /* basr 1,0 */ + *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */ + if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { + char *jump_r1; + + jump_r1 = plt_base + me->arch.plt_size - + PLT_ENTRY_SIZE; + /* brcl 0xf,__jump_r1 */ + *(short *)&insn[8] = 0xc0f4; + *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2; + } else { + *(int *)&insn[8] = 0x07f10000; /* br %r1 */ + } + *(long *)&insn[14] = val; + + write(ip, insn, sizeof(insn)); + info->plt_initialized = 1; + } + if (r_type == R_390_PLTOFF16 || + r_type == R_390_PLTOFF32 || + r_type == R_390_PLTOFF64) + val = me->arch.plt_offset - me->arch.got_offset + + info->plt_offset + rela->r_addend; + else { + if (!((r_type == R_390_PLT16DBL && + val - loc + 0xffffUL < 0x1ffffeUL) || + (r_type == R_390_PLT32DBL && + val - loc + 0xffffffffULL < 0x1fffffffeULL))) + val = (Elf_Addr) me->mem[MOD_TEXT].base + + me->arch.plt_offset + + info->plt_offset; + val += rela->r_addend - loc; + } + if (r_type == R_390_PLT16DBL) + rc = apply_rela_bits(loc, val, 1, 16, 1, write); + else if (r_type == R_390_PLTOFF16) + rc = apply_rela_bits(loc, val, 0, 16, 0, write); + else if (r_type == R_390_PLT32DBL) + rc = apply_rela_bits(loc, val, 1, 32, 1, write); + else if (r_type == R_390_PLT32 || + r_type == R_390_PLTOFF32) + rc = apply_rela_bits(loc, val, 0, 32, 0, write); + else if (r_type == R_390_PLT64 || + r_type == R_390_PLTOFF64) + rc = apply_rela_bits(loc, val, 0, 64, 0, write); + break; + case R_390_GOTOFF16: /* 16 bit offset to GOT. */ + case R_390_GOTOFF32: /* 32 bit offset to GOT. */ + case R_390_GOTOFF64: /* 64 bit offset to GOT. */ + val = val + rela->r_addend - + ((Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset); + if (r_type == R_390_GOTOFF16) + rc = apply_rela_bits(loc, val, 0, 16, 0, write); + else if (r_type == R_390_GOTOFF32) + rc = apply_rela_bits(loc, val, 0, 32, 0, write); + else if (r_type == R_390_GOTOFF64) + rc = apply_rela_bits(loc, val, 0, 64, 0, write); + break; + case R_390_GOTPC: /* 32 bit PC relative offset to GOT. */ + case R_390_GOTPCDBL: /* 32 bit PC rel. off. to GOT shifted by 1. */ + val = (Elf_Addr) me->mem[MOD_TEXT].base + me->arch.got_offset + + rela->r_addend - loc; + if (r_type == R_390_GOTPC) + rc = apply_rela_bits(loc, val, 1, 32, 0, write); + else if (r_type == R_390_GOTPCDBL) + rc = apply_rela_bits(loc, val, 1, 32, 1, write); + break; + case R_390_COPY: + case R_390_GLOB_DAT: /* Create GOT entry. */ + case R_390_JMP_SLOT: /* Create PLT entry. */ + case R_390_RELATIVE: /* Adjust by program base. */ + /* Only needed if we want to support loading of + modules linked with -shared. 
*/ + return -ENOEXEC; + default: + printk(KERN_ERR "module %s: unknown relocation: %u\n", + me->name, r_type); + return -ENOEXEC; + } + if (rc) { + printk(KERN_ERR "module %s: relocation error for symbol %s " + "(r_type %i, value 0x%lx)\n", + me->name, strtab + symtab[r_sym].st_name, + r_type, (unsigned long) val); + return rc; + } + return 0; +} + +static int __apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me, + void *(*write)(void *dest, const void *src, size_t len)) +{ + Elf_Addr base; + Elf_Sym *symtab; + Elf_Rela *rela; + unsigned long i, n; + int rc; + + DEBUGP("Applying relocate section %u to %u\n", + relsec, sechdrs[relsec].sh_info); + base = sechdrs[sechdrs[relsec].sh_info].sh_addr; + symtab = (Elf_Sym *) sechdrs[symindex].sh_addr; + rela = (Elf_Rela *) sechdrs[relsec].sh_addr; + n = sechdrs[relsec].sh_size / sizeof(Elf_Rela); + + for (i = 0; i < n; i++, rela++) { + rc = apply_rela(rela, base, symtab, strtab, me, write); + if (rc) + return rc; + } + return 0; +} + +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + bool early = me->state == MODULE_STATE_UNFORMED; + void *(*write)(void *, const void *, size_t) = memcpy; + + if (!early) + write = s390_kernel_write; + + return __apply_relocate_add(sechdrs, strtab, symindex, relsec, me, + write); +} + +#ifdef CONFIG_FUNCTION_TRACER +static int module_alloc_ftrace_hotpatch_trampolines(struct module *me, + const Elf_Shdr *s) +{ + char *start, *end; + int numpages; + size_t size; + + size = FTRACE_HOTPATCH_TRAMPOLINES_SIZE(s->sh_size); + numpages = DIV_ROUND_UP(size, PAGE_SIZE); + start = module_alloc(numpages * PAGE_SIZE); + if (!start) + return -ENOMEM; + set_memory_rox((unsigned long)start, numpages); + end = start + size; + + me->arch.trampolines_start = (struct ftrace_hotpatch_trampoline *)start; + me->arch.trampolines_end = (struct ftrace_hotpatch_trampoline *)end; + me->arch.next_trampoline = me->arch.trampolines_start; + + return 0; +} +#endif /* CONFIG_FUNCTION_TRACER */ + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + char *secstrings, *secname; + void *aseg; +#ifdef CONFIG_FUNCTION_TRACER + int ret; +#endif + + if (IS_ENABLED(CONFIG_EXPOLINE) && + !nospec_disable && me->arch.plt_size) { + unsigned int *ij; + + ij = me->mem[MOD_TEXT].base + me->arch.plt_offset + + me->arch.plt_size - PLT_ENTRY_SIZE; + ij[0] = 0xc6000000; /* exrl %r0,.+10 */ + ij[1] = 0x0005a7f4; /* j . 
*/ + ij[2] = 0x000007f1; /* br %r1 */ + } + + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + aseg = (void *) s->sh_addr; + secname = secstrings + s->sh_name; + + if (!strcmp(".altinstructions", secname)) + /* patch .altinstructions */ + apply_alternatives(aseg, aseg + s->sh_size); + + if (IS_ENABLED(CONFIG_EXPOLINE) && + (str_has_prefix(secname, ".s390_indirect"))) + nospec_revert(aseg, aseg + s->sh_size); + + if (IS_ENABLED(CONFIG_EXPOLINE) && + (str_has_prefix(secname, ".s390_return"))) + nospec_revert(aseg, aseg + s->sh_size); + +#ifdef CONFIG_FUNCTION_TRACER + if (!strcmp(FTRACE_CALLSITE_SECTION, secname)) { + ret = module_alloc_ftrace_hotpatch_trampolines(me, s); + if (ret < 0) + return ret; + } +#endif /* CONFIG_FUNCTION_TRACER */ + } + + return 0; +} diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c new file mode 100644 index 0000000000..38ec048752 --- /dev/null +++ b/arch/s390/kernel/nmi.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Machine check handler + * + * Copyright IBM Corp. 2000, 2009 + * Author(s): Ingo Adlung <adlung@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Cornelia Huck <cornelia.huck@de.ibm.com>, + */ + +#include <linux/kernel_stat.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/entry-common.h> +#include <linux/hardirq.h> +#include <linux/log2.h> +#include <linux/kprobes.h> +#include <linux/kmemleak.h> +#include <linux/time.h> +#include <linux/module.h> +#include <linux/sched/signal.h> +#include <linux/kvm_host.h> +#include <linux/export.h> +#include <asm/lowcore.h> +#include <asm/smp.h> +#include <asm/stp.h> +#include <asm/cputime.h> +#include <asm/nmi.h> +#include <asm/crw.h> +#include <asm/switch_to.h> +#include <asm/ctl_reg.h> +#include <asm/asm-offsets.h> +#include <asm/pai.h> +#include <asm/vx-insn.h> + +struct mcck_struct { + unsigned int kill_task : 1; + unsigned int channel_report : 1; + unsigned int warning : 1; + unsigned int stp_queue : 1; + unsigned long mcck_code; +}; + +static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck); + +static inline int nmi_needs_mcesa(void) +{ + return MACHINE_HAS_VX || MACHINE_HAS_GS; +} + +/* + * The initial machine check extended save area for the boot CPU. + * It will be replaced on the boot CPU reinit with an allocated + * structure. The structure is required for machine check happening + * early in the boot process. + */ +static struct mcesa boot_mcesa __aligned(MCESA_MAX_SIZE); + +void __init nmi_alloc_mcesa_early(u64 *mcesad) +{ + if (!nmi_needs_mcesa()) + return; + *mcesad = __pa(&boot_mcesa); + if (MACHINE_HAS_GS) + *mcesad |= ilog2(MCESA_MAX_SIZE); +} + +int nmi_alloc_mcesa(u64 *mcesad) +{ + unsigned long size; + void *origin; + + *mcesad = 0; + if (!nmi_needs_mcesa()) + return 0; + size = MACHINE_HAS_GS ? 
MCESA_MAX_SIZE : MCESA_MIN_SIZE; + origin = kmalloc(size, GFP_KERNEL); + if (!origin) + return -ENOMEM; + /* The pointer is stored with mcesa_bits ORed in */ + kmemleak_not_leak(origin); + *mcesad = __pa(origin); + if (MACHINE_HAS_GS) + *mcesad |= ilog2(MCESA_MAX_SIZE); + return 0; +} + +void nmi_free_mcesa(u64 *mcesad) +{ + if (!nmi_needs_mcesa()) + return; + kfree(__va(*mcesad & MCESA_ORIGIN_MASK)); +} + +static __always_inline char *nmi_puts(char *dest, const char *src) +{ + while (*src) + *dest++ = *src++; + *dest = 0; + return dest; +} + +static __always_inline char *u64_to_hex(char *dest, u64 val) +{ + int i, num; + + for (i = 1; i <= 16; i++) { + num = (val >> (64 - 4 * i)) & 0xf; + if (num >= 10) + *dest++ = 'A' + num - 10; + else + *dest++ = '0' + num; + } + *dest = 0; + return dest; +} + +static notrace void s390_handle_damage(void) +{ + union ctlreg0 cr0, cr0_new; + char message[100]; + psw_t psw_save; + char *ptr; + + smp_emergency_stop(); + diag_amode31_ops.diag308_reset(); + ptr = nmi_puts(message, "System stopped due to unrecoverable machine check, code: 0x"); + u64_to_hex(ptr, S390_lowcore.mcck_interruption_code); + + /* + * Disable low address protection and make machine check new PSW a + * disabled wait PSW. Any additional machine check cannot be handled. + */ + __ctl_store(cr0.val, 0, 0); + cr0_new = cr0; + cr0_new.lap = 0; + __ctl_load(cr0_new.val, 0, 0); + psw_save = S390_lowcore.mcck_new_psw; + psw_bits(S390_lowcore.mcck_new_psw).io = 0; + psw_bits(S390_lowcore.mcck_new_psw).ext = 0; + psw_bits(S390_lowcore.mcck_new_psw).wait = 1; + sclp_emergency_printk(message); + + /* + * Restore machine check new PSW and control register 0 to original + * values. This makes possible system dump analysis easier. + */ + S390_lowcore.mcck_new_psw = psw_save; + __ctl_load(cr0.val, 0, 0); + disabled_wait(); + while (1); +} +NOKPROBE_SYMBOL(s390_handle_damage); + +/* + * Main machine check handler function. Will be called with interrupts disabled + * and machine checks enabled. + */ +void s390_handle_mcck(void) +{ + struct mcck_struct mcck; + + /* + * Disable machine checks and get the current state of accumulated + * machine checks. Afterwards delete the old state and enable machine + * checks again. + */ + local_mcck_disable(); + mcck = *this_cpu_ptr(&cpu_mcck); + memset(this_cpu_ptr(&cpu_mcck), 0, sizeof(mcck)); + local_mcck_enable(); + + if (mcck.channel_report) + crw_handle_channel_report(); + /* + * A warning may remain for a prolonged period on the bare iron. + * (actually until the machine is powered off, or the problem is gone) + * So we just stop listening for the WARNING MCH and avoid continuously + * being interrupted. One caveat is however, that we must do this per + * processor and cannot use the smp version of ctl_clear_bit(). + * On VM we only get one interrupt per virtally presented machinecheck. + * Though one suffices, we may get one interrupt per (virtual) cpu. + */ + if (mcck.warning) { /* WARNING pending ? */ + static int mchchk_wng_posted = 0; + + /* Use single cpu clear, as we cannot handle smp here. 
*/ + __ctl_clear_bit(14, 24); /* Disable WARNING MCH */ + if (xchg(&mchchk_wng_posted, 1) == 0) + kill_cad_pid(SIGPWR, 1); + } + if (mcck.stp_queue) + stp_queue_work(); + if (mcck.kill_task) { + printk(KERN_EMERG "mcck: Terminating task because of machine " + "malfunction (code 0x%016lx).\n", mcck.mcck_code); + printk(KERN_EMERG "mcck: task: %s, pid: %d.\n", + current->comm, current->pid); + if (is_global_init(current)) + panic("mcck: Attempting to kill init!\n"); + do_send_sig_info(SIGKILL, SEND_SIG_PRIV, current, PIDTYPE_PID); + } +} + +/* + * returns 0 if register contents could be validated + * returns 1 otherwise + */ +static int notrace s390_validate_registers(union mci mci) +{ + struct mcesa *mcesa; + void *fpt_save_area; + union ctlreg2 cr2; + int kill_task; + u64 zero; + + kill_task = 0; + zero = 0; + + if (!mci.gr || !mci.fp) + kill_task = 1; + fpt_save_area = &S390_lowcore.floating_pt_save_area; + if (!mci.fc) { + kill_task = 1; + asm volatile( + " lfpc %0\n" + : + : "Q" (zero)); + } else { + asm volatile( + " lfpc %0\n" + : + : "Q" (S390_lowcore.fpt_creg_save_area)); + } + + mcesa = __va(S390_lowcore.mcesad & MCESA_ORIGIN_MASK); + if (!MACHINE_HAS_VX) { + /* Validate floating point registers */ + asm volatile( + " ld 0,0(%0)\n" + " ld 1,8(%0)\n" + " ld 2,16(%0)\n" + " ld 3,24(%0)\n" + " ld 4,32(%0)\n" + " ld 5,40(%0)\n" + " ld 6,48(%0)\n" + " ld 7,56(%0)\n" + " ld 8,64(%0)\n" + " ld 9,72(%0)\n" + " ld 10,80(%0)\n" + " ld 11,88(%0)\n" + " ld 12,96(%0)\n" + " ld 13,104(%0)\n" + " ld 14,112(%0)\n" + " ld 15,120(%0)\n" + : + : "a" (fpt_save_area) + : "memory"); + } else { + /* Validate vector registers */ + union ctlreg0 cr0; + + /* + * The vector validity must only be checked if not running a + * KVM guest. For KVM guests the machine check is forwarded by + * KVM and it is the responsibility of the guest to take + * appropriate actions. The host vector or FPU values have been + * saved by KVM and will be restored by KVM. + */ + if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) + kill_task = 1; + cr0.val = S390_lowcore.cregs_save_area[0]; + cr0.afp = cr0.vx = 1; + __ctl_load(cr0.val, 0, 0); + asm volatile( + " la 1,%0\n" + " VLM 0,15,0,1\n" + " VLM 16,31,256,1\n" + : + : "Q" (*(struct vx_array *)mcesa->vector_save_area) + : "1"); + __ctl_load(S390_lowcore.cregs_save_area[0], 0, 0); + } + /* Validate access registers */ + asm volatile( + " lam 0,15,0(%0)\n" + : + : "a" (&S390_lowcore.access_regs_save_area) + : "memory"); + if (!mci.ar) + kill_task = 1; + /* Validate guarded storage registers */ + cr2.val = S390_lowcore.cregs_save_area[2]; + if (cr2.gse) { + if (!mci.gs) { + /* + * 2 cases: + * - machine check in kernel or userspace + * - machine check while running SIE (KVM guest) + * For kernel or userspace the userspace values of + * guarded storage control can not be recreated, the + * process must be terminated. + * For SIE the guest values of guarded storage can not + * be recreated. This is either due to a bug or due to + * GS being disabled in the guest. The guest will be + * notified by KVM code and the guests machine check + * handling must take care of this. The host values + * are saved by KVM and are not affected. + */ + if (!test_cpu_flag(CIF_MCCK_GUEST)) + kill_task = 1; + } else { + load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area); + } + } + /* + * The getcpu vdso syscall reads CPU number from the programmable + * field of the TOD clock. 
Disregard the TOD programmable register + * validity bit and load the CPU number into the TOD programmable + * field unconditionally. + */ + set_tod_programmable_field(raw_smp_processor_id()); + /* Validate clock comparator register */ + set_clock_comparator(S390_lowcore.clock_comparator); + + if (!mci.ms || !mci.pm || !mci.ia) + kill_task = 1; + + return kill_task; +} +NOKPROBE_SYMBOL(s390_validate_registers); + +/* + * Backup the guest's machine check info to its description block + */ +static void notrace s390_backup_mcck_info(struct pt_regs *regs) +{ + struct mcck_volatile_info *mcck_backup; + struct sie_page *sie_page; + + /* r14 contains the sie block, which was set in sie64a */ + struct kvm_s390_sie_block *sie_block = phys_to_virt(regs->gprs[14]); + + if (sie_block == NULL) + /* Something's seriously wrong, stop system. */ + s390_handle_damage(); + + sie_page = container_of(sie_block, struct sie_page, sie_block); + mcck_backup = &sie_page->mcck_info; + mcck_backup->mcic = S390_lowcore.mcck_interruption_code & + ~(MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE); + mcck_backup->ext_damage_code = S390_lowcore.external_damage_code; + mcck_backup->failing_storage_address + = S390_lowcore.failing_storage_address; +} +NOKPROBE_SYMBOL(s390_backup_mcck_info); + +#define MAX_IPD_COUNT 29 +#define MAX_IPD_TIME (5 * 60 * USEC_PER_SEC) /* 5 minutes */ + +#define ED_STP_ISLAND 6 /* External damage STP island check */ +#define ED_STP_SYNC 7 /* External damage STP sync check */ + +#define MCCK_CODE_NO_GUEST (MCCK_CODE_CP | MCCK_CODE_EXT_DAMAGE) + +/* + * machine check handler. + */ +void notrace s390_do_machine_check(struct pt_regs *regs) +{ + static int ipd_count; + static DEFINE_SPINLOCK(ipd_lock); + static unsigned long long last_ipd; + struct mcck_struct *mcck; + unsigned long long tmp; + irqentry_state_t irq_state; + union mci mci; + unsigned long mcck_dam_code; + int mcck_pending = 0; + + irq_state = irqentry_nmi_enter(regs); + + if (user_mode(regs)) + update_timer_mcck(); + inc_irq_stat(NMI_NMI); + mci.val = S390_lowcore.mcck_interruption_code; + mcck = this_cpu_ptr(&cpu_mcck); + + /* + * Reinject the instruction processing damages' machine checks + * including Delayed Access Exception into the guest + * instead of damaging the host if they happen in the guest. + */ + if (mci.pd && !test_cpu_flag(CIF_MCCK_GUEST)) { + if (mci.b) { + /* Processing backup -> verify if we can survive this */ + u64 z_mcic, o_mcic, t_mcic; + z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29); + o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 | + 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 | + 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 | + 1ULL<<16); + t_mcic = mci.val; + + if (((t_mcic & z_mcic) != 0) || + ((t_mcic & o_mcic) != o_mcic)) { + s390_handle_damage(); + } + + /* + * Nullifying exigent condition, therefore we might + * retry this instruction. + */ + spin_lock(&ipd_lock); + tmp = get_tod_clock(); + if (((tmp - last_ipd) >> 12) < MAX_IPD_TIME) + ipd_count++; + else + ipd_count = 1; + last_ipd = tmp; + if (ipd_count == MAX_IPD_COUNT) + s390_handle_damage(); + spin_unlock(&ipd_lock); + } else { + /* Processing damage -> stopping machine */ + s390_handle_damage(); + } + } + if (s390_validate_registers(mci)) { + if (!user_mode(regs)) + s390_handle_damage(); + /* + * Couldn't restore all register contents for the + * user space process -> mark task for termination. 
+ */ + mcck->kill_task = 1; + mcck->mcck_code = mci.val; + mcck_pending = 1; + } + + /* + * Backup the machine check's info if it happens when the guest + * is running. + */ + if (test_cpu_flag(CIF_MCCK_GUEST)) + s390_backup_mcck_info(regs); + + if (mci.cd) { + /* Timing facility damage */ + s390_handle_damage(); + } + if (mci.ed && mci.ec) { + /* External damage */ + if (S390_lowcore.external_damage_code & (1U << ED_STP_SYNC)) + mcck->stp_queue |= stp_sync_check(); + if (S390_lowcore.external_damage_code & (1U << ED_STP_ISLAND)) + mcck->stp_queue |= stp_island_check(); + mcck_pending = 1; + } + /* + * Reinject storage related machine checks into the guest if they + * happen when the guest is running. + */ + if (!test_cpu_flag(CIF_MCCK_GUEST)) { + /* Storage error uncorrected */ + if (mci.se) + s390_handle_damage(); + /* Storage key-error uncorrected */ + if (mci.ke) + s390_handle_damage(); + /* Storage degradation */ + if (mci.ds && mci.fa) + s390_handle_damage(); + } + if (mci.cp) { + /* Channel report word pending */ + mcck->channel_report = 1; + mcck_pending = 1; + } + if (mci.w) { + /* Warning pending */ + mcck->warning = 1; + mcck_pending = 1; + } + + /* + * If there are only Channel Report Pending and External Damage + * machine checks, they will not be reinjected into the guest + * because they refer to host conditions only. + */ + mcck_dam_code = (mci.val & MCIC_SUBCLASS_MASK); + if (test_cpu_flag(CIF_MCCK_GUEST) && + (mcck_dam_code & MCCK_CODE_NO_GUEST) != mcck_dam_code) { + /* Set exit reason code for host's later handling */ + *((long *)(regs->gprs[15] + __SF_SIE_REASON)) = -EINTR; + } + clear_cpu_flag(CIF_MCCK_GUEST); + + if (mcck_pending) + schedule_mcck_handler(); + + irqentry_nmi_exit(regs, irq_state); +} +NOKPROBE_SYMBOL(s390_do_machine_check); + +static int __init machine_check_init(void) +{ + ctl_set_bit(14, 25); /* enable external damage MCH */ + ctl_set_bit(14, 27); /* enable system recovery MCH */ + ctl_set_bit(14, 24); /* enable warning MCH */ + return 0; +} +early_initcall(machine_check_init); diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c new file mode 100644 index 0000000000..d1b16d83e4 --- /dev/null +++ b/arch/s390/kernel/nospec-branch.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/module.h> +#include <linux/device.h> +#include <linux/cpu.h> +#include <asm/nospec-branch.h> + +static int __init nobp_setup_early(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + if (enabled && test_facility(82)) { + /* + * The user explicitly requested nobp=1, enable it and + * disable the expoline support. 
+ */ + __set_facility(82, alt_stfle_fac_list); + if (IS_ENABLED(CONFIG_EXPOLINE)) + nospec_disable = 1; + } else { + __clear_facility(82, alt_stfle_fac_list); + } + return 0; +} +early_param("nobp", nobp_setup_early); + +static int __init nospec_setup_early(char *str) +{ + __clear_facility(82, alt_stfle_fac_list); + return 0; +} +early_param("nospec", nospec_setup_early); + +static int __init nospec_report(void) +{ + if (test_facility(156)) + pr_info("Spectre V2 mitigation: etokens\n"); + if (nospec_uses_trampoline()) + pr_info("Spectre V2 mitigation: execute trampolines\n"); + if (__test_facility(82, alt_stfle_fac_list)) + pr_info("Spectre V2 mitigation: limited branch prediction\n"); + return 0; +} +arch_initcall(nospec_report); + +#ifdef CONFIG_EXPOLINE + +int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); + +static int __init nospectre_v2_setup_early(char *str) +{ + nospec_disable = 1; + return 0; +} +early_param("nospectre_v2", nospectre_v2_setup_early); + +void __init nospec_auto_detect(void) +{ + if (test_facility(156) || cpu_mitigations_off()) { + /* + * The machine supports etokens. + * Disable expolines and disable nobp. + */ + if (__is_defined(CC_USING_EXPOLINE)) + nospec_disable = 1; + __clear_facility(82, alt_stfle_fac_list); + } else if (__is_defined(CC_USING_EXPOLINE)) { + /* + * The kernel has been compiled with expolines. + * Keep expolines enabled and disable nobp. + */ + nospec_disable = 0; + __clear_facility(82, alt_stfle_fac_list); + } + /* + * If the kernel has not been compiled with expolines the + * nobp setting decides what is done, this depends on the + * CONFIG_KERNEL_NP option and the nobp/nospec parameters. + */ +} + +static int __init spectre_v2_setup_early(char *str) +{ + if (str && !strncmp(str, "on", 2)) { + nospec_disable = 0; + __clear_facility(82, alt_stfle_fac_list); + } + if (str && !strncmp(str, "off", 3)) + nospec_disable = 1; + if (str && !strncmp(str, "auto", 4)) + nospec_auto_detect(); + return 0; +} +early_param("spectre_v2", spectre_v2_setup_early); + +static void __init_or_module __nospec_revert(s32 *start, s32 *end) +{ + enum { BRCL_EXPOLINE, BRASL_EXPOLINE } type; + static const u8 branch[] = { 0x47, 0x00, 0x07, 0x00 }; + u8 *instr, *thunk, *br; + u8 insnbuf[6]; + s32 *epo; + + /* Second part of the instruction replace is always a nop */ + memcpy(insnbuf + 2, branch, sizeof(branch)); + for (epo = start; epo < end; epo++) { + instr = (u8 *) epo + *epo; + if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) + type = BRCL_EXPOLINE; /* brcl instruction */ + else if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x05) + type = BRASL_EXPOLINE; /* brasl instruction */ + else + continue; + thunk = instr + (*(int *)(instr + 2)) * 2; + if (thunk[0] == 0xc6 && thunk[1] == 0x00) + /* exrl %r0,<target-br> */ + br = thunk + (*(int *)(thunk + 2)) * 2; + else + continue; + if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) + continue; + switch (type) { + case BRCL_EXPOLINE: + /* brcl to thunk, replace with br + nop */ + insnbuf[0] = br[0]; + insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + break; + case BRASL_EXPOLINE: + /* brasl to thunk, replace with basr + nop */ + insnbuf[0] = 0x0d; + insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + break; + } + + s390_kernel_write(instr, insnbuf, 6); + } +} + +void __init_or_module nospec_revert(s32 *start, s32 *end) +{ + if (nospec_disable) + __nospec_revert(start, end); +} + +extern s32 __nospec_call_start[], __nospec_call_end[]; +extern s32 __nospec_return_start[], __nospec_return_end[]; +void __init 
nospec_init_branches(void) +{ + nospec_revert(__nospec_call_start, __nospec_call_end); + nospec_revert(__nospec_return_start, __nospec_return_end); +} + +#endif /* CONFIG_EXPOLINE */ diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c new file mode 100644 index 0000000000..52d4353188 --- /dev/null +++ b/arch/s390/kernel/nospec-sysfs.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/device.h> +#include <linux/cpu.h> +#include <asm/facility.h> +#include <asm/nospec-branch.h> + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (test_facility(156)) + return sprintf(buf, "Mitigation: etokens\n"); + if (nospec_uses_trampoline()) + return sprintf(buf, "Mitigation: execute trampolines\n"); + if (__test_facility(82, alt_stfle_fac_list)) + return sprintf(buf, "Mitigation: limited branch prediction\n"); + return sprintf(buf, "Vulnerable\n"); +} diff --git a/arch/s390/kernel/numa.c b/arch/s390/kernel/numa.c new file mode 100644 index 0000000000..23ab9f02f2 --- /dev/null +++ b/arch/s390/kernel/numa.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NUMA support for s390 + * + * Implement NUMA core code. + * + * Copyright IBM Corp. 2015 + */ + +#include <linux/kernel.h> +#include <linux/mmzone.h> +#include <linux/cpumask.h> +#include <linux/memblock.h> +#include <linux/node.h> +#include <asm/numa.h> + +struct pglist_data *node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); + +void __init numa_setup(void) +{ + int nid; + + nodes_clear(node_possible_map); + node_set(0, node_possible_map); + node_set_online(0); + for (nid = 0; nid < MAX_NUMNODES; nid++) { + NODE_DATA(nid) = memblock_alloc(sizeof(pg_data_t), 8); + if (!NODE_DATA(nid)) + panic("%s: Failed to allocate %zu bytes align=0x%x\n", + __func__, sizeof(pg_data_t), 8); + } + NODE_DATA(0)->node_spanned_pages = memblock_end_of_DRAM() >> PAGE_SHIFT; + NODE_DATA(0)->node_id = 0; +} diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c new file mode 100644 index 0000000000..6e1824141b --- /dev/null +++ b/arch/s390/kernel/os_info.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OS info memory interface + * + * Copyright IBM Corp. 
2012 + * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> + */ + +#define KMSG_COMPONENT "os_info" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/crash_dump.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <asm/checksum.h> +#include <asm/abs_lowcore.h> +#include <asm/os_info.h> +#include <asm/maccess.h> +#include <asm/asm-offsets.h> + +/* + * OS info structure has to be page aligned + */ +static struct os_info os_info __page_aligned_data; + +/* + * Compute checksum over OS info structure + */ +u32 os_info_csum(struct os_info *os_info) +{ + int size = sizeof(*os_info) - offsetof(struct os_info, version_major); + return (__force u32)csum_partial(&os_info->version_major, size, 0); +} + +/* + * Add crashkernel info to OS info and update checksum + */ +void os_info_crashkernel_add(unsigned long base, unsigned long size) +{ + os_info.crashkernel_addr = (u64)(unsigned long)base; + os_info.crashkernel_size = (u64)(unsigned long)size; + os_info.csum = os_info_csum(&os_info); +} + +/* + * Add OS info entry and update checksum + */ +void os_info_entry_add(int nr, void *ptr, u64 size) +{ + os_info.entry[nr].addr = __pa(ptr); + os_info.entry[nr].size = size; + os_info.entry[nr].csum = (__force u32)csum_partial(ptr, size, 0); + os_info.csum = os_info_csum(&os_info); +} + +/* + * Initialize OS info structure and set lowcore pointer + */ +void __init os_info_init(void) +{ + struct lowcore *abs_lc; + + os_info.version_major = OS_INFO_VERSION_MAJOR; + os_info.version_minor = OS_INFO_VERSION_MINOR; + os_info.magic = OS_INFO_MAGIC; + os_info.csum = os_info_csum(&os_info); + abs_lc = get_abs_lowcore(); + abs_lc->os_info = __pa(&os_info); + put_abs_lowcore(abs_lc); +} + +#ifdef CONFIG_CRASH_DUMP + +static struct os_info *os_info_old; + +/* + * Allocate and copy OS info entry from oldmem + */ +static void os_info_old_alloc(int nr, int align) +{ + unsigned long addr, size = 0; + char *buf, *buf_align, *msg; + u32 csum; + + addr = os_info_old->entry[nr].addr; + if (!addr) { + msg = "not available"; + goto fail; + } + size = os_info_old->entry[nr].size; + buf = kmalloc(size + align - 1, GFP_KERNEL); + if (!buf) { + msg = "alloc failed"; + goto fail; + } + buf_align = PTR_ALIGN(buf, align); + if (copy_oldmem_kernel(buf_align, addr, size)) { + msg = "copy failed"; + goto fail_free; + } + csum = (__force u32)csum_partial(buf_align, size, 0); + if (csum != os_info_old->entry[nr].csum) { + msg = "checksum failed"; + goto fail_free; + } + os_info_old->entry[nr].addr = (u64)(unsigned long)buf_align; + msg = "copied"; + goto out; +fail_free: + kfree(buf); +fail: + os_info_old->entry[nr].addr = 0; +out: + pr_info("entry %i: %s (addr=0x%lx size=%lu)\n", + nr, msg, addr, size); +} + +/* + * Initialize os info and os info entries from oldmem + */ +static void os_info_old_init(void) +{ + static int os_info_init; + unsigned long addr; + + if (os_info_init) + return; + if (!oldmem_data.start) + goto fail; + if (copy_oldmem_kernel(&addr, __LC_OS_INFO, sizeof(addr))) + goto fail; + if (addr == 0 || addr % PAGE_SIZE) + goto fail; + os_info_old = kzalloc(sizeof(*os_info_old), GFP_KERNEL); + if (!os_info_old) + goto fail; + if (copy_oldmem_kernel(os_info_old, addr, sizeof(*os_info_old))) + goto fail_free; + if (os_info_old->magic != OS_INFO_MAGIC) + goto fail_free; + if (os_info_old->csum != os_info_csum(os_info_old)) + goto fail_free; + if (os_info_old->version_major > OS_INFO_VERSION_MAJOR) + goto fail_free; + os_info_old_alloc(OS_INFO_VMCOREINFO, 1); + os_info_old_alloc(OS_INFO_REIPL_BLOCK, 
1); + pr_info("crashkernel: addr=0x%lx size=%lu\n", + (unsigned long) os_info_old->crashkernel_addr, + (unsigned long) os_info_old->crashkernel_size); + os_info_init = 1; + return; +fail_free: + kfree(os_info_old); +fail: + os_info_init = 1; + os_info_old = NULL; +} + +/* + * Return pointer to os infor entry and its size + */ +void *os_info_old_entry(int nr, unsigned long *size) +{ + os_info_old_init(); + + if (!os_info_old) + return NULL; + if (!os_info_old->entry[nr].addr) + return NULL; + *size = (unsigned long) os_info_old->entry[nr].size; + return (void *)(unsigned long)os_info_old->entry[nr].addr; +} +#endif diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c new file mode 100644 index 0000000000..850c11ea63 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -0,0 +1,1950 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support for s390x - CPU-measurement Counter Facility + * + * Copyright IBM Corp. 2012, 2023 + * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> + * Thomas Richter <tmricht@linux.ibm.com> + */ +#define KMSG_COMPONENT "cpum_cf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/miscdevice.h> +#include <linux/perf_event.h> + +#include <asm/cpu_mf.h> +#include <asm/hwctrset.h> +#include <asm/debug.h> + +enum cpumf_ctr_set { + CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ + CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ + CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ + CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ + CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 + +static inline void ctr_set_enable(u64 *state, u64 ctrsets) +{ + *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT; +} + +static inline void ctr_set_disable(u64 *state, u64 ctrsets) +{ + *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT); +} + +static inline void ctr_set_start(u64 *state, u64 ctrsets) +{ + *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT; +} + +static inline void ctr_set_stop(u64 *state, u64 ctrsets) +{ + *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) +{ + switch (set) { + case CPUMF_CTR_SET_BASIC: + return stcctm(BASIC, range, dest); + case CPUMF_CTR_SET_USER: + return stcctm(PROBLEM_STATE, range, dest); + case CPUMF_CTR_SET_CRYPTO: + return stcctm(CRYPTO_ACTIVITY, range, dest); + case CPUMF_CTR_SET_EXT: + return stcctm(EXTENDED, range, dest); + case CPUMF_CTR_SET_MT_DIAG: + return stcctm(MT_DIAG_CLEARING, range, dest); + case CPUMF_CTR_SET_MAX: + return 3; + } + return 3; +} + +struct cpu_cf_events { + refcount_t refcnt; /* Reference count */ + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state; /* For perf_event_open SVC */ + u64 dev_state; /* For /dev/hwctr */ + unsigned int flags; + size_t used; /* Bytes used in data */ + size_t usedss; /* Bytes used in start/stop */ + unsigned char start[PAGE_SIZE]; /* Counter set at event add */ + unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */ + unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */ + unsigned int sets; /* # Counter set saved in memory */ +}; + +static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ +static 
debug_info_t *cf_dbg; + +/* + * The CPU Measurement query counter information instruction contains + * information which varies per machine generation, but is constant and + * does not change when running on a particular machine, such as counter + * first and second version number. This is needed to determine the size + * of counter sets. Extract this information at device driver initialization. + */ +static struct cpumf_ctr_info cpumf_ctr_info; + +struct cpu_cf_ptr { + struct cpu_cf_events *cpucf; +}; + +static struct cpu_cf_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct cpu_cf_ptr __percpu *cfptr; +} cpu_cf_root; + +/* + * Serialize event initialization and event removal. Both are called from + * user space in task context with perf_event_open() and close() + * system calls. + * + * This mutex serializes functions cpum_cf_alloc_cpu() called at event + * initialization via cpumf_pmu_event_init() and function cpum_cf_free_cpu() + * called at event removal via call back function hw_perf_event_destroy() + * when the event is deleted. They are serialized to enforce correct + * bookkeeping of pointer and reference counts anchored by + * struct cpu_cf_root and the access to cpu_cf_root::refcnt and the + * per CPU pointers stored in cpu_cf_root::cfptr. + */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * Get pointer to per-cpu structure. + * + * Function get_cpu_cfhw() is called from + * - cfset_copy_all(): This function is protected by cpus_read_lock(), so + * CPU hot plug remove can not happen. Event removal requires a close() + * first. + * + * Function this_cpu_cfhw() is called from perf common code functions: + * - pmu_{en|dis}able(), pmu_{add|del}()and pmu_{start|stop}(): + * All functions execute with interrupts disabled on that particular CPU. + * - cfset_ioctl_{on|off}, cfset_cpu_read(): see comment cfset_copy_all(). + * + * Therefore it is safe to access the CPU specific pointer to the event. + */ +static struct cpu_cf_events *get_cpu_cfhw(int cpu) +{ + struct cpu_cf_ptr __percpu *p = cpu_cf_root.cfptr; + + if (p) { + struct cpu_cf_ptr *q = per_cpu_ptr(p, cpu); + + return q->cpucf; + } + return NULL; +} + +static struct cpu_cf_events *this_cpu_cfhw(void) +{ + return get_cpu_cfhw(smp_processor_id()); +} + +/* Disable counter sets on dedicated CPU */ +static void cpum_cf_reset_cpu(void *flags) +{ + lcctl(0); +} + +/* Free per CPU data when the last event is removed. */ +static void cpum_cf_free_root(void) +{ + if (!refcount_dec_and_test(&cpu_cf_root.refcnt)) + return; + free_percpu(cpu_cf_root.cfptr); + cpu_cf_root.cfptr = NULL; + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + debug_sprintf_event(cf_dbg, 4, "%s root.refcnt %u cfptr %d\n", + __func__, refcount_read(&cpu_cf_root.refcnt), + !cpu_cf_root.cfptr); +} + +/* + * On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int cpum_cf_alloc_root(void) +{ + int rc = 0; + + if (refcount_inc_not_zero(&cpu_cf_root.refcnt)) + return rc; + + /* The memory is already zeroed. 
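The allocation scheme described above, a global anchor with a reference count, a fast path that only bumps the count, and a slow path that sets up the per-CPU array for the first user, is a generic pattern. Below is a minimal standalone sketch of it, assuming the caller already holds the serializing mutex as described; all names are hypothetical and not taken from the patch:

#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/refcount.h>

struct payload { int val; };                    /* stand-in for the per CPU data */

struct anchor {
        refcount_t refcnt;                      /* number of active users */
        struct payload __percpu *ptr;           /* allocated on first use */
};

static int anchor_get(struct anchor *a)         /* caller holds the mutex */
{
        if (refcount_inc_not_zero(&a->refcnt))  /* fast path: already set up */
                return 0;
        a->ptr = alloc_percpu(struct payload);  /* first user: allocate */
        if (!a->ptr)
                return -ENOMEM;
        refcount_set(&a->refcnt, 1);
        return 0;
}

static void anchor_put(struct anchor *a)        /* caller holds the mutex */
{
        if (!refcount_dec_and_test(&a->refcnt)) /* still in use elsewhere */
                return;
        free_percpu(a->ptr);                    /* last user: tear down */
        a->ptr = NULL;
}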
*/ + cpu_cf_root.cfptr = alloc_percpu(struct cpu_cf_ptr); + if (cpu_cf_root.cfptr) { + refcount_set(&cpu_cf_root.refcnt, 1); + on_each_cpu(cpum_cf_reset_cpu, NULL, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + } else { + rc = -ENOMEM; + } + + return rc; +} + +/* Free CPU counter data structure for a PMU */ +static void cpum_cf_free_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + + mutex_lock(&pmc_reserve_mutex); + /* + * When invoked via CPU hotplug handler, there might be no events + * installed or that particular CPU might not have an + * event installed. This anchor pointer can be NULL! + */ + if (!cpu_cf_root.cfptr) + goto out; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + /* + * Might be zero when called from CPU hotplug handler and no event + * installed on that CPU, but on different CPUs. + */ + if (!cpuhw) + goto out; + + if (refcount_dec_and_test(&cpuhw->refcnt)) { + kfree(cpuhw); + p->cpucf = NULL; + } + cpum_cf_free_root(); +out: + mutex_unlock(&pmc_reserve_mutex); +} + +/* Allocate CPU counter data structure for a PMU. Called under mutex lock. */ +static int cpum_cf_alloc_cpu(int cpu) +{ + struct cpu_cf_events *cpuhw; + struct cpu_cf_ptr *p; + int rc; + + mutex_lock(&pmc_reserve_mutex); + rc = cpum_cf_alloc_root(); + if (rc) + goto unlock; + p = per_cpu_ptr(cpu_cf_root.cfptr, cpu); + cpuhw = p->cpucf; + + if (!cpuhw) { + cpuhw = kzalloc(sizeof(*cpuhw), GFP_KERNEL); + if (cpuhw) { + p->cpucf = cpuhw; + refcount_set(&cpuhw->refcnt, 1); + } else { + rc = -ENOMEM; + } + } else { + refcount_inc(&cpuhw->refcnt); + } + if (rc) { + /* + * Error in allocation of event, decrement anchor. Since + * cpu_cf_event in not created, its destroy() function is not + * invoked. Adjust the reference counter for the anchor. + */ + cpum_cf_free_root(); + } +unlock: + mutex_unlock(&pmc_reserve_mutex); + return rc; +} + +/* + * Create/delete per CPU data structures for /dev/hwctr interface and events + * created by perf_event_open(). + * If cpu is -1, track task on all available CPUs. This requires + * allocation of hardware data structures for all CPUs. This setup handles + * perf_event_open() with task context and /dev/hwctr interface. + * If cpu is non-zero install event on this CPU only. This setup handles + * perf_event_open() with CPU context. + */ +static int cpum_cf_alloc(int cpu) +{ + cpumask_var_t mask; + int rc; + + if (cpu == -1) { + if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + for_each_online_cpu(cpu) { + rc = cpum_cf_alloc_cpu(cpu); + if (rc) { + for_each_cpu(cpu, mask) + cpum_cf_free_cpu(cpu); + break; + } + cpumask_set_cpu(cpu, mask); + } + free_cpumask_var(mask); + } else { + rc = cpum_cf_alloc_cpu(cpu); + } + return rc; +} + +static void cpum_cf_free(int cpu) +{ + if (cpu == -1) { + for_each_online_cpu(cpu) + cpum_cf_free_cpu(cpu); + } else { + cpum_cf_free_cpu(cpu); + } +} + +#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */ + /* interval in seconds */ + +/* Counter sets are stored as data stream in a page sized memory buffer and + * exported to user space via raw data attached to the event sample data. + * Each counter set starts with an eight byte header consisting of: + * - a two byte eye catcher (0xfeef) + * - a one byte counter set number + * - a two byte counter set size (indicates the number of counters in this set) + * - a three byte reserved value (must be zero) to make the header the same + * size as a counter value. + * All counter values are eight byte in size. 
+ * + * All counter sets are followed by a 64 byte trailer. + * The trailer consists of a: + * - flag field indicating valid fields when corresponding bit set + * - the counter facility first and second version number + * - the CPU speed if nonzero + * - the time stamp the counter sets have been collected + * - the time of day (TOD) base value + * - the machine type. + * + * The counter sets are saved when the process is prepared to be executed on a + * CPU and saved again when the process is going to be removed from a CPU. + * The difference of both counter sets are calculated and stored in the event + * sample data area. + */ +struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int set:16; /* 16-31 Counter set identifier */ + unsigned int ctr:16; /* 32-47 Number of stored counters */ + unsigned int res1:16; /* 48-63 Reserved */ +}; + +struct cf_trailer_entry { /* CPU-M CF_DIAG trailer (64 byte) */ + /* 0 - 7 */ + union { + struct { + unsigned int clock_base:1; /* TOD clock base set */ + unsigned int speed:1; /* CPU speed set */ + /* Measurement alerts */ + unsigned int mtda:1; /* Loss of MT ctr. data alert */ + unsigned int caca:1; /* Counter auth. change alert */ + unsigned int lcda:1; /* Loss of counter data alert */ + }; + unsigned long flags; /* 0-63 All indicators */ + }; + /* 8 - 15 */ + unsigned int cfvn:16; /* 64-79 Ctr First Version */ + unsigned int csvn:16; /* 80-95 Ctr Second Version */ + unsigned int cpu_speed:32; /* 96-127 CPU speed */ + /* 16 - 23 */ + unsigned long timestamp; /* 128-191 Timestamp (TOD) */ + /* 24 - 55 */ + union { + struct { + unsigned long progusage1; + unsigned long progusage2; + unsigned long progusage3; + unsigned long tod_base; + }; + unsigned long progusage[4]; + }; + /* 56 - 63 */ + unsigned int mach_type:16; /* Machine type */ + unsigned int res1:16; /* Reserved */ + unsigned int res2:32; /* Reserved */ +}; + +/* Create the trailer data at the end of a page. */ +static void cfdiag_trailer(struct cf_trailer_entry *te) +{ + struct cpuid cpuid; + + te->cfvn = cpumf_ctr_info.cfvn; /* Counter version numbers */ + te->csvn = cpumf_ctr_info.csvn; + + get_cpu_id(&cpuid); /* Machine type */ + te->mach_type = cpuid.machine; + te->cpu_speed = cfdiag_cpu_speed; + if (te->cpu_speed) + te->speed = 1; + te->clock_base = 1; /* Save clock base */ + te->tod_base = tod_clock_base.tod; + te->timestamp = get_tod_clock_fast(); +} + +/* + * The number of counters per counter set varies between machine generations, + * but is constant when running on a particular machine generation. + * Determine each counter set size at device driver initialization and + * retrieve it later. 
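Given the layout just described, an eight byte cf_ctrset_entry header per set followed by that set's 64-bit counters, with a cf_trailer_entry at the end, a consumer of the raw buffer can walk it as sketched below. The walker is only illustrative, not part of the patch, and assumes data/len describe a buffer laid out as above (it relies on the struct definitions and the CF_DIAG_CTRSET_DEF eye catcher from this file):

static void walk_ctrset_stream(void *data, size_t len)
{
        size_t offset = 0;

        while (offset + sizeof(struct cf_ctrset_entry) <= len) {
                struct cf_ctrset_entry *hdr = data + offset;
                u64 *counters = (u64 *)(hdr + 1);

                if (hdr->def != CF_DIAG_CTRSET_DEF)     /* no 0xfeef eye catcher */
                        break;                          /* trailer reached */
                /* hdr->set identifies the counter set, hdr->ctr the number
                 * of 64-bit counter values following the header.
                 */
                pr_debug("set %u: %u counters at %p\n",
                         hdr->set, hdr->ctr, counters);
                offset += sizeof(*hdr) + hdr->ctr * sizeof(u64);
        }
        /* The 64 bytes at 'offset' are the struct cf_trailer_entry. */
}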
+ */ +static size_t cpumf_ctr_setsizes[CPUMF_CTR_SET_MAX]; +static void cpum_cf_make_setsize(enum cpumf_ctr_set ctrset) +{ + size_t ctrset_size = 0; + + switch (ctrset) { + case CPUMF_CTR_SET_BASIC: + if (cpumf_ctr_info.cfvn >= 1) + ctrset_size = 6; + break; + case CPUMF_CTR_SET_USER: + if (cpumf_ctr_info.cfvn == 1) + ctrset_size = 6; + else if (cpumf_ctr_info.cfvn >= 3) + ctrset_size = 2; + break; + case CPUMF_CTR_SET_CRYPTO: + if (cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5) + ctrset_size = 16; + else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) + ctrset_size = 20; + break; + case CPUMF_CTR_SET_EXT: + if (cpumf_ctr_info.csvn == 1) + ctrset_size = 32; + else if (cpumf_ctr_info.csvn == 2) + ctrset_size = 48; + else if (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5) + ctrset_size = 128; + else if (cpumf_ctr_info.csvn == 6 || cpumf_ctr_info.csvn == 7) + ctrset_size = 160; + break; + case CPUMF_CTR_SET_MT_DIAG: + if (cpumf_ctr_info.csvn > 3) + ctrset_size = 48; + break; + case CPUMF_CTR_SET_MAX: + break; + } + cpumf_ctr_setsizes[ctrset] = ctrset_size; +} + +/* + * Return the maximum possible counter set size (in number of 8 byte counters) + * depending on type and model number. + */ +static size_t cpum_cf_read_setsize(enum cpumf_ctr_set ctrset) +{ + return cpumf_ctr_setsizes[ctrset]; +} + +/* Read a counter set. The counter set number determines the counter set and + * the CPUM-CF first and second version number determine the number of + * available counters in each counter set. + * Each counter set starts with header containing the counter set number and + * the number of eight byte counters. + * + * The functions returns the number of bytes occupied by this counter set + * including the header. + * If there is no counter in the counter set, this counter set is useless and + * zero is returned on this case. + * + * Note that the counter sets may not be enabled or active and the stcctm + * instruction might return error 3. Depending on error_ok value this is ok, + * for example when called from cpumf_pmu_start() call back function. + */ +static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, + size_t room, bool error_ok) +{ + size_t ctrset_size, need = 0; + int rc = 3; /* Assume write failure */ + + ctrdata->def = CF_DIAG_CTRSET_DEF; + ctrdata->set = ctrset; + ctrdata->res1 = 0; + ctrset_size = cpum_cf_read_setsize(ctrset); + + if (ctrset_size) { /* Save data */ + need = ctrset_size * sizeof(u64) + sizeof(*ctrdata); + if (need <= room) { + rc = ctr_stcctm(ctrset, ctrset_size, + (u64 *)(ctrdata + 1)); + } + if (rc != 3 || error_ok) + ctrdata->ctr = ctrset_size; + else + need = 0; + } + + return need; +} + +static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = { + [CPUMF_CTR_SET_BASIC] = 0x02, + [CPUMF_CTR_SET_USER] = 0x04, + [CPUMF_CTR_SET_CRYPTO] = 0x08, + [CPUMF_CTR_SET_EXT] = 0x01, + [CPUMF_CTR_SET_MT_DIAG] = 0x20, +}; + +/* Read out all counter sets and save them in the provided data buffer. + * The last 64 byte host an artificial trailer entry. 
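As a concrete instance of the space calculation in cfdiag_getctrset(): with csvn 6 or 7 the extended counter set holds 160 counters, so its record needs 160 * 8 + 8 = 1288 bytes (the counter values plus the eight byte header), while the six counter basic set needs 6 * 8 + 8 = 56 bytes; the 64 byte trailer is accounted for separately when a complete buffer is filled.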
+ */ +static size_t cfdiag_getctr(void *data, size_t sz, unsigned long auth, + bool error_ok) +{ + struct cf_trailer_entry *trailer; + size_t offset = 0, done; + int i; + + memset(data, 0, sz); + sz -= sizeof(*trailer); /* Always room for trailer */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + struct cf_ctrset_entry *ctrdata = data + offset; + + if (!(auth & cpumf_ctr_ctl[i])) + continue; /* Counter set not authorized */ + + done = cfdiag_getctrset(ctrdata, i, sz - offset, error_ok); + offset += done; + } + trailer = data + offset; + cfdiag_trailer(trailer); + return offset + sizeof(*trailer); +} + +/* Calculate the difference for each counter in a counter set. */ +static void cfdiag_diffctrset(u64 *pstart, u64 *pstop, int counters) +{ + for (; --counters >= 0; ++pstart, ++pstop) + if (*pstop >= *pstart) + *pstop -= *pstart; + else + *pstop = *pstart - *pstop + 1; +} + +/* Scan the counter sets and calculate the difference of each counter + * in each set. The result is the increment of each counter during the + * period the counter set has been activated. + * + * Return true on success. + */ +static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) +{ + struct cf_trailer_entry *trailer_start, *trailer_stop; + struct cf_ctrset_entry *ctrstart, *ctrstop; + size_t offset = 0; + + auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; + do { + ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); + ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); + + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { + pr_err_once("cpum_cf_diag counter set compare error " + "in set %i\n", ctrstart->set); + return 0; + } + auth &= ~cpumf_ctr_ctl[ctrstart->set]; + if (ctrstart->def == CF_DIAG_CTRSET_DEF) { + cfdiag_diffctrset((u64 *)(ctrstart + 1), + (u64 *)(ctrstop + 1), ctrstart->ctr); + offset += ctrstart->ctr * sizeof(u64) + + sizeof(*ctrstart); + } + } while (ctrstart->def && auth); + + /* Save time_stamp from start of event in stop's trailer */ + trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); + trailer_stop = (struct cf_trailer_entry *)(cpuhw->stop + offset); + trailer_stop->progusage[0] = trailer_start->timestamp; + + return 1; +} + +static enum cpumf_ctr_set get_counter_set(u64 event) +{ + int set = CPUMF_CTR_SET_MAX; + + if (event < 32) + set = CPUMF_CTR_SET_BASIC; + else if (event < 64) + set = CPUMF_CTR_SET_USER; + else if (event < 128) + set = CPUMF_CTR_SET_CRYPTO; + else if (event < 288) + set = CPUMF_CTR_SET_EXT; + else if (event >= 448 && event < 496) + set = CPUMF_CTR_SET_MT_DIAG; + + return set; +} + +static int validate_ctr_version(const u64 config, enum cpumf_ctr_set set) +{ + u16 mtdiag_ctl; + int err = 0; + + /* check required version for counter sets */ + switch (set) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + if (cpumf_ctr_info.cfvn < 1) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_CRYPTO: + if ((cpumf_ctr_info.csvn >= 1 && cpumf_ctr_info.csvn <= 5 && + config > 79) || (cpumf_ctr_info.csvn >= 6 && config > 83)) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_EXT: + if (cpumf_ctr_info.csvn < 1) + err = -EOPNOTSUPP; + if ((cpumf_ctr_info.csvn == 1 && config > 159) || + (cpumf_ctr_info.csvn == 2 && config > 175) || + (cpumf_ctr_info.csvn >= 3 && cpumf_ctr_info.csvn <= 5 && + config > 255) || + (cpumf_ctr_info.csvn >= 6 && config > 287)) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_MT_DIAG: + if (cpumf_ctr_info.csvn <= 3) + err = -EOPNOTSUPP; + /* + * MT-diagnostic counters are read-only. 
The counter set + * is automatically enabled and activated on all CPUs with + * multithreading (SMT). Deactivation of multithreading + * also disables the counter set. State changes are ignored + * by lcctl(). Because Linux controls SMT enablement through + * a kernel parameter only, the counter set is either disabled + * or enabled and active. + * + * Thus, the counters can only be used if SMT is on and the + * counter set is enabled and active. + */ + mtdiag_ctl = cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]; + if (!((cpumf_ctr_info.auth_ctl & mtdiag_ctl) && + (cpumf_ctr_info.enable_ctl & mtdiag_ctl) && + (cpumf_ctr_info.act_ctl & mtdiag_ctl))) + err = -EOPNOTSUPP; + break; + case CPUMF_CTR_SET_MAX: + err = -EOPNOTSUPP; + } + + return err; +} + +/* + * Change the CPUMF state to active. + * Enable and activate the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_enable(struct pmu *pmu) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + int err; + + if (!cpuhw || (cpuhw->flags & PMU_F_ENABLED)) + return; + + err = lcctl(cpuhw->state | cpuhw->dev_state); + if (err) + pr_err("Enabling the performance measuring unit failed with rc=%x\n", err); + else + cpuhw->flags |= PMU_F_ENABLED; +} + +/* + * Change the CPUMF state to inactive. + * Disable and enable (inactive) the CPU-counter sets according + * to the per-cpu control state. + */ +static void cpumf_pmu_disable(struct pmu *pmu) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + u64 inactive; + int err; + + if (!cpuhw || !(cpuhw->flags & PMU_F_ENABLED)) + return; + + inactive = cpuhw->state & ~((1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1); + inactive |= cpuhw->dev_state; + err = lcctl(inactive); + if (err) + pr_err("Disabling the performance measuring unit failed with rc=%x\n", err); + else + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* Release the PMU if event is the last perf event */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + cpum_cf_free(event->cpu); +} + +/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ +static const int cpumf_generic_events_basic[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0, + [PERF_COUNT_HW_INSTRUCTIONS] = 1, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; +/* CPUMF <-> perf event mappings for userspace (problem-state set) */ +static const int cpumf_generic_events_user[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 32, + [PERF_COUNT_HW_INSTRUCTIONS] = 33, + [PERF_COUNT_HW_CACHE_REFERENCES] = -1, + [PERF_COUNT_HW_CACHE_MISSES] = -1, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +static int is_userspace_event(u64 ev) +{ + return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev; +} + +static int __hw_perf_event_init(struct perf_event *event, unsigned int type) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + enum cpumf_ctr_set set; + u64 ev; + + switch (type) { + case PERF_TYPE_RAW: + /* Raw events are used to access counters directly, + * hence do not permit excludes */ + if (attr->exclude_kernel || attr->exclude_user || + attr->exclude_hv) + return -EOPNOTSUPP; + ev = attr->config; + break; + + case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; + ev = attr->config; + if 
(!attr->exclude_user && attr->exclude_kernel) { + /* + * Count user space (problem-state) only + * Handle events 32 and 33 as 0:u and 1:u + */ + if (!is_userspace_event(ev)) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + } + } else if (!attr->exclude_kernel && attr->exclude_user) { + /* No support for kernel space counters only */ + return -EOPNOTSUPP; + } else { + /* Count user and kernel space, incl. events 32 + 33 */ + if (!is_userspace_event(ev)) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } + } + break; + + default: + return -ENOENT; + } + + if (ev == -1) + return -ENOENT; + + if (ev > PERF_CPUM_CF_MAX_CTR) + return -ENOENT; + + /* Obtain the counter set to which the specified counter belongs */ + set = get_counter_set(ev); + switch (set) { + case CPUMF_CTR_SET_BASIC: + case CPUMF_CTR_SET_USER: + case CPUMF_CTR_SET_CRYPTO: + case CPUMF_CTR_SET_EXT: + case CPUMF_CTR_SET_MT_DIAG: + /* + * Use the hardware perf event structure to store the + * counter number in the 'config' member and the counter + * set number in the 'config_base' as bit mask. + * It is later used to enable/disable the counter(s). + */ + hwc->config = ev; + hwc->config_base = cpumf_ctr_ctl[set]; + break; + case CPUMF_CTR_SET_MAX: + /* The counter could not be associated to a counter set */ + return -EINVAL; + } + + /* Initialize for using the CPU-measurement counter facility */ + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; + event->destroy = hw_perf_event_destroy; + + /* + * Finally, validate version and authorization of the counter set. + * If the particular CPU counter set is not authorized, + * return with -ENOENT in order to fall back to other + * PMUs that might suffice the event request. + */ + if (!(hwc->config_base & cpumf_ctr_info.auth_ctl)) + return -ENOENT; + return validate_ctr_version(hwc->config, set); +} + +/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different + * attribute::type values: + * - PERF_TYPE_HARDWARE: + * - pmu->type: + * Handle both type of invocations identical. They address the same hardware. + * The result is different when event modifiers exclude_kernel and/or + * exclude_user are also set. + */ +static int cpumf_pmu_event_type(struct perf_event *event) +{ + u64 ev = event->attr.config; + + if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || + cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev) + return PERF_TYPE_HARDWARE; + return PERF_TYPE_RAW; +} + +static int cpumf_pmu_event_init(struct perf_event *event) +{ + unsigned int type = event->attr.type; + int err; + + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) + /* Registered as unknown PMU */ + err = __hw_perf_event_init(event, cpumf_pmu_event_type(event)); + else + return -ENOENT; + + if (unlikely(err) && event->destroy) + event->destroy(event); + + return err; +} + +static int hw_perf_event_reset(struct perf_event *event) +{ + u64 prev, new; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) { + if (err != 3) + break; + /* The counter is not (yet) available. This + * might happen if the counter set to which + * this counter belongs is in the disabled + * state. 
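Seen from user space, the mapping above means that a generic PERF_COUNT_HW_CPU_CYCLES event with exclude_kernel set ends up on CPUMF counter 32 from the problem-state set, while the unrestricted form uses counter 0 from the basic set. A minimal user-space sketch of opening such an event; this is plain perf_event_open() usage with error handling trimmed, nothing in it is specific to this patch:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_CPU_CYCLES;
        attr.exclude_kernel = 1;        /* problem-state (user) cycles only */

        /* pid = 0 (current task), cpu = -1 (any CPU), no group, no flags */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0)
                return 1;
        /* ... run the workload to be measured ... */
        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("user cycles: %llu\n", (unsigned long long)count);
        close(fd);
        return 0;
}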
+ */ + new = 0; + } + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + return err; +} + +static void hw_perf_event_update(struct perf_event *event) +{ + u64 prev, new, delta; + int err; + + do { + prev = local64_read(&event->hw.prev_count); + err = ecctr(event->hw.config, &new); + if (err) + return; + } while (local64_cmpxchg(&event->hw.prev_count, prev, new) != prev); + + delta = (prev <= new) ? new - prev + : (-1ULL - prev) + new + 1; /* overflow */ + local64_add(delta, &event->count); +} + +static void cpumf_pmu_read(struct perf_event *event) +{ + if (event->hw.state & PERF_HES_STOPPED) + return; + + hw_perf_event_update(event); +} + +static void cpumf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + struct hw_perf_event *hwc = &event->hw; + int i; + + if (!(hwc->state & PERF_HES_STOPPED)) + return; + + hwc->state = 0; + + /* (Re-)enable and activate the counter set */ + ctr_set_enable(&cpuhw->state, hwc->config_base); + ctr_set_start(&cpuhw->state, hwc->config_base); + + /* The counter set to which this counter belongs can be already active. + * Because all counters in a set are active, the event->hw.prev_count + * needs to be synchronized. At this point, the counter set can be in + * the inactive or disabled state. + */ + if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { + cpuhw->usedss = cfdiag_getctr(cpuhw->start, + sizeof(cpuhw->start), + hwc->config_base, true); + } else { + hw_perf_event_reset(event); + } + + /* Increment refcount for counter sets */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) + if ((hwc->config_base & cpumf_ctr_ctl[i])) + atomic_inc(&cpuhw->ctr_set[i]); +} + +/* Create perf event sample with the counter sets as raw data. The sample + * is then pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int cfdiag_push_sample(struct perf_event *event, + struct cpu_cf_events *cpuhw) +{ + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + int overflow; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + + if (event->attr.sample_type & PERF_SAMPLE_CPU) + data.cpu_entry.cpu = event->cpu; + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = cpuhw->usedss; + raw.frag.data = cpuhw->stop; + perf_sample_save_raw_data(&data, &raw); + } + + overflow = perf_event_overflow(event, &data, ®s); + if (overflow) + event->pmu->stop(event, 0); + + perf_event_update_userpage(event); + return overflow; +} + +static void cpumf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + struct hw_perf_event *hwc = &event->hw; + int i; + + if (!(hwc->state & PERF_HES_STOPPED)) { + /* Decrement reference count for this counter set and if this + * is the last used counter in the set, clear activation + * control and set the counter set state to inactive. 
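A worked example of the overflow branch in hw_perf_event_update(): with prev = 0xfffffffffffffff0 and new = 0x10 the code computes delta = (-1ULL - prev) + new + 1 = 0xf + 0x10 + 1 = 0x20, i.e. 32 increments across the 64-bit wrap, which is exactly new - prev taken modulo 2^64.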
+ */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (!(hwc->config_base & cpumf_ctr_ctl[i])) + continue; + if (!atomic_dec_return(&cpuhw->ctr_set[i])) + ctr_set_stop(&cpuhw->state, cpumf_ctr_ctl[i]); + } + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + if (hwc->config == PERF_EVENT_CPUM_CF_DIAG) { + local64_inc(&event->count); + cpuhw->usedss = cfdiag_getctr(cpuhw->stop, + sizeof(cpuhw->stop), + event->hw.config_base, + false); + if (cfdiag_diffctr(cpuhw, event->hw.config_base)) + cfdiag_push_sample(event, cpuhw); + } else { + hw_perf_event_update(event); + } + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int cpumf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + + ctr_set_enable(&cpuhw->state, event->hw.config_base); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + cpumf_pmu_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void cpumf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + int i; + + cpumf_pmu_stop(event, PERF_EF_UPDATE); + + /* Check if any counter in the counter set is still used. If not used, + * change the counter set to the disabled state. This also clears the + * content of all counters in the set. + * + * When a new perf event has been added but not yet started, this can + * clear enable control and resets all counters in a set. Therefore, + * cpumf_pmu_start() always has to reenable a counter set. + */ + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) + if (!atomic_read(&cpuhw->ctr_set[i])) + ctr_set_disable(&cpuhw->state, cpumf_ctr_ctl[i]); +} + +/* Performance monitoring unit for s390x */ +static struct pmu cpumf_pmu = { + .task_ctx_nr = perf_sw_context, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .event_init = cpumf_pmu_event_init, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cpumf_pmu_read, +}; + +static struct cfset_session { /* CPUs and counter set bit mask */ + struct list_head head; /* Head of list of active processes */ +} cfset_session = { + .head = LIST_HEAD_INIT(cfset_session.head) +}; + +static refcount_t cfset_opencnt = REFCOUNT_INIT(0); /* Access count */ +/* + * Synchronize access to device /dev/hwc. This mutex protects against + * concurrent access to functions cfset_open() and cfset_release(). + * Same for CPU hotplug add and remove events triggering + * cpum_cf_online_cpu() and cpum_cf_offline_cpu(). + * It also serializes concurrent device ioctl access from multiple + * processes accessing /dev/hwc. + * + * The mutex protects concurrent access to the /dev/hwctr session management + * struct cfset_session and reference counting variable cfset_opencnt. + */ +static DEFINE_MUTEX(cfset_ctrset_mutex); + +/* + * CPU hotplug handles only /dev/hwctr device. + * For perf_event_open() the CPU hotplug handling is done on kernel common + * code: + * - CPU add: Nothing is done since a file descriptor can not be created + * and returned to the user. + * - CPU delete: Handled by common code via pmu_disable(), pmu_stop() and + * pmu_delete(). The event itself is removed when the file descriptor is + * closed. + */ +static int cfset_online_cpu(unsigned int cpu); + +static int cpum_cf_online_cpu(unsigned int cpu) +{ + int rc = 0; + + /* + * Ignore notification for perf_event_open(). 
+ * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + rc = cpum_cf_alloc_cpu(cpu); + if (!rc) + cfset_online_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return rc; +} + +static int cfset_offline_cpu(unsigned int cpu); + +static int cpum_cf_offline_cpu(unsigned int cpu) +{ + /* + * During task exit processing of grouped perf events triggered by CPU + * hotplug processing, pmu_disable() is called as part of perf context + * removal process. Therefore do not trigger event removal now for + * perf_event_open() created events. Perf common code triggers event + * destruction when the event file descriptor is closed. + * + * Handle only /dev/hwctr device sessions. + */ + mutex_lock(&cfset_ctrset_mutex); + if (refcount_read(&cfset_opencnt)) { + cfset_offline_cpu(cpu); + cpum_cf_free_cpu(cpu); + } + mutex_unlock(&cfset_ctrset_mutex); + return 0; +} + +/* Return true if store counter set multiple instruction is available */ +static inline int stccm_avail(void) +{ + return test_facility(142); +} + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_cf_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + inc_irq_stat(IRQEXT_CMC); + + /* + * Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. + */ + cpuhw = this_cpu_cfhw(); + if (!cpuhw) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpumf_ctr_info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); + + /* loss of MT counter data alert */ + if (alert & CPU_MF_INT_CF_MTDA) + pr_warn("CPU[%i] MT counter data was lost\n", + smp_processor_id()); +} + +static int cfset_init(void); +static int __init cpumf_pmu_init(void) +{ + int rc; + + /* Extract counter measurement facility information */ + if (!cpum_cf_avail() || qctri(&cpumf_ctr_info)) + return -ENODEV; + + /* Determine and store counter set sizes for later reference */ + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) + cpum_cf_make_setsize(rc); + + /* + * Clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters + */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc); + return rc; + } + + /* Setup s390dbf facility */ + cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); + if (!cf_dbg) { + pr_err("Registration of s390dbf(cpum_cf) failed\n"); + rc = -ENOMEM; + goto out1; + } + debug_register_view(cf_dbg, &debug_sprintf_view); + + cpumf_pmu.attr_groups = cpumf_cf_event_group(); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); + if (rc) { + pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + goto out2; + } else if (stccm_avail()) { /* Setup counter set device */ + cfset_init(); + } + + rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, + "perf/s390/cf:online", + cpum_cf_online_cpu, cpum_cf_offline_cpu); + return rc; + +out2: + debug_unregister_view(cf_dbg, &debug_sprintf_view); + debug_unregister(cf_dbg); +out1: + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); + return rc; +} + +/* Support for the CPU 
Measurement Facility counter set extraction using + * device /dev/hwctr. This allows user space programs to extract complete + * counter set via normal file operations. + */ + +struct cfset_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */ + unsigned int sets; /* Counter set bit mask */ + atomic_t cpus_ack; /* # CPUs successfully executed func */ +}; + +struct cfset_request { /* CPUs and counter set bit mask */ + unsigned long ctrset; /* Bit mask of counter set to read */ + cpumask_t mask; /* CPU mask to read from */ + struct list_head node; /* Chain to cfset_session.head */ +}; + +static void cfset_session_init(void) +{ + INIT_LIST_HEAD(&cfset_session.head); +} + +/* Remove current request from global bookkeeping. Maintain a counter set bit + * mask on a per CPU basis. + * Done in process context under mutex protection. + */ +static void cfset_session_del(struct cfset_request *p) +{ + list_del(&p->node); +} + +/* Add current request to global bookkeeping. Maintain a counter set bit mask + * on a per CPU basis. + * Done in process context under mutex protection. + */ +static void cfset_session_add(struct cfset_request *p) +{ + list_add(&p->node, &cfset_session.head); +} + +/* The /dev/hwctr device access uses PMU_F_IN_USE to mark the device access + * path is currently used. + * The cpu_cf_events::dev_state is used to denote counter sets in use by this + * interface. It is always or'ed in. If this interface is not active, its + * value is zero and no additional counter sets will be included. + * + * The cpu_cf_events::state is used by the perf_event_open SVC and remains + * unchanged. + * + * perf_pmu_enable() and perf_pmu_enable() and its call backs + * cpumf_pmu_enable() and cpumf_pmu_disable() are called by the + * performance measurement subsystem to enable per process + * CPU Measurement counter facility. + * The XXX_enable() and XXX_disable functions are used to turn off + * x86 performance monitoring interrupt (PMI) during scheduling. + * s390 uses these calls to temporarily stop and resume the active CPU + * counters sets during scheduling. + * + * We do allow concurrent access of perf_event_open() SVC and /dev/hwctr + * device access. The perf_event_open() SVC interface makes a lot of effort + * to only run the counters while the calling process is actively scheduled + * to run. + * When /dev/hwctr interface is also used at the same time, the counter sets + * will keep running, even when the process is scheduled off a CPU. + * However this is not a problem and does not lead to wrong counter values + * for the perf_event_open() SVC. The current counter value will be recorded + * during schedule-in. At schedule-out time the current counter value is + * extracted again and the delta is calculated and added to the event. 
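As a concrete example of how the two state words combine: enabling and starting the basic counter set (bit value 0x02) via ctr_set_enable() and ctr_set_start() yields (0x02 << CPUMF_LCCTL_ENABLE_SHIFT) | 0x02 = 0x20002 in the control word handed to lcctl(). Because the perf_event_open() contribution (cpuhw->state) and the /dev/hwctr contribution (cpuhw->dev_state) are kept in separate words and simply or'ed together for lcctl(), either interface can add or drop its counter sets without disturbing the other's.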
+ */ +/* Stop all counter sets via ioctl interface */ +static void cfset_ioctl_off(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + struct cfset_call_on_cpu_parm *p = parm; + int rc; + + /* Check if any counter set used by /dev/hwctr */ + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) + if ((p->sets & cpumf_ctr_ctl[rc])) { + if (!atomic_dec_return(&cpuhw->ctr_set[rc])) { + ctr_set_disable(&cpuhw->dev_state, + cpumf_ctr_ctl[rc]); + ctr_set_stop(&cpuhw->dev_state, + cpumf_ctr_ctl[rc]); + } + } + /* Keep perf_event_open counter sets */ + rc = lcctl(cpuhw->dev_state | cpuhw->state); + if (rc) + pr_err("Counter set stop %#llx of /dev/%s failed rc=%i\n", + cpuhw->state, S390_HWCTR_DEVICE, rc); + if (!cpuhw->dev_state) + cpuhw->flags &= ~PMU_F_IN_USE; +} + +/* Start counter sets on particular CPU */ +static void cfset_ioctl_on(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + struct cfset_call_on_cpu_parm *p = parm; + int rc; + + cpuhw->flags |= PMU_F_IN_USE; + ctr_set_enable(&cpuhw->dev_state, p->sets); + ctr_set_start(&cpuhw->dev_state, p->sets); + for (rc = CPUMF_CTR_SET_BASIC; rc < CPUMF_CTR_SET_MAX; ++rc) + if ((p->sets & cpumf_ctr_ctl[rc])) + atomic_inc(&cpuhw->ctr_set[rc]); + rc = lcctl(cpuhw->dev_state | cpuhw->state); /* Start counter sets */ + if (!rc) + atomic_inc(&p->cpus_ack); + else + pr_err("Counter set start %#llx of /dev/%s failed rc=%i\n", + cpuhw->dev_state | cpuhw->state, S390_HWCTR_DEVICE, rc); +} + +static void cfset_release_cpu(void *p) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + int rc; + + cpuhw->dev_state = 0; + rc = lcctl(cpuhw->state); /* Keep perf_event_open counter sets */ + if (rc) + pr_err("Counter set release %#llx of /dev/%s failed rc=%i\n", + cpuhw->state, S390_HWCTR_DEVICE, rc); +} + +/* This modifies the process CPU mask to adopt it to the currently online + * CPUs. Offline CPUs can not be addresses. This call terminates the access + * and is usually followed by close() or a new iotcl(..., START, ...) which + * creates a new request structure. + */ +static void cfset_all_stop(struct cfset_request *req) +{ + struct cfset_call_on_cpu_parm p = { + .sets = req->ctrset, + }; + + cpumask_and(&req->mask, &req->mask, cpu_online_mask); + on_each_cpu_mask(&req->mask, cfset_ioctl_off, &p, 1); +} + +/* Release function is also called when application gets terminated without + * doing a proper ioctl(..., S390_HWCTR_STOP, ...) command. + */ +static int cfset_release(struct inode *inode, struct file *file) +{ + mutex_lock(&cfset_ctrset_mutex); + /* Open followed by close/exit has no private_data */ + if (file->private_data) { + cfset_all_stop(file->private_data); + cfset_session_del(file->private_data); + kfree(file->private_data); + file->private_data = NULL; + } + if (refcount_dec_and_test(&cfset_opencnt)) { /* Last close */ + on_each_cpu(cfset_release_cpu, NULL, 1); + cpum_cf_free(-1); + } + mutex_unlock(&cfset_ctrset_mutex); + return 0; +} + +/* + * Open via /dev/hwctr device. Allocate all per CPU resources on the first + * open of the device. The last close releases all per CPU resources. + * Parallel perf_event_open system calls also use per CPU resources. + * These invocations are handled via reference counting on the per CPU data + * structures. 
+ */ +static int cfset_open(struct inode *inode, struct file *file) +{ + int rc = 0; + + if (!perfmon_capable()) + return -EPERM; + file->private_data = NULL; + + mutex_lock(&cfset_ctrset_mutex); + if (!refcount_inc_not_zero(&cfset_opencnt)) { /* First open */ + rc = cpum_cf_alloc(-1); + if (!rc) { + cfset_session_init(); + refcount_set(&cfset_opencnt, 1); + } + } + mutex_unlock(&cfset_ctrset_mutex); + + /* nonseekable_open() never fails */ + return rc ?: nonseekable_open(inode, file); +} + +static int cfset_all_start(struct cfset_request *req) +{ + struct cfset_call_on_cpu_parm p = { + .sets = req->ctrset, + .cpus_ack = ATOMIC_INIT(0), + }; + cpumask_var_t mask; + int rc = 0; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_and(mask, &req->mask, cpu_online_mask); + on_each_cpu_mask(mask, cfset_ioctl_on, &p, 1); + if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) { + on_each_cpu_mask(mask, cfset_ioctl_off, &p, 1); + rc = -EIO; + } + free_cpumask_var(mask); + return rc; +} + +/* Return the maximum required space for all possible CPUs in case one + * CPU will be onlined during the START, READ, STOP cycles. + * To find out the size of the counter sets, any one CPU will do. They + * all have the same counter sets. + */ +static size_t cfset_needspace(unsigned int sets) +{ + size_t bytes = 0; + int i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (!(sets & cpumf_ctr_ctl[i])) + continue; + bytes += cpum_cf_read_setsize(i) * sizeof(u64) + + sizeof(((struct s390_ctrset_setdata *)0)->set) + + sizeof(((struct s390_ctrset_setdata *)0)->no_cnts); + } + bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids * + (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) + + sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); + return bytes; +} + +static int cfset_all_copy(unsigned long arg, cpumask_t *mask) +{ + struct s390_ctrset_read __user *ctrset_read; + unsigned int cpu, cpus, rc = 0; + void __user *uptr; + + ctrset_read = (struct s390_ctrset_read __user *)arg; + uptr = ctrset_read->data; + for_each_cpu(cpu, mask) { + struct cpu_cf_events *cpuhw = get_cpu_cfhw(cpu); + struct s390_ctrset_cpudata __user *ctrset_cpudata; + + ctrset_cpudata = uptr; + rc = put_user(cpu, &ctrset_cpudata->cpu_nr); + rc |= put_user(cpuhw->sets, &ctrset_cpudata->no_sets); + rc |= copy_to_user(ctrset_cpudata->data, cpuhw->data, + cpuhw->used); + if (rc) { + rc = -EFAULT; + goto out; + } + uptr += sizeof(struct s390_ctrset_cpudata) + cpuhw->used; + cond_resched(); + } + cpus = cpumask_weight(mask); + if (put_user(cpus, &ctrset_read->no_cpus)) + rc = -EFAULT; +out: + return rc; +} + +static size_t cfset_cpuset_read(struct s390_ctrset_setdata *p, int ctrset, + int ctrset_size, size_t room) +{ + size_t need = 0; + int rc = -1; + + need = sizeof(*p) + sizeof(u64) * ctrset_size; + if (need <= room) { + p->set = cpumf_ctr_ctl[ctrset]; + p->no_cnts = ctrset_size; + rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv); + if (rc == 3) /* Nothing stored */ + need = 0; + } + return need; +} + +/* Read all counter sets. 
*/ +static void cfset_cpu_read(void *parm) +{ + struct cpu_cf_events *cpuhw = this_cpu_cfhw(); + struct cfset_call_on_cpu_parm *p = parm; + int set, set_size; + size_t space; + + /* No data saved yet */ + cpuhw->used = 0; + cpuhw->sets = 0; + memset(cpuhw->data, 0, sizeof(cpuhw->data)); + + /* Scan the counter sets */ + for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) { + struct s390_ctrset_setdata *sp = (void *)cpuhw->data + + cpuhw->used; + + if (!(p->sets & cpumf_ctr_ctl[set])) + continue; /* Counter set not in list */ + set_size = cpum_cf_read_setsize(set); + space = sizeof(cpuhw->data) - cpuhw->used; + space = cfset_cpuset_read(sp, set, set_size, space); + if (space) { + cpuhw->used += space; + cpuhw->sets += 1; + } + } +} + +static int cfset_all_read(unsigned long arg, struct cfset_request *req) +{ + struct cfset_call_on_cpu_parm p; + cpumask_var_t mask; + int rc; + + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return -ENOMEM; + + p.sets = req->ctrset; + cpumask_and(mask, &req->mask, cpu_online_mask); + on_each_cpu_mask(mask, cfset_cpu_read, &p, 1); + rc = cfset_all_copy(arg, mask); + free_cpumask_var(mask); + return rc; +} + +static long cfset_ioctl_read(unsigned long arg, struct cfset_request *req) +{ + int ret = -ENODATA; + + if (req && req->ctrset) + ret = cfset_all_read(arg, req); + return ret; +} + +static long cfset_ioctl_stop(struct file *file) +{ + struct cfset_request *req = file->private_data; + int ret = -ENXIO; + + if (req) { + cfset_all_stop(req); + cfset_session_del(req); + kfree(req); + file->private_data = NULL; + ret = 0; + } + return ret; +} + +static long cfset_ioctl_start(unsigned long arg, struct file *file) +{ + struct s390_ctrset_start __user *ustart; + struct s390_ctrset_start start; + struct cfset_request *preq; + void __user *umask; + unsigned int len; + int ret = 0; + size_t need; + + if (file->private_data) + return -EBUSY; + ustart = (struct s390_ctrset_start __user *)arg; + if (copy_from_user(&start, ustart, sizeof(start))) + return -EFAULT; + if (start.version != S390_HWCTR_START_VERSION) + return -EINVAL; + if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] | + cpumf_ctr_ctl[CPUMF_CTR_SET_USER] | + cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] | + cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] | + cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG])) + return -EINVAL; /* Invalid counter set */ + if (!start.counter_sets) + return -EINVAL; /* No counter set at all? */ + + preq = kzalloc(sizeof(*preq), GFP_KERNEL); + if (!preq) + return -ENOMEM; + cpumask_clear(&preq->mask); + len = min_t(u64, start.cpumask_len, cpumask_size()); + umask = (void __user *)start.cpumask; + if (copy_from_user(&preq->mask, umask, len)) { + kfree(preq); + return -EFAULT; + } + if (cpumask_empty(&preq->mask)) { + kfree(preq); + return -EINVAL; + } + need = cfset_needspace(start.counter_sets); + if (put_user(need, &ustart->data_bytes)) { + kfree(preq); + return -EFAULT; + } + preq->ctrset = start.counter_sets; + ret = cfset_all_start(preq); + if (!ret) { + cfset_session_add(preq); + file->private_data = preq; + } else { + kfree(preq); + } + return ret; +} + +/* Entry point to the /dev/hwctr device interface. + * The ioctl system call supports three subcommands: + * S390_HWCTR_START: Start the specified counter sets on a CPU list. The + * counter set keeps running until explicitly stopped. Returns the number + * of bytes needed to store the counter values. 
If another S390_HWCTR_START + * ioctl subcommand is called without a previous S390_HWCTR_STOP stop + * command on the same file descriptor, -EBUSY is returned. + * S390_HWCTR_READ: Read the counter set values from the specified CPU list given + * with the S390_HWCTR_START command. + * S390_HWCTR_STOP: Stop the counter sets on the CPU list given with the + * previous S390_HWCTR_START subcommand. + */ +static long cfset_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + int ret; + + cpus_read_lock(); + mutex_lock(&cfset_ctrset_mutex); + switch (cmd) { + case S390_HWCTR_START: + ret = cfset_ioctl_start(arg, file); + break; + case S390_HWCTR_STOP: + ret = cfset_ioctl_stop(file); + break; + case S390_HWCTR_READ: + ret = cfset_ioctl_read(arg, file->private_data); + break; + default: + ret = -ENOTTY; + break; + } + mutex_unlock(&cfset_ctrset_mutex); + cpus_read_unlock(); + return ret; +} + +static const struct file_operations cfset_fops = { + .owner = THIS_MODULE, + .open = cfset_open, + .release = cfset_release, + .unlocked_ioctl = cfset_ioctl, + .compat_ioctl = cfset_ioctl, + .llseek = no_llseek +}; + +static struct miscdevice cfset_dev = { + .name = S390_HWCTR_DEVICE, + .minor = MISC_DYNAMIC_MINOR, + .fops = &cfset_fops, + .mode = 0666, +}; + +/* Hotplug add of a CPU. Scan through all active processes and add + * that CPU to the list of CPUs supplied with ioctl(..., START, ...). + */ +static int cfset_online_cpu(unsigned int cpu) +{ + struct cfset_call_on_cpu_parm p; + struct cfset_request *rp; + + if (!list_empty(&cfset_session.head)) { + list_for_each_entry(rp, &cfset_session.head, node) { + p.sets = rp->ctrset; + cfset_ioctl_on(&p); + cpumask_set_cpu(cpu, &rp->mask); + } + } + return 0; +} + +/* Hotplug remove of a CPU. Scan through all active processes and clear + * that CPU from the list of CPUs supplied with ioctl(..., START, ...). + * Adjust reference counts. + */ +static int cfset_offline_cpu(unsigned int cpu) +{ + struct cfset_call_on_cpu_parm p; + struct cfset_request *rp; + + if (!list_empty(&cfset_session.head)) { + list_for_each_entry(rp, &cfset_session.head, node) { + p.sets = rp->ctrset; + cfset_ioctl_off(&p); + cpumask_clear_cpu(cpu, &rp->mask); + } + } + return 0; +} + +static void cfdiag_read(struct perf_event *event) +{ +} + +static int get_authctrsets(void) +{ + unsigned long auth = 0; + enum cpumf_ctr_set i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + if (cpumf_ctr_info.auth_ctl & cpumf_ctr_ctl[i]) + auth |= cpumf_ctr_ctl[i]; + } + return auth; +} + +/* Setup the event. Test for authorized counter sets and only include counter + * sets which are authorized at the time of the setup. Including unauthorized + * counter sets results in a specification exception (and panic). + */ +static int cfdiag_event_init2(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + int err = 0; + + /* Set sample_period to indicate sampling */ + event->hw.config = attr->config; + event->hw.sample_period = attr->sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + local64_set(&event->count, 0); + event->hw.last_period = event->hw.sample_period; + + /* Add all authorized counter sets to config_base. The + * hardware init function is either called per-cpu or just once + * for all CPUs (event->cpu == -1). This depends on whether + * counting is started for all CPUs or on a per-workload basis where + * the perf event moves from one CPU to another CPU.
+ * Checking the authorization on any CPU is fine as the hardware + * applies the same authorization settings to all CPUs. + */ + event->hw.config_base = get_authctrsets(); + + /* No authorized counter sets, nothing to count/sample */ + if (!event->hw.config_base) + err = -EINVAL; + + return err; +} + +static int cfdiag_event_init(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + int err = -ENOENT; + + if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || + event->attr.type != event->pmu->type) + goto out; + + /* Raw events are used to access counters directly, + * hence do not permit excludes. + * This event is useless without PERF_SAMPLE_RAW to return counter set + * values as raw data. + */ + if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv || + !(attr->sample_type & (PERF_SAMPLE_CPU | PERF_SAMPLE_RAW))) { + err = -EOPNOTSUPP; + goto out; + } + + /* Initialize for using the CPU-measurement counter facility */ + if (cpum_cf_alloc(event->cpu)) + return -ENOMEM; + event->destroy = hw_perf_event_destroy; + + err = cfdiag_event_init2(event); + if (unlikely(err)) + event->destroy(event); +out: + return err; +} + +/* Create cf_diag/events/CF_DIAG event sysfs file. This counter is used + * to collect the complete counter sets for a scheduled process. The complete + * counter sets are attached as raw data to the artificial event. + * This results in complete counter sets available when a process is + * scheduled. The data contains the delta of every counter while the process was + * running. + */ +CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG); + +static struct attribute *cfdiag_events_attr[] = { + CPUMF_EVENT_PTR(CF_DIAG, CF_DIAG), + NULL, +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cfdiag_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cfdiag_events_group = { + .name = "events", + .attrs = cfdiag_events_attr, +}; +static struct attribute_group cfdiag_format_group = { + .name = "format", + .attrs = cfdiag_format_attr, +}; +static const struct attribute_group *cfdiag_attr_groups[] = { + &cfdiag_events_group, + &cfdiag_format_group, + NULL, +}; + +/* Performance monitoring unit for event CF_DIAG. Since this event + * is also started and stopped via the perf_event_open() system call, use + * the same event enable/disable callback functions. They do not + * have a pointer to the perf_event structure as first parameter. + * + * The functions XXX_add, XXX_del, XXX_start and XXX_stop are also common. + * Reuse them and distinguish the event (always first parameter) via the + * 'config' member. + */ +static struct pmu cf_diag = { + .task_ctx_nr = perf_sw_context, + .event_init = cfdiag_event_init, + .pmu_enable = cpumf_pmu_enable, + .pmu_disable = cpumf_pmu_disable, + .add = cpumf_pmu_add, + .del = cpumf_pmu_del, + .start = cpumf_pmu_start, + .stop = cpumf_pmu_stop, + .read = cfdiag_read, + + .attr_groups = cfdiag_attr_groups +}; + +/* Calculate memory needed to store all counter sets together with header and + * trailer data. This is independent of the counter set authorization which + * can vary depending on the configuration.
+ */ +static size_t cfdiag_maxsize(struct cpumf_ctr_info *info) +{ + size_t max_size = sizeof(struct cf_trailer_entry); + enum cpumf_ctr_set i; + + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { + size_t size = cpum_cf_read_setsize(i); + + if (size) + max_size += size * sizeof(u64) + + sizeof(struct cf_ctrset_entry); + } + return max_size; +} + +/* Get the CPU speed, try sampling facility first and CPU attributes second. */ +static void cfdiag_get_cpu_speed(void) +{ + unsigned long mhz; + + if (cpum_sf_avail()) { /* Sampling facility first */ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) { + cfdiag_cpu_speed = si.cpu_speed; + return; + } + } + + /* Fallback: CPU speed extract static part. Used in case + * CPU Measurement Sampling Facility is turned off. + */ + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + if (mhz != -1UL) + cfdiag_cpu_speed = mhz & 0xffffffff; +} + +static int cfset_init(void) +{ + size_t need; + int rc; + + cfdiag_get_cpu_speed(); + /* Make sure the counter set data fits into predefined buffer. */ + need = cfdiag_maxsize(&cpumf_ctr_info); + if (need > sizeof(((struct cpu_cf_events *)0)->start)) { + pr_err("Insufficient memory for PMU(cpum_cf_diag) need=%zu\n", + need); + return -ENOMEM; + } + + rc = misc_register(&cfset_dev); + if (rc) { + pr_err("Registration of /dev/%s failed rc=%i\n", + cfset_dev.name, rc); + goto out; + } + + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); + if (rc) { + misc_deregister(&cfset_dev); + pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n", + rc); + } +out: + return rc; +} + +device_initcall(cpumf_pmu_init); diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c new file mode 100644 index 0000000000..0d64aafd15 --- /dev/null +++ b/arch/s390/kernel/perf_cpum_cf_events.c @@ -0,0 +1,909 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Perf PMU sysfs events attributes for available CPU-measurement counters + * + */ + +#include <linux/slab.h> +#include <linux/perf_event.h> +#include <asm/cpu_mf.h> + + +/* BEGIN: CPUM_CF COUNTER DEFINITIONS =================================== */ + +CPUMF_EVENT_ATTR(cf_fvn1, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf_fvn1, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf_fvn1, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf_fvn1, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES, 0x0022); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES, 0x0023); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES, 0x0024); +CPUMF_EVENT_ATTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES, 0x0025); +CPUMF_EVENT_ATTR(cf_fvn1, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf_fvn1, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf_fvn3, CPU_CYCLES, 0x0000); +CPUMF_EVENT_ATTR(cf_fvn3, INSTRUCTIONS, 0x0001); +CPUMF_EVENT_ATTR(cf_fvn3, L1I_DIR_WRITES, 0x0002); +CPUMF_EVENT_ATTR(cf_fvn3, L1I_PENALTY_CYCLES, 0x0003); +CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020); +CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021); +CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004); +CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_FUNCTIONS, 0x0040); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_CYCLES, 0x0041); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS, 0x0042); +CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_CYCLES, 0x0043); 
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_FUNCTIONS, 0x0044); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_CYCLES, 0x0045); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS, 0x0046); +CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_CYCLES, 0x0047); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_FUNCTIONS, 0x0048); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_CYCLES, 0x0049); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS, 0x004a); +CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_CYCLES, 0x004b); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_FUNCTIONS, 0x004c); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_CYCLES, 0x004d); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS, 0x004e); +CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_CYCLES, 0x004f); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_FUNCTION_COUNT, 0x0050); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_CYCLES_COUNT, 0x0051); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT, 0x0052); +CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT, 0x0053); +CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_LOCAL_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z10, L1I_L3_REMOTE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z10, L1D_L3_REMOTE_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z10, L1D_LMEM_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z10, L1I_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z10, L1D_RO_EXCL_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z10, L1I_CACHELINE_INVALIDATES, 0x0089); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z10, TLB2_PTE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z10, ITLB1_MISSES, 0x0091); +CPUMF_EVENT_ATTR(cf_z10, DTLB1_MISSES, 0x0092); +CPUMF_EVENT_ATTR(cf_z10, L2C_STORES_SENT, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, L1D_L2_SOURCED_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z196, L1I_L2_SOURCED_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_MISSES, 0x0083); +CPUMF_EVENT_ATTR(cf_z196, L2C_STORES_SENT, 0x0085); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z196, L1D_RO_EXCL_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_HPAGE_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_z196, L1D_LMEM_SOURCED_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_z196, L1I_LMEM_SOURCED_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_z196, DTLB1_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z196, ITLB1_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z196, TLB2_PTE_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z196, TLB2_CRSTE_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_MISSES, 0x0080); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_MISSES, 0x0081); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2I_SOURCED_WRITES, 0x0082); 
+CPUMF_EVENT_ATTR(cf_zec12, L1I_L2I_SOURCED_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_zec12, L1D_L2D_SOURCED_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_zec12, L1D_LMEM_SOURCED_WRITES, 0x0087); +CPUMF_EVENT_ATTR(cf_zec12, L1I_LMEM_SOURCED_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_zec12, L1D_RO_EXCL_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_zec12, DTLB1_HPAGE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_zec12, ITLB1_WRITES, 0x008c); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_PTE_WRITES, 0x008d); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES, 0x008e); +CPUMF_EVENT_ATTR(cf_zec12, TLB2_CRSTE_WRITES, 0x008f); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TEND, 0x0095); +CPUMF_EVENT_ATTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV, 0x0097); +CPUMF_EVENT_ATTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TEND, 0x009e); +CPUMF_EVENT_ATTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x009f); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV, 0x00a1); +CPUMF_EVENT_ATTR(cf_zec12, TX_NC_TABORT, 0x00b1); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_NO_SPECIAL, 0x00b2); +CPUMF_EVENT_ATTR(cf_zec12, TX_C_TABORT_SPECIAL, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z13, DTLB1_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z13, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z13, ITLB1_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z13, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z13, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z13, TLB2_CRSTE_WRITES, 0x008b); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z13, L1C_TLB1_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0091); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES, 0x0092); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV, 0x0093); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x0095); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x0098); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x0099); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x009a); 
+CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x009b); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x009c); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES, 0x009e); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES, 0x009f); +CPUMF_EVENT_ATTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES, 0x00a0); +CPUMF_EVENT_ATTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES, 0x00a1); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES, 0x00a4); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV, 0x00a5); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00a7); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV, 0x00a8); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES, 0x00aa); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV, 0x00ab); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES, 0x00ad); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES, 0x00b0); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES, 0x00b1); +CPUMF_EVENT_ATTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES, 0x00b2); +CPUMF_EVENT_ATTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES, 0x00b3); +CPUMF_EVENT_ATTR(cf_z13, TX_NC_TABORT, 0x00da); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_NO_SPECIAL, 0x00db); +CPUMF_EVENT_ATTR(cf_z13, TX_C_TABORT_SPECIAL, 0x00dc); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z14, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z14, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z14, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z14, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z14, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z14, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z14, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z14, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z14, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z14, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z14, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z14, 
L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_z14, L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad); +CPUMF_EVENT_ATTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae); +CPUMF_EVENT_ATTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z14, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z14, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z14, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z14, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z14, TX_NC_TABORT, 0x00f3); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_NO_SPECIAL, 0x00f4); +CPUMF_EVENT_ATTR(cf_z14, TX_C_TABORT_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); + +CPUMF_EVENT_ATTR(cf_z15, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_HPAGE_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z15, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z15, L1D_L2D_SOURCED_WRITES, 0x0085); +CPUMF_EVENT_ATTR(cf_z15, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z15, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z15, L1I_L2I_SOURCED_WRITES, 0x0088); +CPUMF_EVENT_ATTR(cf_z15, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z15, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z15, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z15, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z15, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES, 0x0090); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES, 0x0091); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES, 0x0093); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x0094); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x0095); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES, 0x0096); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x0097); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x0098); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES, 0x0099); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x009a); +CPUMF_EVENT_ATTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x009b); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES, 0x009c); +CPUMF_EVENT_ATTR(cf_z15, 
L1D_OFFDRAWER_L4_SOURCED_WRITES, 0x009d); +CPUMF_EVENT_ATTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO, 0x009e); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES, 0x00a2); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES, 0x00a3); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV, 0x00a4); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES, 0x00a5); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES, 0x00a6); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV, 0x00a7); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES, 0x00a8); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES, 0x00a9); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES, 0x00ab); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES, 0x00ac); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV, 0x00ad); +CPUMF_EVENT_ATTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES, 0x00ae); +CPUMF_EVENT_ATTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES, 0x00af); +CPUMF_EVENT_ATTR(cf_z15, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z15, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z15, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z15, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z15, TX_NC_TABORT, 0x00f3); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_NO_SPECIAL, 0x00f4); +CPUMF_EVENT_ATTR(cf_z15, TX_C_TABORT_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z15, DFLT_ACCESS, 0x00f7); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CYCLES, 0x00fc); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CC, 0x00108); +CPUMF_EVENT_ATTR(cf_z15, DFLT_CCFINISH, 0x00109); +CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); +CPUMF_EVENT_ATTR(cf_z16, L1D_RO_EXCL_WRITES, 0x0080); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_WRITES, 0x0081); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_MISSES, 0x0082); +CPUMF_EVENT_ATTR(cf_z16, CRSTE_1MB_WRITES, 0x0083); +CPUMF_EVENT_ATTR(cf_z16, DTLB2_GPAGE_WRITES, 0x0084); +CPUMF_EVENT_ATTR(cf_z16, ITLB2_WRITES, 0x0086); +CPUMF_EVENT_ATTR(cf_z16, ITLB2_MISSES, 0x0087); +CPUMF_EVENT_ATTR(cf_z16, TLB2_PTE_WRITES, 0x0089); +CPUMF_EVENT_ATTR(cf_z16, TLB2_CRSTE_WRITES, 0x008a); +CPUMF_EVENT_ATTR(cf_z16, TLB2_ENGINES_BUSY, 0x008b); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TEND, 0x008c); +CPUMF_EVENT_ATTR(cf_z16, TX_NC_TEND, 0x008d); +CPUMF_EVENT_ATTR(cf_z16, L1C_TLB2_MISSES, 0x008f); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ, 0x0091); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_IV, 0x0092); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_CHIP_HIT, 0x0093); +CPUMF_EVENT_ATTR(cf_z16, DCW_REQ_DRAWER_HIT, 0x0094); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP, 0x0095); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_IV, 0x0096); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_CHIP_HIT, 0x0097); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT, 0x0098); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE, 0x0099); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER, 0x009a); +CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER, 0x009b); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_CHIP_MEMORY, 0x009c); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_MODULE_MEMORY, 0x009d); +CPUMF_EVENT_ATTR(cf_z16, DCW_ON_DRAWER_MEMORY, 0x009e); +CPUMF_EVENT_ATTR(cf_z16, DCW_OFF_DRAWER_MEMORY, 0x009f); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_IV, 0x00a0); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT, 0x00a1); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT, 0x00a2); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_IV, 0x00a3); +CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT, 0x00a4); 
+CPUMF_EVENT_ATTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT, 0x00a5); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_IV, 0x00a6); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT, 0x00a7); +CPUMF_EVENT_ATTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT, 0x00a8); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ, 0x00a9); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_IV, 0x00aa); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_CHIP_HIT, 0x00ab); +CPUMF_EVENT_ATTR(cf_z16, ICW_REQ_DRAWER_HIT, 0x00ac); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP, 0x00ad); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_IV, 0x00ae); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_CHIP_HIT, 0x00af); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT, 0x00b0); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE, 0x00b1); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER, 0x00b2); +CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER, 0x00b3); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_CHIP_MEMORY, 0x00b4); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_MODULE_MEMORY, 0x00b5); +CPUMF_EVENT_ATTR(cf_z16, ICW_ON_DRAWER_MEMORY, 0x00b6); +CPUMF_EVENT_ATTR(cf_z16, ICW_OFF_DRAWER_MEMORY, 0x00b7); +CPUMF_EVENT_ATTR(cf_z16, BCD_DFP_EXECUTION_SLOTS, 0x00e0); +CPUMF_EVENT_ATTR(cf_z16, VX_BCD_EXECUTION_SLOTS, 0x00e1); +CPUMF_EVENT_ATTR(cf_z16, DECIMAL_INSTRUCTIONS, 0x00e2); +CPUMF_EVENT_ATTR(cf_z16, LAST_HOST_TRANSLATIONS, 0x00e8); +CPUMF_EVENT_ATTR(cf_z16, TX_NC_TABORT, 0x00f4); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_NO_SPECIAL, 0x00f5); +CPUMF_EVENT_ATTR(cf_z16, TX_C_TABORT_SPECIAL, 0x00f6); +CPUMF_EVENT_ATTR(cf_z16, DFLT_ACCESS, 0x00f8); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CYCLES, 0x00fd); +CPUMF_EVENT_ATTR(cf_z16, SORTL, 0x0100); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CC, 0x0109); +CPUMF_EVENT_ATTR(cf_z16, DFLT_CCFINISH, 0x010a); +CPUMF_EVENT_ATTR(cf_z16, NNPA_INVOCATIONS, 0x010b); +CPUMF_EVENT_ATTR(cf_z16, NNPA_COMPLETIONS, 0x010c); +CPUMF_EVENT_ATTR(cf_z16, NNPA_WAIT_LOCK, 0x010d); +CPUMF_EVENT_ATTR(cf_z16, NNPA_HOLD_LOCK, 0x010e); +CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE, 0x01c0); +CPUMF_EVENT_ATTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE, 0x01c1); + +static struct attribute *cpumcf_fvn1_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_fvn1, CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn1, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, PROBLEM_STATE_L1D_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn1, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn1, L1D_PENALTY_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_fvn3, CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn3, L1I_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn3, L1I_PENALTY_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES), + CPUMF_EVENT_PTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_fvn3, L1D_DIR_WRITES), + CPUMF_EVENT_PTR(cf_fvn3, L1D_PENALTY_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES), + 
CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES), + NULL, +}; + +static struct attribute *cpumcf_svn_67_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS), + CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES), + CPUMF_EVENT_PTR(cf_svn_6, ECC_FUNCTION_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_CYCLES_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT), + CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT), + NULL, +}; + +static struct attribute *cpumcf_z10_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z10, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_LOCAL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_L3_REMOTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z10, L1I_CACHELINE_INVALIDATES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z10, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z10, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z10, L2C_STORES_SENT), + NULL, +}; + +static struct attribute *cpumcf_z196_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z196, L1D_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_L2_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z196, L2C_STORES_SENT), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_PTE_WRITES), + 
CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z196, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z196, L1I_OFFCHIP_L3_SOURCED_WRITES), + NULL, +}; + +static struct attribute *cpumcf_zec12_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_zec12, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_LMEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_zec12, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1D_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TEND), + CPUMF_EVENT_PTR(cf_zec12, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, L1I_OFFBOOK_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_zec12, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_zec12, TX_C_TABORT_SPECIAL), + NULL, +}; + +static struct attribute *cpumcf_z13_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z13, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, DTLB1_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_WRITES), + CPUMF_EVENT_PTR(cf_z13, ITLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z13, L1C_TLB1_MISSES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, 
L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1D_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONNODE_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_OFFDRAWER_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, L1I_ONCHIP_MEM_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z13, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z13, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +static struct attribute *cpumcf_z14_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z14, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z14, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z14, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z14, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z14, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z14, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z14, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, 
L1D_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1D_ONCHIP_L3_SOURCED_WRITES_RO), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z14, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, L1I_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z14, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z14, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z14, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z14, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z14, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z14, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z14, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +static struct attribute *cpumcf_z15_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z15, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_HPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z15, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_L2D_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z15, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, L1I_L2I_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z15, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z15, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z15, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z15, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1D_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1D_ONCHIP_L3_SOURCED_WRITES_RO), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCHIP_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_ONCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, 
L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_MEMORY_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L3_SOURCED_WRITES_IV), + CPUMF_EVENT_PTR(cf_z15, L1I_ONDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, L1I_OFFDRAWER_L4_SOURCED_WRITES), + CPUMF_EVENT_PTR(cf_z15, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z15, VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z15, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z15, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z15, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z15, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z15, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z15, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z15, DFLT_CC), + CPUMF_EVENT_PTR(cf_z15, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z15, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +static struct attribute *cpumcf_z16_pmu_event_attr[] __initdata = { + CPUMF_EVENT_PTR(cf_z16, L1D_RO_EXCL_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, CRSTE_1MB_WRITES), + CPUMF_EVENT_PTR(cf_z16, DTLB2_GPAGE_WRITES), + CPUMF_EVENT_PTR(cf_z16, ITLB2_WRITES), + CPUMF_EVENT_PTR(cf_z16, ITLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, TLB2_PTE_WRITES), + CPUMF_EVENT_PTR(cf_z16, TLB2_CRSTE_WRITES), + CPUMF_EVENT_PTR(cf_z16, TLB2_ENGINES_BUSY), + CPUMF_EVENT_PTR(cf_z16, TX_C_TEND), + CPUMF_EVENT_PTR(cf_z16, TX_NC_TEND), + CPUMF_EVENT_PTR(cf_z16, L1C_TLB2_MISSES), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_IV), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, DCW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_MODULE_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_ON_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_IV), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, IDCW_OFF_DRAWER_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_IV), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_REQ_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_IV), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_CHIP_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_DRAWER_HIT), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER), + CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_CHIP_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_MODULE_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_ON_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, ICW_OFF_DRAWER_MEMORY), + CPUMF_EVENT_PTR(cf_z16, BCD_DFP_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z16, 
VX_BCD_EXECUTION_SLOTS), + CPUMF_EVENT_PTR(cf_z16, DECIMAL_INSTRUCTIONS), + CPUMF_EVENT_PTR(cf_z16, LAST_HOST_TRANSLATIONS), + CPUMF_EVENT_PTR(cf_z16, TX_NC_TABORT), + CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_NO_SPECIAL), + CPUMF_EVENT_PTR(cf_z16, TX_C_TABORT_SPECIAL), + CPUMF_EVENT_PTR(cf_z16, DFLT_ACCESS), + CPUMF_EVENT_PTR(cf_z16, DFLT_CYCLES), + CPUMF_EVENT_PTR(cf_z16, SORTL), + CPUMF_EVENT_PTR(cf_z16, DFLT_CC), + CPUMF_EVENT_PTR(cf_z16, DFLT_CCFINISH), + CPUMF_EVENT_PTR(cf_z16, NNPA_INVOCATIONS), + CPUMF_EVENT_PTR(cf_z16, NNPA_COMPLETIONS), + CPUMF_EVENT_PTR(cf_z16, NNPA_WAIT_LOCK), + CPUMF_EVENT_PTR(cf_z16, NNPA_HOLD_LOCK), + CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_ONE_THR_ACTIVE), + CPUMF_EVENT_PTR(cf_z16, MT_DIAG_CYCLES_TWO_THR_ACTIVE), + NULL, +}; + +/* END: CPUM_CF COUNTER DEFINITIONS ===================================== */ + +static struct attribute_group cpumcf_pmu_events_group = { + .name = "events", +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumcf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumcf_pmu_format_group = { + .name = "format", + .attrs = cpumcf_pmu_format_attr, +}; + +static const struct attribute_group *cpumcf_pmu_attr_groups[] = { + &cpumcf_pmu_events_group, + &cpumcf_pmu_format_group, + NULL, +}; + + +static __init struct attribute **merge_attr(struct attribute **a, + struct attribute **b, + struct attribute **c) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + for (i = 0; c[i]; i++) + j++; + j++; + + new = kmalloc_array(j, sizeof(struct attribute *), GFP_KERNEL); + if (!new) + return NULL; + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + for (i = 0; c[i]; i++) + new[j++] = c[i]; + new[j] = NULL; + + return new; +} + +__init const struct attribute_group **cpumf_cf_event_group(void) +{ + struct attribute **combined, **model, **cfvn, **csvn; + struct attribute *none[] = { NULL }; + struct cpumf_ctr_info ci; + struct cpuid cpu_id; + + /* Determine generic counters set(s) */ + qctri(&ci); + switch (ci.cfvn) { + case 1: + cfvn = cpumcf_fvn1_pmu_event_attr; + break; + case 3: + cfvn = cpumcf_fvn3_pmu_event_attr; + break; + default: + cfvn = none; + } + + /* Determine version specific crypto set */ + switch (ci.csvn) { + case 1 ... 5: + csvn = cpumcf_svn_12345_pmu_event_attr; + break; + case 6 ... 
7: + csvn = cpumcf_svn_67_pmu_event_attr; + break; + default: + csvn = none; + } + + /* Determine model-specific counter set(s) */ + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x2097: + case 0x2098: + model = cpumcf_z10_pmu_event_attr; + break; + case 0x2817: + case 0x2818: + model = cpumcf_z196_pmu_event_attr; + break; + case 0x2827: + case 0x2828: + model = cpumcf_zec12_pmu_event_attr; + break; + case 0x2964: + case 0x2965: + model = cpumcf_z13_pmu_event_attr; + break; + case 0x3906: + case 0x3907: + model = cpumcf_z14_pmu_event_attr; + break; + case 0x8561: + case 0x8562: + model = cpumcf_z15_pmu_event_attr; + break; + case 0x3931: + case 0x3932: + model = cpumcf_z16_pmu_event_attr; + break; + default: + model = none; + break; + } + + combined = merge_attr(cfvn, csvn, model); + if (combined) + cpumcf_pmu_events_group.attrs = combined; + return cpumcf_pmu_attr_groups; +} diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c new file mode 100644 index 0000000000..06efad5b4f --- /dev/null +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -0,0 +1,2280 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support for the System z CPU-measurement Sampling Facility + * + * Copyright IBM Corp. 2013, 2018 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#define KMSG_COMPONENT "cpum_sf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/pid.h> +#include <linux/notifier.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/moduleparam.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> +#include <asm/debug.h> +#include <asm/timex.h> +#include <linux/io.h> + +/* Minimum number of sample-data-block-tables: + * At least one table is required for the sampling buffer structure. + * A single table contains up to 511 pointers to sample-data-blocks. + */ +#define CPUM_SF_MIN_SDBT 1 + +/* Number of sample-data-blocks per sample-data-block-table (SDBT): + * A table contains SDB pointers (8 bytes) and one table-link entry + * that points to the origin of the next SDBT. + */ +#define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) + +/* Maximum page offset for an SDBT table-link entry: + * If this page offset is reached, a table-link entry to the next SDBT + * must be added. + */ +#define CPUM_SF_SDBT_TL_OFFSET (CPUM_SF_SDB_PER_TABLE * 8) +static inline int require_table_link(const void *sdbt) +{ + return ((unsigned long)sdbt & ~PAGE_MASK) == CPUM_SF_SDBT_TL_OFFSET; +} + +/* Minimum and maximum sampling buffer sizes: + * + * This number represents the maximum size of the sampling buffer taking + * the number of sample-data-block-tables into account. Note that these + * numbers apply to the basic-sampling function only. + * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if + * the diagnostic-sampling function is active. 
+ * + * Sampling buffer size Buffer characteristics + * --------------------------------------------------- + * 64KB == 16 pages (4KB per page) + * 1 page for SDB-tables + * 15 pages for SDBs + * + * 32MB == 8192 pages (4KB per page) + * 16 pages for SDB-tables + * 8176 pages for SDBs + */ +static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; +static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; +static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; + +struct sf_buffer { + unsigned long *sdbt; /* Sample-data-block-table origin */ + /* buffer characteristics (required for buffer increments) */ + unsigned long num_sdb; /* Number of sample-data-blocks */ + unsigned long num_sdbt; /* Number of sample-data-block-tables */ + unsigned long *tail; /* last sample-data-block-table */ +}; + +struct aux_buffer { + struct sf_buffer sfb; + unsigned long head; /* index of SDB of buffer head */ + unsigned long alert_mark; /* index of SDB of alert request position */ + unsigned long empty_mark; /* mark of SDB not marked full */ + unsigned long *sdb_index; /* SDB address for fast lookup */ + unsigned long *sdbt_index; /* SDBT address for fast lookup */ +}; + +struct cpu_hw_sf { + /* CPU-measurement sampling information block */ + struct hws_qsi_info_block qsi; + /* CPU-measurement sampling control block */ + struct hws_lsctl_request_block lsctl; + struct sf_buffer sfb; /* Sampling buffer */ + unsigned int flags; /* Status flags */ + struct perf_event *event; /* Scheduled perf event */ + struct perf_output_handle handle; /* AUX buffer output handle */ +}; +static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); + +/* Debug feature */ +static debug_info_t *sfdbg; + +/* Sampling control helper functions */ +static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi, + unsigned long freq) +{ + return (USEC_PER_SEC / freq) * qsi->cpu_speed; +} + +static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, + unsigned long rate) +{ + return USEC_PER_SEC * qsi->cpu_speed / rate; +} + +/* Return TOD timestamp contained in an trailer entry */ +static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +{ + /* TOD in STCKE format */ + if (te->header.t) + return *((unsigned long long *)&te->timestamp[1]); + + /* TOD in STCK format */ + return *((unsigned long long *)&te->timestamp[0]); +} + +/* Return pointer to trailer entry of an sample data block */ +static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *)v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return ret; +} + +/* + * Return true if the entry in the sample data block table (sdbt) + * is a link to the next sdbt + */ +static inline int is_link_entry(unsigned long *s) +{ + return *s & 0x1UL ? 
1 : 0; +} + +/* Return pointer to the linked sdbt */ +static inline unsigned long *get_next_sdbt(unsigned long *s) +{ + return phys_to_virt(*s & ~0x1UL); +} + +/* + * sf_disable() - Switch off sampling facility + */ +static int sf_disable(void) +{ + struct hws_lsctl_request_block sreq; + + memset(&sreq, 0, sizeof(sreq)); + return lsctl(&sreq); +} + +/* + * sf_buffer_available() - Check for an allocated sampling buffer + */ +static int sf_buffer_available(struct cpu_hw_sf *cpuhw) +{ + return !!cpuhw->sfb.sdbt; +} + +/* + * deallocate sampling facility buffer + */ +static void free_sampling_buffer(struct sf_buffer *sfb) +{ + unsigned long *sdbt, *curr; + + if (!sfb->sdbt) + return; + + sdbt = sfb->sdbt; + curr = sdbt; + + /* Free the SDBT after all SDBs are processed... */ + while (1) { + if (!*curr || !sdbt) + break; + + /* Process table-link entries */ + if (is_link_entry(curr)) { + curr = get_next_sdbt(curr); + if (sdbt) + free_page((unsigned long)sdbt); + + /* If the origin is reached, sampling buffer is freed */ + if (curr == sfb->sdbt) + break; + else + sdbt = curr; + } else { + /* Process SDB pointer */ + if (*curr) { + free_page((unsigned long)phys_to_virt(*curr)); + curr++; + } + } + } + + debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__, + (unsigned long)sfb->sdbt); + memset(sfb, 0, sizeof(*sfb)); +} + +static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) +{ + struct hws_trailer_entry *te; + unsigned long sdb; + + /* Allocate and initialize sample-data-block */ + sdb = get_zeroed_page(gfp_flags); + if (!sdb) + return -ENOMEM; + te = trailer_entry_ptr(sdb); + te->header.a = 1; + + /* Link SDB into the sample-data-block-table */ + *sdbt = virt_to_phys((void *)sdb); + + return 0; +} + +/* + * realloc_sampling_buffer() - extend sampler memory + * + * Allocates new sample-data-blocks and adds them to the specified sampling + * buffer memory. + * + * Important: This modifies the sampling buffer and must be called when the + * sampling facility is disabled. + * + * Returns zero on success, non-zero otherwise. + */ +static int realloc_sampling_buffer(struct sf_buffer *sfb, + unsigned long num_sdb, gfp_t gfp_flags) +{ + int i, rc; + unsigned long *new, *tail, *tail_prev = NULL; + + if (!sfb->sdbt || !sfb->tail) + return -EINVAL; + + if (!is_link_entry(sfb->tail)) + return -EINVAL; + + /* Append to the existing sampling buffer, overwriting the table-link + * register. + * The tail variables always points to the "tail" (last and table-link) + * entry in an SDB-table. + */ + tail = sfb->tail; + + /* Do a sanity check whether the table-link entry points to + * the sampling buffer origin. + */ + if (sfb->sdbt != get_next_sdbt(tail)) { + debug_sprintf_event(sfdbg, 3, "%s: " + "sampling buffer is not linked: origin %#lx" + " tail %#lx\n", __func__, + (unsigned long)sfb->sdbt, + (unsigned long)tail); + return -EINVAL; + } + + /* Allocate remaining SDBs */ + rc = 0; + for (i = 0; i < num_sdb; i++) { + /* Allocate a new SDB-table if it is full. */ + if (require_table_link(tail)) { + new = (unsigned long *)get_zeroed_page(gfp_flags); + if (!new) { + rc = -ENOMEM; + break; + } + sfb->num_sdbt++; + /* Link current page to tail of chain */ + *tail = virt_to_phys((void *)new) + 1; + tail_prev = tail; + tail = new; + } + + /* Allocate a new sample-data-block. + * If there is not enough memory, stop the realloc process + * and simply use what was allocated. If this is a temporary + * issue, a new realloc call (if required) might succeed. 
+ */ + rc = alloc_sample_data_block(tail, gfp_flags); + if (rc) { + /* Undo last SDBT. An SDBT with no SDB at its first + * entry but with an SDBT entry instead can not be + * handled by the interrupt handler code. + * Avoid this situation. + */ + if (tail_prev) { + sfb->num_sdbt--; + free_page((unsigned long)new); + tail = tail_prev; + } + break; + } + sfb->num_sdb++; + tail++; + tail_prev = new = NULL; /* Allocated at least one SBD */ + } + + /* Link sampling buffer to its origin */ + *tail = virt_to_phys(sfb->sdbt) + 1; + sfb->tail = tail; + + debug_sprintf_event(sfdbg, 4, "%s: new buffer" + " settings: sdbt %lu sdb %lu\n", __func__, + sfb->num_sdbt, sfb->num_sdb); + return rc; +} + +/* + * allocate_sampling_buffer() - allocate sampler memory + * + * Allocates and initializes a sampling buffer structure using the + * specified number of sample-data-blocks (SDB). For each allocation, + * a 4K page is used. The number of sample-data-block-tables (SDBT) + * are calculated from SDBs. + * Also set the ALERT_REQ mask in each SDBs trailer. + * + * Returns zero on success, non-zero otherwise. + */ +static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) +{ + int rc; + + if (sfb->sdbt) + return -EINVAL; + + /* Allocate the sample-data-block-table origin */ + sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + return -ENOMEM; + sfb->num_sdb = 0; + sfb->num_sdbt = 1; + + /* Link the table origin to point to itself to prepare for + * realloc_sampling_buffer() invocation. + */ + sfb->tail = sfb->sdbt; + *sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1; + + /* Allocate requested number of sample-data-blocks */ + rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); + if (rc) { + free_sampling_buffer(sfb); + debug_sprintf_event(sfdbg, 4, "%s: " + "realloc_sampling_buffer failed with rc %i\n", + __func__, rc); + } else + debug_sprintf_event(sfdbg, 4, + "%s: tear %#lx dear %#lx\n", __func__, + (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt); + return rc; +} + +static void sfb_set_limits(unsigned long min, unsigned long max) +{ + struct hws_qsi_info_block si; + + CPUM_SF_MIN_SDB = min; + CPUM_SF_MAX_SDB = max; + + memset(&si, 0, sizeof(si)); + if (!qsi(&si)) + CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); +} + +static unsigned long sfb_max_limit(struct hw_perf_event *hwc) +{ + return SAMPL_DIAG_MODE(hwc) ? 
CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR + : CPUM_SF_MAX_SDB; +} + +static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + if (!sfb->sdbt) + return SFB_ALLOC_REG(hwc); + if (SFB_ALLOC_REG(hwc) > sfb->num_sdb) + return SFB_ALLOC_REG(hwc) - sfb->num_sdb; + return 0; +} + +static int sfb_has_pending_allocs(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + return sfb_pending_allocs(sfb, hwc) > 0; +} + +static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + /* Limit the number of SDBs to not exceed the maximum */ + num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); + if (num) + SFB_ALLOC_REG(hwc) += num; +} + +static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) +{ + SFB_ALLOC_REG(hwc) = 0; + sfb_account_allocs(num, hwc); +} + +static void deallocate_buffers(struct cpu_hw_sf *cpuhw) +{ + if (cpuhw->sfb.sdbt) + free_sampling_buffer(&cpuhw->sfb); +} + +static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) +{ + unsigned long n_sdb, freq; + size_t sample_size; + + /* Calculate sampling buffers using 4K pages + * + * 1. The sampling size is 32 bytes for basic sampling. This size + * is the same for all machine types. Diagnostic + * sampling uses auxlilary data buffer setup which provides the + * memory for SDBs using linux common code auxiliary trace + * setup. + * + * 2. Function alloc_sampling_buffer() sets the Alert Request + * Control indicator to trigger a measurement-alert to harvest + * sample-data-blocks (SDB). This is done per SDB. This + * measurement alert interrupt fires quick enough to handle + * one SDB, on very high frequency and work loads there might + * be 2 to 3 SBDs available for sample processing. + * Currently there is no need for setup alert request on every + * n-th page. This is counterproductive as one IRQ triggers + * a very high number of samples to be processed at one IRQ. + * + * 3. Use the sampling frequency as input. + * Compute the number of SDBs and ensure a minimum + * of CPUM_SF_MIN_SDB. Depending on frequency add some more + * SDBs to handle a higher sampling rate. + * Use a minimum of CPUM_SF_MIN_SDB and allow for 100 samples + * (one SDB) for every 10000 HZ frequency increment. + * + * 4. Compute the number of sample-data-block-tables (SDBT) and + * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up + * to 511 SDBs). + */ + sample_size = sizeof(struct hws_basic_entry); + freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); + n_sdb = CPUM_SF_MIN_SDB + DIV_ROUND_UP(freq, 10000); + + /* If there is already a sampling buffer allocated, it is very likely + * that the sampling facility is enabled too. If the event to be + * initialized requires a greater sampling buffer, the allocation must + * be postponed. Changing the sampling buffer requires the sampling + * facility to be in the disabled state. So, account the number of + * required SDBs and let cpumsf_pmu_enable() resize the buffer just + * before the event is started. 
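+ * As an illustration of the sizing above (frequency value purely
+ * illustrative): a sampling frequency of 25000 Hz yields
+ * n_sdb = 15 + DIV_ROUND_UP(25000, 10000) = 18 SDBs with the default
+ * CPUM_SF_MIN_SDB of 15.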
+ */ + sfb_init_allocs(n_sdb, hwc); + if (sf_buffer_available(cpuhw)) + return 0; + + debug_sprintf_event(sfdbg, 3, + "%s: rate %lu f %lu sdb %lu/%lu" + " sample_size %lu cpuhw %p\n", __func__, + SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), + sample_size, cpuhw); + + return alloc_sampling_buffer(&cpuhw->sfb, + sfb_pending_allocs(&cpuhw->sfb, hwc)); +} + +static unsigned long min_percent(unsigned int percent, unsigned long base, + unsigned long min) +{ + return min_t(unsigned long, min, DIV_ROUND_UP(percent * base, 100)); +} + +static unsigned long compute_sfb_extent(unsigned long ratio, unsigned long base) +{ + /* Use a percentage-based approach to extend the sampling facility + * buffer. Accept up to 5% sample data loss. + * Vary the extents between 1% to 5% of the current number of + * sample-data-blocks. + */ + if (ratio <= 5) + return 0; + if (ratio <= 25) + return min_percent(1, base, 1); + if (ratio <= 50) + return min_percent(1, base, 1); + if (ratio <= 75) + return min_percent(2, base, 2); + if (ratio <= 100) + return min_percent(3, base, 3); + if (ratio <= 250) + return min_percent(4, base, 4); + + return min_percent(5, base, 8); +} + +static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, + struct hw_perf_event *hwc) +{ + unsigned long ratio, num; + + if (!OVERFLOW_REG(hwc)) + return; + + /* The sample_overflow contains the average number of sample data + * that has been lost because sample-data-blocks were full. + * + * Calculate the total number of sample data entries that has been + * discarded. Then calculate the ratio of lost samples to total samples + * per second in percent. + */ + ratio = DIV_ROUND_UP(100 * OVERFLOW_REG(hwc) * cpuhw->sfb.num_sdb, + sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc))); + + /* Compute number of sample-data-blocks */ + num = compute_sfb_extent(ratio, cpuhw->sfb.num_sdb); + if (num) + sfb_account_allocs(num, hwc); + + debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n", + __func__, OVERFLOW_REG(hwc), ratio, num); + OVERFLOW_REG(hwc) = 0; +} + +/* extend_sampling_buffer() - Extend sampling buffer + * @sfb: Sampling buffer structure (for local CPU) + * @hwc: Perf event hardware structure + * + * Use this function to extend the sampling buffer based on the overflow counter + * and postponed allocation extents stored in the specified Perf event hardware. + * + * Important: This function disables the sampling facility in order to safely + * change the sampling buffer structure. Do not call this function + * when the PMU is active. + */ +static void extend_sampling_buffer(struct sf_buffer *sfb, + struct hw_perf_event *hwc) +{ + unsigned long num, num_old; + int rc; + + num = sfb_pending_allocs(sfb, hwc); + if (!num) + return; + num_old = sfb->num_sdb; + + /* Disable the sampling facility to reset any states and also + * clear pending measurement alerts. + */ + sf_disable(); + + /* Extend the sampling buffer. + * This memory allocation typically happens in an atomic context when + * called by perf. Because this is a reallocation, it is fine if the + * new SDB-request cannot be satisfied immediately. 
+ */ + rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); + if (rc) + debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n", + __func__, rc); + + if (sfb_has_pending_allocs(sfb, hwc)) + debug_sprintf_event(sfdbg, 5, "%s: " + "req %lu alloc %lu remaining %lu\n", + __func__, num, sfb->num_sdb - num_old, + sfb_pending_allocs(sfb, hwc)); +} + +/* Number of perf events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_cpumf_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +#define PMC_INIT 0 +#define PMC_RELEASE 1 +#define PMC_FAILURE 2 +static void setup_pmc_cpu(void *flags) +{ + struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf); + int err = 0; + + switch (*((int *)flags)) { + case PMC_INIT: + memset(cpusf, 0, sizeof(*cpusf)); + err = qsi(&cpusf->qsi); + if (err) + break; + cpusf->flags |= PMU_F_RESERVED; + err = sf_disable(); + break; + case PMC_RELEASE: + cpusf->flags &= ~PMU_F_RESERVED; + err = sf_disable(); + if (!err) + deallocate_buffers(cpusf); + break; + } + if (err) { + *((int *)flags) |= PMC_FAILURE; + pr_err("Switching off the sampling facility failed with rc %i\n", err); + } +} + +static void release_pmc_hardware(void) +{ + int flags = PMC_RELEASE; + + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); + on_each_cpu(setup_pmc_cpu, &flags, 1); +} + +static int reserve_pmc_hardware(void) +{ + int flags = PMC_INIT; + + on_each_cpu(setup_pmc_cpu, &flags, 1); + if (flags & PMC_FAILURE) { + release_pmc_hardware(); + return -ENODEV; + } + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +static void hw_perf_event_destroy(struct perf_event *event) +{ + /* Release PMC if this is the last perf event */ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +static void hw_init_period(struct hw_perf_event *hwc, u64 period) +{ + hwc->sample_period = period; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); +} + +static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, + unsigned long rate) +{ + return clamp_t(unsigned long, rate, + si->min_sampl_rate, si->max_sampl_rate); +} + +static u32 cpumsf_pid_type(struct perf_event *event, + u32 pid, enum pid_type type) +{ + struct task_struct *tsk; + + /* Idle process */ + if (!pid) + goto out; + + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + pid = -1; + if (tsk) { + /* + * Only top level events contain the pid namespace in which + * they are created. + */ + if (event->parent) + event = event->parent; + pid = __task_pid_nr_ns(tsk, type, event->ns); + /* + * See also 1d953111b648 + * "perf/core: Don't report zero PIDs for exiting tasks". + */ + if (!pid && !pid_alive(tsk)) + pid = -1; + } +out: + return pid; +} + +static void cpumsf_output_event_pid(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + u32 pid; + struct perf_event_header header; + struct perf_output_handle handle; + + /* + * Obtain the PID from the basic-sampling data entry and + * correct the data->tid_entry.pid value. 
+ */ + pid = data->tid_entry.pid; + + /* Protect callchain buffers, tasks */ + rcu_read_lock(); + + perf_prepare_sample(data, event, regs); + perf_prepare_header(&header, data, event, regs); + if (perf_output_begin(&handle, data, event, header.size)) + goto out; + + /* Update the process ID (see also kernel/events/core.c) */ + data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID); + data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID); + + perf_output_sample(&handle, &header, data, event); + perf_output_end(&handle); +out: + rcu_read_unlock(); +} + +static unsigned long getrate(bool freq, unsigned long sample, + struct hws_qsi_info_block *si) +{ + unsigned long rate; + + if (freq) { + rate = freq_to_sample_rate(si, sample); + rate = hw_limit_rate(si, rate); + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(si, sample); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(si, rate) > + sysctl_perf_event_sample_rate) { + debug_sprintf_event(sfdbg, 1, "%s: " + "Sampling rate exceeds maximum " + "perf sample rate\n", __func__); + rate = 0; + } + } + return rate; +} + +/* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + * + * Since the CPU Measurement sampling facility can not handle frequency + * calculate the sampling interval when frequency is specified using + * this formula: + * interval := cpu_speed * 1000000 / sample_freq + * + * Returns errno on bad input and zero on success with parameter interval + * set to the correct sampling rate. + * + * Note: This function turns off freq bit to avoid calling function + * perf_adjust_period(). This causes frequency adjustment in the common + * code part which causes tremendous variations in the counter values. 
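+ * As an illustration of the formula above (illustrative values): a
+ * cpu_speed of 1000 and a requested sample_freq of 4000 Hz give
+ * interval = 1000 * 1000000 / 4000 = 250000.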
+ */ +static int __hw_perf_event_init_rate(struct perf_event *event, + struct hws_qsi_info_block *si) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + + if (attr->freq) { + if (!attr->sample_freq) + return -EINVAL; + rate = getrate(attr->freq, attr->sample_freq, si); + attr->freq = 0; /* Don't call perf_adjust_period() */ + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE; + } else { + rate = getrate(attr->freq, attr->sample_period, si); + if (!rate) + return -EINVAL; + } + attr->sample_period = rate; + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n", + __func__, event->cpu, event->attr.sample_period, + event->attr.freq, SAMPLE_FREQ_MODE(hwc)); + return 0; +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct cpu_hw_sf *cpuhw; + struct hws_qsi_info_block si; + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + int cpu, err; + + /* Reserve CPU-measurement sampling facility */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && reserve_pmc_hardware()) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + } + event->destroy = hw_perf_event_destroy; + + if (err) + goto out; + + /* Access per-CPU sampling information (query sampling info) */ + /* + * The event->cpu value can be -1 to count on every CPU, for example, + * when attaching to a task. If this is specified, use the query + * sampling info from the current CPU, otherwise use event->cpu to + * retrieve the per-CPU information. + * Later, cpuhw indicates whether to allocate sampling buffers for a + * particular CPU (cpuhw!=NULL) or each online CPU (cpuw==NULL). + */ + memset(&si, 0, sizeof(si)); + cpuhw = NULL; + if (event->cpu == -1) + qsi(&si); + else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + si = cpuhw->qsi; + } + + /* Check sampling facility authorization and, if not authorized, + * fall back to other PMUs. It is safe to check any CPU because + * the authorization is identical for all configured CPUs. + */ + if (!si.as) { + err = -ENOENT; + goto out; + } + + if (si.ribm & CPU_MF_SF_RIBM_NOTAV) { + pr_warn("CPU Measurement Facility sampling is temporarily not available\n"); + err = -EBUSY; + goto out; + } + + /* Always enable basic sampling */ + SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; + + /* Check if diagnostic sampling is requested. Deny if the required + * sampling authorization is missing. + */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { + if (!si.ad) { + err = -EPERM; + goto out; + } + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; + } + + err = __hw_perf_event_init_rate(event, &si); + if (err) + goto out; + + /* Initialize sample data overflow accounting */ + hwc->extra_reg.reg = REG_OVERFLOW; + OVERFLOW_REG(hwc) = 0; + + /* Use AUX buffer. No need to allocate it by ourself */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) + return 0; + + /* Allocate the per-CPU sampling buffer using the CPU information + * from the event. If the event is not pinned to a particular + * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling + * buffers for each online CPU. 
+ */ + if (cpuhw) + /* Event is pinned to a particular CPU */ + err = allocate_buffers(cpuhw, hwc); + else { + /* Event is not pinned, allocate sampling buffer on + * each online CPU + */ + for_each_online_cpu(cpu) { + cpuhw = &per_cpu(cpu_hw_sf, cpu); + err = allocate_buffers(cpuhw, hwc); + if (err) + break; + } + } + + /* If PID/TID sampling is active, replace the default overflow + * handler to extract and resolve the PIDs from the basic-sampling + * data entries. + */ + if (event->attr.sample_type & PERF_SAMPLE_TID) + if (is_default_overflow_handler(event)) + event->overflow_handler = cpumsf_output_event_pid; +out: + return err; +} + +static bool is_callchain_event(struct perf_event *event) +{ + u64 sample_type = event->attr.sample_type; + + return sample_type & (PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | + PERF_SAMPLE_STACK_USER); +} + +static int cpumsf_pmu_event_init(struct perf_event *event) +{ + int err; + + /* No support for taken branch sampling */ + /* No support for callchain, stacks and registers */ + if (has_branch_stack(event) || is_callchain_event(event)) + return -EOPNOTSUPP; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + if ((event->attr.config != PERF_EVENT_CPUM_SF) && + (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) + return -ENOENT; + break; + case PERF_TYPE_HARDWARE: + /* Support sampling of CPU cycles in addition to the + * counter facility. However, the counter facility + * is more precise and, hence, restrict this PMU to + * sampling events only. + */ + if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES) + return -ENOENT; + if (!is_sampling_event(event)) + return -ENOENT; + break; + default: + return -ENOENT; + } + + /* Force reset of idle/hv excludes regardless of what the + * user requested. + */ + if (event->attr.exclude_hv) + event->attr.exclude_hv = 0; + if (event->attr.exclude_idle) + event->attr.exclude_idle = 0; + + err = __hw_perf_event_init(event); + if (unlikely(err)) + if (event->destroy) + event->destroy(event); + return err; +} + +static void cpumsf_pmu_enable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + struct hw_perf_event *hwc; + int err; + + if (cpuhw->flags & PMU_F_ENABLED) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Check whether to extent the sampling buffer. + * + * Two conditions trigger an increase of the sampling buffer for a + * perf event: + * 1. Postponed buffer allocations from the event initialization. + * 2. Sampling overflows that contribute to pending allocations. + * + * Note that the extend_sampling_buffer() function disables the sampling + * facility, but it can be fully re-enabled using sampling controls that + * have been saved in cpumsf_pmu_disable(). 
+ */ + if (cpuhw->event) { + hwc = &cpuhw->event->hw; + if (!(SAMPL_DIAG_MODE(hwc))) { + /* + * Account number of overflow-designated + * buffer extents + */ + sfb_account_overflows(cpuhw, hwc); + extend_sampling_buffer(&cpuhw->sfb, hwc); + } + /* Rate may be adjusted with ioctl() */ + cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw); + } + + /* (Re)enable the PMU and sampling facility */ + cpuhw->flags |= PMU_F_ENABLED; + barrier(); + + err = lsctl(&cpuhw->lsctl); + if (err) { + cpuhw->flags &= ~PMU_F_ENABLED; + pr_err("Loading sampling controls failed: op 1 err %i\n", err); + return; + } + + /* Load current program parameter */ + lpp(&S390_lowcore.lpp); + + debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i " + "interval %#lx tear %#lx dear %#lx\n", __func__, + cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed, + cpuhw->lsctl.cd, cpuhw->lsctl.interval, + cpuhw->lsctl.tear, cpuhw->lsctl.dear); +} + +static void cpumsf_pmu_disable(struct pmu *pmu) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + struct hws_lsctl_request_block inactive; + struct hws_qsi_info_block si; + int err; + + if (!(cpuhw->flags & PMU_F_ENABLED)) + return; + + if (cpuhw->flags & PMU_F_ERR_MASK) + return; + + /* Switch off sampling activation control */ + inactive = cpuhw->lsctl; + inactive.cs = 0; + inactive.cd = 0; + + err = lsctl(&inactive); + if (err) { + pr_err("Loading sampling controls failed: op 2 err %i\n", err); + return; + } + + /* Save state of TEAR and DEAR register contents */ + err = qsi(&si); + if (!err) { + /* TEAR/DEAR values are valid only if the sampling facility is + * enabled. Note that cpumsf_pmu_disable() might be called even + * for a disabled sampling facility because cpumsf_pmu_enable() + * controls the enable/disable state. + */ + if (si.es) { + cpuhw->lsctl.tear = si.tear; + cpuhw->lsctl.dear = si.dear; + } + } else + debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n", + __func__, err); + + cpuhw->flags &= ~PMU_F_ENABLED; +} + +/* perf_exclude_event() - Filter event + * @event: The perf event + * @regs: pt_regs structure + * @sde_regs: Sample-data-entry (sde) regs structure + * + * Filter perf events according to their exclude specification. + * + * Return non-zero if the event shall be excluded. + */ +static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, + struct perf_sf_sde_regs *sde_regs) +{ + if (event->attr.exclude_user && user_mode(regs)) + return 1; + if (event->attr.exclude_kernel && !user_mode(regs)) + return 1; + if (event->attr.exclude_guest && sde_regs->in_guest) + return 1; + if (event->attr.exclude_host && !sde_regs->in_guest) + return 1; + return 0; +} + +/* perf_push_sample() - Push samples to perf + * @event: The perf event + * @sample: Hardware sample data + * + * Use the hardware sample data to create perf event sample. The sample + * is the pushed to the event subsystem and the function checks for + * possible event overflows. If an event overflow occurs, the PMU is + * stopped. + * + * Return non-zero if an event overflow occurred. + */ +static int perf_push_sample(struct perf_event *event, + struct hws_basic_entry *basic) +{ + int overflow; + struct pt_regs regs; + struct perf_sf_sde_regs *sde_regs; + struct perf_sample_data data; + + /* Setup perf sample */ + perf_sample_data_init(&data, 0, event->hw.last_period); + + /* Setup pt_regs to look like an CPU-measurement external interrupt + * using the Program Request Alert code. 
The regs.int_parm_long + * field which is unused contains additional sample-data-entry related + * indicators. + */ + memset(®s, 0, sizeof(regs)); + regs.int_code = 0x1407; + regs.int_parm = CPU_MF_INT_SF_PRA; + sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; + + psw_bits(regs.psw).ia = basic->ia; + psw_bits(regs.psw).dat = basic->T; + psw_bits(regs.psw).wait = basic->W; + psw_bits(regs.psw).pstate = basic->P; + psw_bits(regs.psw).as = basic->AS; + + /* + * Use the hardware provided configuration level to decide if the + * sample belongs to a guest or host. If that is not available, + * fall back to the following heuristics: + * A non-zero guest program parameter always indicates a guest + * sample. Some early samples or samples from guests without + * lpp usage would be misaccounted to the host. We use the asn + * value as an addon heuristic to detect most of these guest samples. + * If the value differs from 0xffff (the host value), we assume to + * be a KVM guest. + */ + switch (basic->CL) { + case 1: /* logical partition */ + sde_regs->in_guest = 0; + break; + case 2: /* virtual machine */ + sde_regs->in_guest = 1; + break; + default: /* old machine, use heuristics */ + if (basic->gpp || basic->prim_asn != 0xffff) + sde_regs->in_guest = 1; + break; + } + + /* + * Store the PID value from the sample-data-entry to be + * processed and resolved by cpumsf_output_event_pid(). + */ + data.tid_entry.pid = basic->hpp & LPP_PID_MASK; + + overflow = 0; + if (perf_exclude_event(event, ®s, sde_regs)) + goto out; + if (perf_event_overflow(event, &data, ®s)) { + overflow = 1; + event->pmu->stop(event, 0); + } + perf_event_update_userpage(event); +out: + return overflow; +} + +static void perf_event_count_update(struct perf_event *event, u64 count) +{ + local64_add(count, &event->count); +} + +/* hw_collect_samples() - Walk through a sample-data-block and collect samples + * @event: The perf event + * @sdbt: Sample-data-block table + * @overflow: Event overflow counter + * + * Walks through a sample-data-block and collects sampling data entries that are + * then pushed to the perf event subsystem. Depending on the sampling function, + * there can be either basic-sampling or combined-sampling data entries. A + * combined-sampling data entry consists of a basic- and a diagnostic-sampling + * data entry. The sampling function is determined by the flags in the perf + * event hardware structure. The function always works with a combined-sampling + * data entry but ignores the the diagnostic portion if it is not available. + * + * Note that the implementation focuses on basic-sampling data entries and, if + * such an entry is not valid, the entire combined-sampling data entry is + * ignored. + * + * The overflow variables counts the number of samples that has been discarded + * due to a perf event overflow. + */ +static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, + unsigned long long *overflow) +{ + struct hws_trailer_entry *te; + struct hws_basic_entry *sample; + + te = trailer_entry_ptr((unsigned long)sdbt); + sample = (struct hws_basic_entry *)sdbt; + while ((unsigned long *)sample < (unsigned long *)te) { + /* Check for an empty sample */ + if (!sample->def || sample->LS) + break; + + /* Update perf event period */ + perf_event_count_update(event, SAMPL_RATE(&event->hw)); + + /* Check whether sample is valid */ + if (sample->def == 0x0001) { + /* If an event overflow occurred, the PMU is stopped to + * throttle event delivery. Remaining sample data is + * discarded. 
+ */ + if (!*overflow) { + /* Check whether sample is consistent */ + if (sample->I == 0 && sample->W == 0) { + /* Deliver sample data to perf */ + *overflow = perf_push_sample(event, + sample); + } + } else + /* Count discarded samples */ + *overflow += 1; + } else { + debug_sprintf_event(sfdbg, 4, + "%s: Found unknown" + " sampling data entry: te->f %i" + " basic.def %#4x (%p)\n", __func__, + te->header.f, sample->def, sample); + /* Sample slot is not yet written or other record. + * + * This condition can occur if the buffer was reused + * from a combined basic- and diagnostic-sampling. + * If only basic-sampling is then active, entries are + * written into the larger diagnostic entries. + * This is typically the case for sample-data-blocks + * that are not full. Stop processing if the first + * invalid format was detected. + */ + if (!te->header.f) + break; + } + + /* Reset sample slot and advance to next sample */ + sample->def = 0; + sample++; + } +} + +/* hw_perf_event_update() - Process sampling buffer + * @event: The perf event + * @flush_all: Flag to also flush partially filled sample-data-blocks + * + * Processes the sampling buffer and create perf event samples. + * The sampling buffer position are retrieved and saved in the TEAR_REG + * register of the specified perf event. + * + * Only full sample-data-blocks are processed. Specify the flush_all flag + * to also walk through partially filled sample-data-blocks. + */ +static void hw_perf_event_update(struct perf_event *event, int flush_all) +{ + unsigned long long event_overflow, sampl_overflow, num_sdb; + union hws_trailer_header old, prev, new; + struct hw_perf_event *hwc = &event->hw; + struct hws_trailer_entry *te; + unsigned long *sdbt, sdb; + int done; + + /* + * AUX buffer is used when in diagnostic sampling mode. + * No perf events/samples are created. + */ + if (SAMPL_DIAG_MODE(&event->hw)) + return; + + sdbt = (unsigned long *)TEAR_REG(hwc); + done = event_overflow = sampl_overflow = num_sdb = 0; + while (!done) { + /* Get the trailer entry of the sample-data-block */ + sdb = (unsigned long)phys_to_virt(*sdbt); + te = trailer_entry_ptr(sdb); + + /* Leave loop if no more work to do (block full indicator) */ + if (!te->header.f) { + done = 1; + if (!flush_all) + break; + } + + /* Check the sample overflow count */ + if (te->header.overflow) + /* Account sample overflows and, if a particular limit + * is reached, extend the sampling buffer. + * For details, see sfb_account_overflows(). + */ + sampl_overflow += te->header.overflow; + + /* Timestamps are valid for full sample-data-blocks only */ + debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx " + "overflow %llu timestamp %#llx\n", + __func__, sdb, (unsigned long)sdbt, + te->header.overflow, + (te->header.f) ? trailer_timestamp(te) : 0ULL); + + /* Collect all samples from a single sample-data-block and + * flag if an (perf) event overflow happened. If so, the PMU + * is stopped and remaining samples will be discarded. 
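+ * When an event overflow stops the PMU, the sampling interval is later
+ * increased by roughly 10%, i.e. SAMPL_RATE + DIV_ROUND_UP(SAMPL_RATE, 10);
+ * for example, an interval of 250000 becomes 275000. This avoids hitting
+ * the perf interrupt-rate limit again.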
+ */ + hw_collect_samples(event, (unsigned long *)sdb, &event_overflow); + num_sdb++; + + /* Reset trailer (using compare-double-and-swap) */ + prev.val = READ_ONCE_ALIGNED_128(te->header.val); + do { + old.val = prev.val; + new.val = prev.val; + new.f = 0; + new.a = 1; + new.overflow = 0; + prev.val = cmpxchg128(&te->header.val, old.val, new.val); + } while (prev.val != old.val); + + /* Advance to next sample-data-block */ + sdbt++; + if (is_link_entry(sdbt)) + sdbt = get_next_sdbt(sdbt); + + /* Update event hardware registers */ + TEAR_REG(hwc) = (unsigned long) sdbt; + + /* Stop processing sample-data if all samples of the current + * sample-data-block were flushed even if it was not full. + */ + if (flush_all && done) + break; + } + + /* Account sample overflows in the event hardware structure */ + if (sampl_overflow) + OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + + if (sampl_overflow || event_overflow) + debug_sprintf_event(sfdbg, 4, "%s: " + "overflows: sample %llu event %llu" + " total %llu num_sdb %llu\n", + __func__, sampl_overflow, event_overflow, + OVERFLOW_REG(hwc), num_sdb); +} + +static inline unsigned long aux_sdb_index(struct aux_buffer *aux, + unsigned long i) +{ + return i % aux->sfb.num_sdb; +} + +static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end) +{ + return end >= start ? end - start + 1 : 0; +} + +static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux) +{ + return aux_sdb_num(aux->head, aux->alert_mark); +} + +static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux) +{ + return aux_sdb_num(aux->head, aux->empty_mark); +} + +/* + * Get trailer entry by index of SDB. + */ +static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux, + unsigned long index) +{ + unsigned long sdb; + + index = aux_sdb_index(aux, index); + sdb = aux->sdb_index[index]; + return trailer_entry_ptr(sdb); +} + +/* + * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu + * disabled. Collect the full SDBs in AUX buffer which have not reached + * the point of alert indicator. And ignore the SDBs which are not + * full. + * + * 1. Scan SDBs to see how much data is there and consume them. + * 2. Remove alert indicator in the buffer. 
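+ * Each SDB is one 4KB page, so the i full SDBs found below are handed to
+ * perf_aux_output_end() as i << PAGE_SHIFT bytes.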
+ */ +static void aux_output_end(struct perf_output_handle *handle) +{ + unsigned long i, range_scan, idx; + struct aux_buffer *aux; + struct hws_trailer_entry *te; + + aux = perf_get_aux(handle); + if (!aux) + return; + + range_scan = aux_sdb_num_alert(aux); + for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + if (!te->header.f) + break; + } + /* i is num of SDBs which are full */ + perf_aux_output_end(handle, i << PAGE_SHIFT); + + /* Remove alert indicators in the buffer */ + te = aux_sdb_trailer(aux, aux->alert_mark); + te->header.a = 0; + + debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", + __func__, i, range_scan, aux->head); +} + +/* + * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event + * is first added to the CPU or rescheduled again to the CPU. It is called + * with pmu disabled. + * + * 1. Reset the trailer of SDBs to get ready for new data. + * 2. Tell the hardware where to put the data by reset the SDBs buffer + * head(tear/dear). + */ +static int aux_output_begin(struct perf_output_handle *handle, + struct aux_buffer *aux, + struct cpu_hw_sf *cpuhw) +{ + unsigned long range, i, range_scan, idx, head, base, offset; + struct hws_trailer_entry *te; + + if (WARN_ON_ONCE(handle->head & ~PAGE_MASK)) + return -EINVAL; + + aux->head = handle->head >> PAGE_SHIFT; + range = (handle->size + 1) >> PAGE_SHIFT; + if (range <= 1) + return -ENOMEM; + + /* + * SDBs between aux->head and aux->empty_mark are already ready + * for new data. range_scan is num of SDBs not within them. + */ + debug_sprintf_event(sfdbg, 6, + "%s: range %ld head %ld alert %ld empty %ld\n", + __func__, range, aux->head, aux->alert_mark, + aux->empty_mark); + if (range > aux_sdb_num_empty(aux)) { + range_scan = range - aux_sdb_num_empty(aux); + idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + te->header.f = 0; + te->header.a = 0; + te->header.overflow = 0; + } + /* Save the position of empty SDBs */ + aux->empty_mark = aux->head + range - 1; + } + + /* Set alert indicator */ + aux->alert_mark = aux->head + range/2 - 1; + te = aux_sdb_trailer(aux, aux->alert_mark); + te->header.a = 1; + + /* Reset hardware buffer head */ + head = aux_sdb_index(aux, aux->head); + base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE]; + offset = head % CPUM_SF_SDB_PER_TABLE; + cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long); + cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]); + + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld " + "index %ld tear %#lx dear %#lx\n", __func__, + aux->head, aux->alert_mark, aux->empty_mark, + head / CPUM_SF_SDB_PER_TABLE, + cpuhw->lsctl.tear, cpuhw->lsctl.dear); + + return 0; +} + +/* + * Set alert indicator on SDB at index @alert_index while sampler is running. + * + * Return true if successfully. + * Return false if full indicator is already set by hardware sampler. + */ +static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, + unsigned long long *overflow) +{ + union hws_trailer_header old, prev, new; + struct hws_trailer_entry *te; + + te = aux_sdb_trailer(aux, alert_index); + prev.val = READ_ONCE_ALIGNED_128(te->header.val); + do { + old.val = prev.val; + new.val = prev.val; + *overflow = old.overflow; + if (old.f) { + /* + * SDB is already set by hardware. + * Abort and try to set somewhere + * behind. 
+ */ + return false; + } + new.a = 1; + new.overflow = 0; + prev.val = cmpxchg128(&te->header.val, old.val, new.val); + } while (prev.val != old.val); + return true; +} + +/* + * aux_reset_buffer() - Scan and setup SDBs for new samples + * @aux: The AUX buffer to set + * @range: The range of SDBs to scan started from aux->head + * @overflow: Set to overflow count + * + * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is + * marked as empty, check if it is already set full by the hardware sampler. + * If yes, that means new data is already there before we can set an alert + * indicator. Caller should try to set alert indicator to some position behind. + * + * Scan the SDBs in AUX buffer from behind aux->empty_mark. They are used + * previously and have already been consumed by user space. Reset these SDBs + * (clear full indicator and alert indicator) for new data. + * If aux->alert_mark fall in this area, just set it. Overflow count is + * recorded while scanning. + * + * SDBs between aux->head and aux->empty_mark are already reset at last time. + * and ready for new samples. So scanning on this area could be skipped. + * + * Return true if alert indicator is set successfully and false if not. + */ +static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, + unsigned long long *overflow) +{ + unsigned long i, range_scan, idx, idx_old; + union hws_trailer_header old, prev, new; + unsigned long long orig_overflow; + struct hws_trailer_entry *te; + + debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " + "empty %ld\n", __func__, range, aux->head, + aux->alert_mark, aux->empty_mark); + if (range <= aux_sdb_num_empty(aux)) + /* + * No need to scan. All SDBs in range are marked as empty. + * Just set alert indicator. Should check race with hardware + * sampler. + */ + return aux_set_alert(aux, aux->alert_mark, overflow); + + if (aux->alert_mark <= aux->empty_mark) + /* + * Set alert indicator on empty SDB. Should check race + * with hardware sampler. + */ + if (!aux_set_alert(aux, aux->alert_mark, overflow)) + return false; + + /* + * Scan the SDBs to clear full and alert indicator used previously. + * Start scanning from one SDB behind empty_mark. If the new alert + * indicator fall into this range, set it. + */ + range_scan = range - aux_sdb_num_empty(aux); + idx_old = idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + prev.val = READ_ONCE_ALIGNED_128(te->header.val); + do { + old.val = prev.val; + new.val = prev.val; + orig_overflow = old.overflow; + new.f = 0; + new.overflow = 0; + if (idx == aux->alert_mark) + new.a = 1; + else + new.a = 0; + prev.val = cmpxchg128(&te->header.val, old.val, new.val); + } while (prev.val != old.val); + *overflow += orig_overflow; + } + + /* Update empty_mark to new position */ + aux->empty_mark = aux->head + range - 1; + + debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld " + "empty %ld\n", __func__, range_scan, idx_old, + idx - 1, aux->empty_mark); + return true; +} + +/* + * Measurement alert handler for diagnostic mode sampling. 
+ */ +static void hw_collect_aux(struct cpu_hw_sf *cpuhw) +{ + struct aux_buffer *aux; + int done = 0; + unsigned long range = 0, size; + unsigned long long overflow = 0; + struct perf_output_handle *handle = &cpuhw->handle; + unsigned long num_sdb; + + aux = perf_get_aux(handle); + if (WARN_ON_ONCE(!aux)) + return; + + /* Inform user space new data arrived */ + size = aux_sdb_num_alert(aux) << PAGE_SHIFT; + debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__, + size >> PAGE_SHIFT); + perf_aux_output_end(handle, size); + + num_sdb = aux->sfb.num_sdb; + while (!done) { + /* Get an output handle */ + aux = perf_aux_output_begin(handle, cpuhw->event); + if (handle->size == 0) { + pr_err("The AUX buffer with %lu pages for the " + "diagnostic-sampling mode is full\n", + num_sdb); + break; + } + if (WARN_ON_ONCE(!aux)) + return; + + /* Update head and alert_mark to new position */ + aux->head = handle->head >> PAGE_SHIFT; + range = (handle->size + 1) >> PAGE_SHIFT; + if (range == 1) + aux->alert_mark = aux->head; + else + aux->alert_mark = aux->head + range/2 - 1; + + if (aux_reset_buffer(aux, range, &overflow)) { + if (!overflow) { + done = 1; + break; + } + size = range << PAGE_SHIFT; + perf_aux_output_end(&cpuhw->handle, size); + pr_err("Sample data caused the AUX buffer with %lu " + "pages to overflow\n", aux->sfb.num_sdb); + debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld " + "overflow %lld\n", __func__, + aux->head, range, overflow); + } else { + size = aux_sdb_num_alert(aux) << PAGE_SHIFT; + perf_aux_output_end(&cpuhw->handle, size); + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " + "already full, try another\n", + __func__, + aux->head, aux->alert_mark); + } + } + + if (done) + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " + "empty %ld\n", __func__, aux->head, + aux->alert_mark, aux->empty_mark); +} + +/* + * Callback when freeing AUX buffers. + */ +static void aux_buffer_free(void *data) +{ + struct aux_buffer *aux = data; + unsigned long i, num_sdbt; + + if (!aux) + return; + + /* Free SDBT. SDB is freed by the caller */ + num_sdbt = aux->sfb.num_sdbt; + for (i = 0; i < num_sdbt; i++) + free_page(aux->sdbt_index[i]); + + kfree(aux->sdbt_index); + kfree(aux->sdb_index); + kfree(aux); + + debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt); +} + +static void aux_sdb_init(unsigned long sdb) +{ + struct hws_trailer_entry *te; + + te = trailer_entry_ptr(sdb); + + /* Save clock base */ + te->clock_base = 1; + te->progusage2 = tod_clock_base.tod; +} + +/* + * aux_buffer_setup() - Setup AUX buffer for diagnostic mode sampling + * @event: Event the buffer is setup for, event->cpu == -1 means current + * @pages: Array of pointers to buffer pages passed from perf core + * @nr_pages: Total pages + * @snapshot: Flag for snapshot mode + * + * This is the callback when setup an event using AUX buffer. Perf tool can + * trigger this by an additional mmap() call on the event. Unlike the buffer + * for basic samples, AUX buffer belongs to the event. It is scheduled with + * the task among online cpus when it is a per-thread event. + * + * Return the private AUX buffer structure if success or NULL if fails. 
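+ * For instance, an AUX buffer of 512 pages needs
+ * DIV_ROUND_UP(512, 511) = 2 SDBTs for the sdbt_index lookup array, since
+ * one SDBT addresses up to 511 SDBs before linking to the next table.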
+ */ +static void *aux_buffer_setup(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) +{ + struct sf_buffer *sfb; + struct aux_buffer *aux; + unsigned long *new, *tail; + int i, n_sdbt; + + if (!nr_pages || !pages) + return NULL; + + if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) { + pr_err("AUX buffer size (%i pages) is larger than the " + "maximum sampling buffer limit\n", + nr_pages); + return NULL; + } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) { + pr_err("AUX buffer size (%i pages) is less than the " + "minimum sampling buffer limit\n", + nr_pages); + return NULL; + } + + /* Allocate aux_buffer struct for the event */ + aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL); + if (!aux) + goto no_aux; + sfb = &aux->sfb; + + /* Allocate sdbt_index for fast reference */ + n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE); + aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL); + if (!aux->sdbt_index) + goto no_sdbt_index; + + /* Allocate sdb_index for fast reference */ + aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL); + if (!aux->sdb_index) + goto no_sdb_index; + + /* Allocate the first SDBT */ + sfb->num_sdbt = 0; + sfb->sdbt = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + goto no_sdbt; + aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt; + tail = sfb->tail = sfb->sdbt; + + /* + * Link the provided pages of AUX buffer to SDBT. + * Allocate SDBT if needed. + */ + for (i = 0; i < nr_pages; i++, tail++) { + if (require_table_link(tail)) { + new = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!new) + goto no_sdbt; + aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new; + /* Link current page to tail of chain */ + *tail = virt_to_phys(new) + 1; + tail = new; + } + /* Tail is the entry in a SDBT */ + *tail = virt_to_phys(pages[i]); + aux->sdb_index[i] = (unsigned long)pages[i]; + aux_sdb_init((unsigned long)pages[i]); + } + sfb->num_sdb = nr_pages; + + /* Link the last entry in the SDBT to the first SDBT */ + *tail = virt_to_phys(sfb->sdbt) + 1; + sfb->tail = tail; + + /* + * Initial all SDBs are zeroed. Mark it as empty. + * So there is no need to clear the full indicator + * when this event is first added. + */ + aux->empty_mark = sfb->num_sdb - 1; + + debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__, + sfb->num_sdbt, sfb->num_sdb); + + return aux; + +no_sdbt: + /* SDBs (AUX buffer pages) are freed by caller */ + for (i = 0; i < sfb->num_sdbt; i++) + free_page(aux->sdbt_index[i]); + kfree(aux->sdb_index); +no_sdb_index: + kfree(aux->sdbt_index); +no_sdbt_index: + kfree(aux); +no_aux: + return NULL; +} + +static void cpumsf_pmu_read(struct perf_event *event) +{ + /* Nothing to do ... updates are interrupt-driven */ +} + +/* Check if the new sampling period/frequency is appropriate. + * + * Return non-zero on error and zero on passed checks. + */ +static int cpumsf_pmu_check_period(struct perf_event *event, u64 value) +{ + struct hws_qsi_info_block si; + unsigned long rate; + bool do_freq; + + memset(&si, 0, sizeof(si)); + if (event->cpu == -1) { + if (qsi(&si)) + return -ENODEV; + } else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. 
+ */ + struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + + si = cpuhw->qsi; + } + + do_freq = !!SAMPLE_FREQ_MODE(&event->hw); + rate = getrate(do_freq, value, &si); + if (!rate) + return -EINVAL; + + event->attr.sample_period = rate; + SAMPL_RATE(&event->hw) = rate; + hw_init_period(&event->hw, SAMPL_RATE(&event->hw)); + debug_sprintf_event(sfdbg, 4, "%s:" + " cpu %d value %#llx period %#llx freq %d\n", + __func__, event->cpu, value, + event->attr.sample_period, do_freq); + return 0; +} + +/* Activate sampling control. + * Next call of pmu_enable() starts sampling. + */ +static void cpumsf_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + perf_pmu_disable(event->pmu); + event->hw.state = 0; + cpuhw->lsctl.cs = 1; + if (SAMPL_DIAG_MODE(&event->hw)) + cpuhw->lsctl.cd = 1; + perf_pmu_enable(event->pmu); +} + +/* Deactivate sampling control. + * Next call of pmu_enable() stops sampling. + */ +static void cpumsf_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + + if (event->hw.state & PERF_HES_STOPPED) + return; + + perf_pmu_disable(event->pmu); + cpuhw->lsctl.cs = 0; + cpuhw->lsctl.cd = 0; + event->hw.state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + hw_perf_event_update(event, 1); + event->hw.state |= PERF_HES_UPTODATE; + } + perf_pmu_enable(event->pmu); +} + +static int cpumsf_pmu_add(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + struct aux_buffer *aux; + int err; + + if (cpuhw->flags & PMU_F_IN_USE) + return -EAGAIN; + + if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt) + return -EINVAL; + + err = 0; + perf_pmu_disable(event->pmu); + + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + /* Set up sampling controls. Always program the sampling register + * using the SDB-table start. Reset TEAR_REG event hardware register + * that is used by hw_perf_event_update() to store the sampling buffer + * position after samples have been flushed. + */ + cpuhw->lsctl.s = 0; + cpuhw->lsctl.h = 1; + cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); + if (!SAMPL_DIAG_MODE(&event->hw)) { + cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt); + cpuhw->lsctl.dear = *(unsigned long *)cpuhw->sfb.sdbt; + TEAR_REG(&event->hw) = (unsigned long)cpuhw->sfb.sdbt; + } + + /* Ensure sampling functions are in the disabled state. If disabled, + * switch on sampling enable control. 
*/ + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { + err = -EAGAIN; + goto out; + } + if (SAMPL_DIAG_MODE(&event->hw)) { + aux = perf_aux_output_begin(&cpuhw->handle, event); + if (!aux) { + err = -EINVAL; + goto out; + } + err = aux_output_begin(&cpuhw->handle, aux, cpuhw); + if (err) + goto out; + cpuhw->lsctl.ed = 1; + } + cpuhw->lsctl.es = 1; + + /* Set in_use flag and store event */ + cpuhw->event = event; + cpuhw->flags |= PMU_F_IN_USE; + + if (flags & PERF_EF_START) + cpumsf_pmu_start(event, PERF_EF_RELOAD); +out: + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + return err; +} + +static void cpumsf_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + + perf_pmu_disable(event->pmu); + cpumsf_pmu_stop(event, PERF_EF_UPDATE); + + cpuhw->lsctl.es = 0; + cpuhw->lsctl.ed = 0; + cpuhw->flags &= ~PMU_F_IN_USE; + cpuhw->event = NULL; + + if (SAMPL_DIAG_MODE(&event->hw)) + aux_output_end(&cpuhw->handle); + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); +} + +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); +CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); + +/* Attribute list for CPU_SF. + * + * The availablitiy depends on the CPU_MF sampling facility authorization + * for basic + diagnositic samples. This is determined at initialization + * time by the sampling facility device driver. + * If the authorization for basic samples is turned off, it should be + * also turned off for diagnostic sampling. + * + * During initialization of the device driver, check the authorization + * level for diagnostic sampling and installs the attribute + * file for diagnostic sampling if necessary. + * + * For now install a placeholder to reference all possible attributes: + * SF_CYCLES_BASIC and SF_CYCLES_BASIC_DIAG. + * Add another entry for the final NULL pointer. + */ +enum { + SF_CYCLES_BASIC_ATTR_IDX = 0, + SF_CYCLES_BASIC_DIAG_ATTR_IDX, + SF_CYCLES_ATTR_MAX +}; + +static struct attribute *cpumsf_pmu_events_attr[SF_CYCLES_ATTR_MAX + 1] = { + [SF_CYCLES_BASIC_ATTR_IDX] = CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC) +}; + +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *cpumsf_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group cpumsf_pmu_events_group = { + .name = "events", + .attrs = cpumsf_pmu_events_attr, +}; + +static struct attribute_group cpumsf_pmu_format_group = { + .name = "format", + .attrs = cpumsf_pmu_format_attr, +}; + +static const struct attribute_group *cpumsf_pmu_attr_groups[] = { + &cpumsf_pmu_events_group, + &cpumsf_pmu_format_group, + NULL, +}; + +static struct pmu cpumf_sampling = { + .pmu_enable = cpumsf_pmu_enable, + .pmu_disable = cpumsf_pmu_disable, + + .event_init = cpumsf_pmu_event_init, + .add = cpumsf_pmu_add, + .del = cpumsf_pmu_del, + + .start = cpumsf_pmu_start, + .stop = cpumsf_pmu_stop, + .read = cpumsf_pmu_read, + + .attr_groups = cpumsf_pmu_attr_groups, + + .setup_aux = aux_buffer_setup, + .free_aux = aux_buffer_free, + + .check_period = cpumsf_pmu_check_period, +}; + +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_hw_sf *cpuhw; + + if (!(alert & CPU_MF_INT_SF_MASK)) + return; + inc_irq_stat(IRQEXT_CMS); + cpuhw = this_cpu_ptr(&cpu_hw_sf); + + /* Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. 
*/ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* The processing below must take care of multiple alert events that + * might be indicated concurrently. */ + + /* Program alert request */ + if (alert & CPU_MF_INT_SF_PRA) { + if (cpuhw->flags & PMU_F_IN_USE) + if (SAMPL_DIAG_MODE(&cpuhw->event->hw)) + hw_collect_aux(cpuhw); + else + hw_perf_event_update(cpuhw->event, 0); + else + WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); + } + + /* Report measurement alerts only for non-PRA codes */ + if (alert != CPU_MF_INT_SF_PRA) + debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__, + alert); + + /* Sampling authorization change request */ + if (alert & CPU_MF_INT_SF_SACA) + qsi(&cpuhw->qsi); + + /* Loss of sample data due to high-priority machine activities */ + if (alert & CPU_MF_INT_SF_LSDA) { + pr_err("Sample data was lost\n"); + cpuhw->flags |= PMU_F_ERR_LSDA; + sf_disable(); + } + + /* Invalid sampling buffer entry */ + if (alert & (CPU_MF_INT_SF_IAE|CPU_MF_INT_SF_ISE)) { + pr_err("A sampling buffer entry is incorrect (alert=0x%x)\n", + alert); + cpuhw->flags |= PMU_F_ERR_IBE; + sf_disable(); + } +} + +static int cpusf_pmu_setup(unsigned int cpu, int flags) +{ + /* Ignore the notification if no events are scheduled on the PMU. + * This might be racy... + */ + if (!atomic_read(&num_events)) + return 0; + + local_irq_disable(); + setup_pmc_cpu(&flags); + local_irq_enable(); + return 0; +} + +static int s390_pmu_sf_online_cpu(unsigned int cpu) +{ + return cpusf_pmu_setup(cpu, PMC_INIT); +} + +static int s390_pmu_sf_offline_cpu(unsigned int cpu) +{ + return cpusf_pmu_setup(cpu, PMC_RELEASE); +} + +static int param_get_sfb_size(char *buffer, const struct kernel_param *kp) +{ + if (!cpum_sf_avail()) + return -ENODEV; + return sprintf(buffer, "%lu,%lu", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); +} + +static int param_set_sfb_size(const char *val, const struct kernel_param *kp) +{ + int rc; + unsigned long min, max; + + if (!cpum_sf_avail()) + return -ENODEV; + if (!val || !strlen(val)) + return -EINVAL; + + /* Valid parameter values: "min,max" or "max" */ + min = CPUM_SF_MIN_SDB; + max = CPUM_SF_MAX_SDB; + if (strchr(val, ',')) + rc = (sscanf(val, "%lu,%lu", &min, &max) == 2) ? 
0 : -EINVAL; + else + rc = kstrtoul(val, 10, &max); + + if (min < 2 || min >= max || max > get_num_physpages()) + rc = -EINVAL; + if (rc) + return rc; + + sfb_set_limits(min, max); + pr_info("The sampling buffer limits have changed to: " + "min %lu max %lu (diag %lu)\n", + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); + return 0; +} + +#define param_check_sfb_size(name, p) __param_check(name, p, void) +static const struct kernel_param_ops param_ops_sfb_size = { + .set = param_set_sfb_size, + .get = param_get_sfb_size, +}; + +#define RS_INIT_FAILURE_QSI 0x0001 +#define RS_INIT_FAILURE_BSDES 0x0002 +#define RS_INIT_FAILURE_ALRT 0x0003 +#define RS_INIT_FAILURE_PERF 0x0004 +static void __init pr_cpumsf_err(unsigned int reason) +{ + pr_err("Sampling facility support for perf is not available: " + "reason %#x\n", reason); +} + +static int __init init_cpum_sampling_pmu(void) +{ + struct hws_qsi_info_block si; + int err; + + if (!cpum_sf_avail()) + return -ENODEV; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) { + pr_cpumsf_err(RS_INIT_FAILURE_QSI); + return -ENODEV; + } + + if (!si.as && !si.ad) + return -ENODEV; + + if (si.bsdes != sizeof(struct hws_basic_entry)) { + pr_cpumsf_err(RS_INIT_FAILURE_BSDES); + return -EINVAL; + } + + if (si.ad) { + sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + /* Sampling of diagnostic data authorized, + * install event into attribute list of PMU device. + */ + cpumsf_pmu_events_attr[SF_CYCLES_BASIC_DIAG_ATTR_IDX] = + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); + } + + sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); + if (!sfdbg) { + pr_err("Registering for s390dbf failed\n"); + return -ENOMEM; + } + debug_register_view(sfdbg, &debug_sprintf_view); + + err = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_ALRT); + debug_unregister(sfdbg); + goto out; + } + + err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); + if (err) { + pr_cpumsf_err(RS_INIT_FAILURE_PERF); + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + debug_unregister(sfdbg); + goto out; + } + + cpuhp_setup_state(CPUHP_AP_PERF_S390_SF_ONLINE, "perf/s390/sf:online", + s390_pmu_sf_online_cpu, s390_pmu_sf_offline_cpu); +out: + return err; +} + +arch_initcall(init_cpum_sampling_pmu); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c new file mode 100644 index 0000000000..c27321cb09 --- /dev/null +++ b/arch/s390/kernel/perf_event.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support for s390x + * + * Copyright IBM Corp. 
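For readers experimenting with the cpum_sfb_size= parameter handled by param_set_sfb_size() above, a minimal user-space sketch of the accepted value formats ("max" or "min,max") and the basic validation rules; parse_sfb_size() and the default numbers are illustrative stand-ins for CPUM_SF_MIN_SDB/CPUM_SF_MAX_SDB, and the physical-page upper bound check is omitted.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_sfb_size(const char *val, unsigned long *min, unsigned long *max)
{
	*min = 15;	/* placeholder defaults; the driver uses CPUM_SF_MIN_SDB */
	*max = 8176;	/* and CPUM_SF_MAX_SDB */
	if (!val || !*val)
		return -1;
	if (strchr(val, ',')) {
		if (sscanf(val, "%lu,%lu", min, max) != 2)
			return -1;
	} else {
		*max = strtoul(val, NULL, 10);
	}
	if (*min < 2 || *min >= *max)
		return -1;
	return 0;
}

int main(void)
{
	unsigned long min, max;

	/* e.g. booting with cpum_sfb_size=64,1024 on the kernel command line */
	if (!parse_sfb_size("64,1024", &min, &max))
		printf("new limits: min=%lu max=%lu\n", min, max);
	return 0;
}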
2012, 2013 + * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + */ +#define KMSG_COMPONENT "perf" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/kvm_host.h> +#include <linux/percpu.h> +#include <linux/export.h> +#include <linux/seq_file.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <asm/irq.h> +#include <asm/cpu_mf.h> +#include <asm/lowcore.h> +#include <asm/processor.h> +#include <asm/sysinfo.h> +#include <asm/unwind.h> + +static struct kvm_s390_sie_block *sie_block(struct pt_regs *regs) +{ + struct stack_frame *stack = (struct stack_frame *) regs->gprs[15]; + + if (!stack) + return NULL; + + return (struct kvm_s390_sie_block *)stack->sie_control_block; +} + +static bool is_in_guest(struct pt_regs *regs) +{ + if (user_mode(regs)) + return false; +#if IS_ENABLED(CONFIG_KVM) + return instruction_pointer(regs) == (unsigned long) &sie_exit; +#else + return false; +#endif +} + +static unsigned long guest_is_user_mode(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.mask & PSW_MASK_PSTATE; +} + +static unsigned long instruction_pointer_guest(struct pt_regs *regs) +{ + return sie_block(regs)->gpsw.addr; +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + return is_in_guest(regs) ? instruction_pointer_guest(regs) + : instruction_pointer(regs); +} + +static unsigned long perf_misc_guest_flags(struct pt_regs *regs) +{ + return guest_is_user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; +} + +static unsigned long perf_misc_flags_sf(struct pt_regs *regs) +{ + struct perf_sf_sde_regs *sde_regs; + unsigned long flags; + + sde_regs = (struct perf_sf_sde_regs *) ®s->int_parm_long; + if (sde_regs->in_guest) + flags = user_mode(regs) ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else + flags = user_mode(regs) ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + return flags; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + /* Check if the cpum_sf PMU has created the pt_regs structure. + * In this case, perf misc flags can be easily extracted. Otherwise, + * do regular checks on the pt_regs content. + */ + if (regs->int_code == 0x1407 && regs->int_parm == CPU_MF_INT_SF_PRA) + if (!regs->gprs[15]) + return perf_misc_flags_sf(regs); + + if (is_in_guest(regs)) + return perf_misc_guest_flags(regs); + + return user_mode(regs) ? 
PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; +} + +static void print_debug_cf(void) +{ + struct cpumf_ctr_info cf_info; + int cpu = smp_processor_id(); + + memset(&cf_info, 0, sizeof(cf_info)); + if (!qctri(&cf_info)) + pr_info("CPU[%i] CPUM_CF: ver=%u.%u A=%04x E=%04x C=%04x\n", + cpu, cf_info.cfvn, cf_info.csvn, + cf_info.auth_ctl, cf_info.enable_ctl, cf_info.act_ctl); +} + +static void print_debug_sf(void) +{ + struct hws_qsi_info_block si; + int cpu = smp_processor_id(); + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%lu max=%lu cpu_speed=%u\n", + cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + + if (si.as) + pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" + " bsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); + if (si.ad) + pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" + " dsdes=%i tear=%016lx dear=%016lx\n", cpu, + si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); +} + +void perf_event_print_debug(void) +{ + unsigned long flags; + + local_irq_save(flags); + if (cpum_cf_avail()) + print_debug_cf(); + if (cpum_sf_avail()) + print_debug_sf(); + local_irq_restore(flags); +} + +/* Service level infrastructure */ +static void sl_print_counter(struct seq_file *m) +{ + struct cpumf_ctr_info ci; + + memset(&ci, 0, sizeof(ci)); + if (qctri(&ci)) + return; + + seq_printf(m, "CPU-MF: Counter facility: version=%u.%u " + "authorization=%04x\n", ci.cfvn, ci.csvn, ci.auth_ctl); +} + +static void sl_print_sampling(struct seq_file *m) +{ + struct hws_qsi_info_block si; + + memset(&si, 0, sizeof(si)); + if (qsi(&si)) + return; + + if (!si.as && !si.ad) + return; + + seq_printf(m, "CPU-MF: Sampling facility: min_rate=%lu max_rate=%lu" + " cpu_speed=%u\n", si.min_sampl_rate, si.max_sampl_rate, + si.cpu_speed); + if (si.as) + seq_printf(m, "CPU-MF: Sampling facility: mode=basic" + " sample_size=%u\n", si.bsdes); + if (si.ad) + seq_printf(m, "CPU-MF: Sampling facility: mode=diagnostic" + " sample_size=%u\n", si.dsdes); +} + +static void service_level_perf_print(struct seq_file *m, + struct service_level *sl) +{ + if (cpum_cf_avail()) + sl_print_counter(m); + if (cpum_sf_avail()) + sl_print_sampling(m); +} + +static struct service_level service_level_perf = { + .seq_print = service_level_perf_print, +}; + +static int __init service_level_perf_register(void) +{ + return register_service_level(&service_level_perf); +} +arch_initcall(service_level_perf_register); + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct unwind_state state; + unsigned long addr; + + unwind_for_each_frame(&state, current, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } +} + +/* Perf definitions for PMU event attributes in sysfs */ +ssize_t cpumf_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); +} diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c new file mode 100644 index 0000000000..fe7d1774de --- /dev/null +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -0,0 +1,698 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support - Processor Activity Instrumentation Facility + * + * Copyright IBM Corp. 
2022 + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + */ +#define KMSG_COMPONENT "pai_crypto" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/perf_event.h> + +#include <asm/ctl_reg.h> +#include <asm/pai.h> +#include <asm/debug.h> + +static debug_info_t *cfm_dbg; +static unsigned int paicrypt_cnt; /* Size of the mapped counter sets */ + /* extracted with QPACI instruction */ + +DEFINE_STATIC_KEY_FALSE(pai_key); + +struct pai_userdata { + u16 num; + u64 value; +} __packed; + +struct paicrypt_map { + unsigned long *page; /* Page for CPU to store counters */ + struct pai_userdata *save; /* Page to store no-zero counters */ + unsigned int active_events; /* # of PAI crypto users */ + refcount_t refcnt; /* Reference count mapped buffers */ + enum paievt_mode mode; /* Type of event */ + struct perf_event *event; /* Perf event for sampling */ +}; + +static DEFINE_PER_CPU(struct paicrypt_map, paicrypt_map); + +/* Release the PMU if event is the last perf event */ +static DEFINE_MUTEX(pai_reserve_mutex); + +/* Adjust usage counters and remove allocated memory when all users are + * gone. + */ +static void paicrypt_event_destroy(struct perf_event *event) +{ + struct paicrypt_map *cpump = per_cpu_ptr(&paicrypt_map, event->cpu); + + cpump->event = NULL; + static_branch_dec(&pai_key); + mutex_lock(&pai_reserve_mutex); + debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" + " mode %d refcnt %u\n", __func__, + event->attr.config, event->cpu, + cpump->active_events, cpump->mode, + refcount_read(&cpump->refcnt)); + if (refcount_dec_and_test(&cpump->refcnt)) { + debug_sprintf_event(cfm_dbg, 4, "%s page %#lx save %p\n", + __func__, (unsigned long)cpump->page, + cpump->save); + free_page((unsigned long)cpump->page); + cpump->page = NULL; + kvfree(cpump->save); + cpump->save = NULL; + cpump->mode = PAI_MODE_NONE; + } + mutex_unlock(&pai_reserve_mutex); +} + +static u64 paicrypt_getctr(struct paicrypt_map *cpump, int nr, bool kernel) +{ + if (kernel) + nr += PAI_CRYPTO_MAXCTR; + return cpump->page[nr]; +} + +/* Read the counter values. Return value from location in CMP. For event + * CRYPTO_ALL sum up all events. + */ +static u64 paicrypt_getdata(struct perf_event *event, bool kernel) +{ + struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); + u64 sum = 0; + int i; + + if (event->attr.config != PAI_CRYPTO_BASE) { + return paicrypt_getctr(cpump, + event->attr.config - PAI_CRYPTO_BASE, + kernel); + } + + for (i = 1; i <= paicrypt_cnt; i++) { + u64 val = paicrypt_getctr(cpump, i, kernel); + + if (!val) + continue; + sum += val; + } + return sum; +} + +static u64 paicrypt_getall(struct perf_event *event) +{ + u64 sum = 0; + + if (!event->attr.exclude_kernel) + sum += paicrypt_getdata(event, true); + if (!event->attr.exclude_user) + sum += paicrypt_getdata(event, false); + + return sum; +} + +/* Used to avoid races in checking concurrent access of counting and + * sampling for crypto events + * + * Only one instance of event pai_crypto/CRYPTO_ALL/ for sampling is + * allowed and when this event is running, no counting event is allowed. + * Several counting events are allowed in parallel, but no sampling event + * is allowed while one (or more) counting events are running. + * + * This function is called in process context and it is save to block. 
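To make the counter layout used by paicrypt_getctr()/paicrypt_getdata() above easier to picture, here is a small stand-alone sketch, assuming the mapped page simply holds one u64 slot per counter with the kernel-space slots at an offset of PAI_CRYPTO_MAXCTR; DEMO_MAXCTR and all sample values are made up.

#include <stdint.h>
#include <stdio.h>

#define DEMO_MAXCTR	256		/* stands in for PAI_CRYPTO_MAXCTR */

static uint64_t demo_page[2 * DEMO_MAXCTR];

static uint64_t demo_getctr(int nr, int kernel)
{
	return demo_page[kernel ? nr + DEMO_MAXCTR : nr];
}

/* Sum of counters 1..cnt, as done for the CRYPTO_ALL event. */
static uint64_t demo_getall(int cnt, int exclude_user, int exclude_kernel)
{
	uint64_t sum = 0;
	int i;

	for (i = 1; i <= cnt; i++) {
		if (!exclude_user)
			sum += demo_getctr(i, 0);
		if (!exclude_kernel)
			sum += demo_getctr(i, 1);
	}
	return sum;
}

int main(void)
{
	demo_page[7] = 3;			/* user-space KM_AES_128 hits */
	demo_page[7 + DEMO_MAXCTR] = 5;		/* kernel KM_AES_128 hits */
	printf("CRYPTO_ALL = %llu\n",
	       (unsigned long long)demo_getall(156, 0, 0));	/* prints 8 */
	return 0;
}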
+ * When the event initialization functions fails, no other call back will + * be invoked. + * + * Allocate the memory for the event. + */ +static int paicrypt_busy(struct perf_event_attr *a, struct paicrypt_map *cpump) +{ + int rc = 0; + + mutex_lock(&pai_reserve_mutex); + if (a->sample_period) { /* Sampling requested */ + if (cpump->mode != PAI_MODE_NONE) + rc = -EBUSY; /* ... sampling/counting active */ + } else { /* Counting requested */ + if (cpump->mode == PAI_MODE_SAMPLING) + rc = -EBUSY; /* ... and sampling active */ + } + if (rc) + goto unlock; + + /* Allocate memory for counter page and counter extraction. + * Only the first counting event has to allocate a page. + */ + if (cpump->page) { + refcount_inc(&cpump->refcnt); + goto unlock; + } + + rc = -ENOMEM; + cpump->page = (unsigned long *)get_zeroed_page(GFP_KERNEL); + if (!cpump->page) + goto unlock; + cpump->save = kvmalloc_array(paicrypt_cnt + 1, + sizeof(struct pai_userdata), GFP_KERNEL); + if (!cpump->save) { + free_page((unsigned long)cpump->page); + cpump->page = NULL; + goto unlock; + } + rc = 0; + refcount_set(&cpump->refcnt, 1); + +unlock: + /* If rc is non-zero, do not set mode and reference count */ + if (!rc) { + cpump->mode = a->sample_period ? PAI_MODE_SAMPLING + : PAI_MODE_COUNTING; + } + debug_sprintf_event(cfm_dbg, 5, "%s sample_period %#llx users %d" + " mode %d refcnt %u page %#lx save %p rc %d\n", + __func__, a->sample_period, cpump->active_events, + cpump->mode, refcount_read(&cpump->refcnt), + (unsigned long)cpump->page, cpump->save, rc); + mutex_unlock(&pai_reserve_mutex); + return rc; +} + +/* Might be called on different CPU than the one the event is intended for. */ +static int paicrypt_event_init(struct perf_event *event) +{ + struct perf_event_attr *a = &event->attr; + struct paicrypt_map *cpump; + int rc; + + /* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */ + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) + return -ENOENT; + /* PAI crypto event must be in valid range */ + if (a->config < PAI_CRYPTO_BASE || + a->config > PAI_CRYPTO_BASE + paicrypt_cnt) + return -EINVAL; + /* Allow only CPU wide operation, no process context for now. */ + if (event->hw.target || event->cpu == -1) + return -ENOENT; + /* Allow only CRYPTO_ALL for sampling. */ + if (a->sample_period && a->config != PAI_CRYPTO_BASE) + return -EINVAL; + + cpump = per_cpu_ptr(&paicrypt_map, event->cpu); + rc = paicrypt_busy(a, cpump); + if (rc) + return rc; + + /* Event initialization sets last_tag to 0. When later on the events + * are deleted and re-added, do not reset the event count value to zero. + * Events are added, deleted and re-added when 2 or more events + * are active at the same time. + */ + event->hw.last_tag = 0; + cpump->event = event; + event->destroy = paicrypt_event_destroy; + + if (a->sample_period) { + a->sample_period = 1; + a->freq = 0; + /* Register for paicrypt_sched_task() to be called */ + event->attach_state |= PERF_ATTACH_SCHED_CB; + /* Add raw data which contain the memory mapped counters */ + a->sample_type |= PERF_SAMPLE_RAW; + /* Turn off inheritance */ + a->inherit = 0; + } + + static_branch_inc(&pai_key); + return 0; +} + +static void paicrypt_read(struct perf_event *event) +{ + u64 prev, new, delta; + + prev = local64_read(&event->hw.prev_count); + new = paicrypt_getall(event); + local64_set(&event->hw.prev_count, new); + delta = (prev <= new) ? 
new - prev + : (-1ULL - prev) + new + 1; /* overflow */ + local64_add(delta, &event->count); +} + +static void paicrypt_start(struct perf_event *event, int flags) +{ + u64 sum; + + if (!event->hw.last_tag) { + event->hw.last_tag = 1; + sum = paicrypt_getall(event); /* Get current value */ + local64_set(&event->count, 0); + local64_set(&event->hw.prev_count, sum); + } +} + +static int paicrypt_add(struct perf_event *event, int flags) +{ + struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); + unsigned long ccd; + + if (++cpump->active_events == 1) { + ccd = virt_to_phys(cpump->page) | PAI_CRYPTO_KERNEL_OFFSET; + WRITE_ONCE(S390_lowcore.ccd, ccd); + __ctl_set_bit(0, 50); + } + cpump->event = event; + if (flags & PERF_EF_START && !event->attr.sample_period) { + /* Only counting needs initial counter value */ + paicrypt_start(event, PERF_EF_RELOAD); + } + event->hw.state = 0; + if (event->attr.sample_period) + perf_sched_cb_inc(event->pmu); + return 0; +} + +static void paicrypt_stop(struct perf_event *event, int flags) +{ + paicrypt_read(event); + event->hw.state = PERF_HES_STOPPED; +} + +static void paicrypt_del(struct perf_event *event, int flags) +{ + struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); + + if (event->attr.sample_period) + perf_sched_cb_dec(event->pmu); + if (!event->attr.sample_period) + /* Only counting needs to read counter */ + paicrypt_stop(event, PERF_EF_UPDATE); + if (--cpump->active_events == 0) { + __ctl_clear_bit(0, 50); + WRITE_ONCE(S390_lowcore.ccd, 0); + } +} + +/* Create raw data and save it in buffer. Returns number of bytes copied. + * Saves only positive counter entries of the form + * 2 bytes: Number of counter + * 8 bytes: Value of counter + */ +static size_t paicrypt_copy(struct pai_userdata *userdata, + struct paicrypt_map *cpump, + bool exclude_user, bool exclude_kernel) +{ + int i, outidx = 0; + + for (i = 1; i <= paicrypt_cnt; i++) { + u64 val = 0; + + if (!exclude_kernel) + val += paicrypt_getctr(cpump, i, true); + if (!exclude_user) + val += paicrypt_getctr(cpump, i, false); + if (val) { + userdata[outidx].num = i; + userdata[outidx].value = val; + outidx++; + } + } + return outidx * sizeof(struct pai_userdata); +} + +static int paicrypt_push_sample(void) +{ + struct paicrypt_map *cpump = this_cpu_ptr(&paicrypt_map); + struct perf_event *event = cpump->event; + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + size_t rawsize; + int overflow; + + if (!cpump->event) /* No event active */ + return 0; + rawsize = paicrypt_copy(cpump->save, cpump, + cpump->event->attr.exclude_user, + cpump->event->attr.exclude_kernel); + if (!rawsize) /* No incremented counters */ + return 0; + + /* Setup perf sample */ + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + memset(&data, 0, sizeof(data)); + perf_sample_data_init(&data, 0, event->hw.last_period); + if (event->attr.sample_type & PERF_SAMPLE_TID) { + data.tid_entry.pid = task_tgid_nr(current); + data.tid_entry.tid = task_pid_nr(current); + } + if (event->attr.sample_type & PERF_SAMPLE_TIME) + data.time = event->clock(); + if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) + data.id = event->id; + if (event->attr.sample_type & PERF_SAMPLE_CPU) { + data.cpu_entry.cpu = smp_processor_id(); + data.cpu_entry.reserved = 0; + } + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = rawsize; + raw.frag.data = cpump->save; + perf_sample_save_raw_data(&data, &raw); + } + + overflow = perf_event_overflow(event, &data, ®s); + 
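As a quick worked example of the wrap-safe delta computed in paicrypt_read() above, a stand-alone sketch; the sample values are fabricated.

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

static uint64_t wrap_delta(uint64_t prev, uint64_t new)
{
	/* When the new reading is smaller, assume the counter wrapped at 2^64. */
	return (prev <= new) ? new - prev : (UINT64_MAX - prev) + new + 1;
}

int main(void)
{
	/* No wrap: simple difference. */
	printf("%" PRIu64 "\n", wrap_delta(100, 140));				/* 40 */
	/* Wrap: 16 counts before the wrap plus 0x10 counts after it. */
	printf("0x%" PRIx64 "\n", wrap_delta(0xFFFFFFFFFFFFFFF0ULL, 0x10));	/* 0x20 */
	return 0;
}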
perf_event_update_userpage(event); + /* Clear lowcore page after read */ + memset(cpump->page, 0, PAGE_SIZE); + return overflow; +} + +/* Called on schedule-in and schedule-out. No access to event structure, + * but for sampling only event CRYPTO_ALL is allowed. + */ +static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +{ + /* We started with a clean page on event installation. So read out + * results on schedule_out and if page was dirty, clear values. + */ + if (!sched_in) + paicrypt_push_sample(); +} + +/* Attribute definitions for paicrypt interface. As with other CPU + * Measurement Facilities, there is one attribute per mapped counter. + * The number of mapped counters may vary per machine generation. Use + * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction + * to determine the number of mapped counters. The instructions returns + * a positive number, which is the highest number of supported counters. + * All counters less than this number are also supported, there are no + * holes. A returned number of zero means no support for mapped counters. + * + * The identification of the counter is a unique number. The chosen range + * is 0x1000 + offset in mapped kernel page. + * All CPU Measurement Facility counters identifiers must be unique and + * the numbers from 0 to 496 are already used for the CPU Measurement + * Counter facility. Numbers 0xb0000, 0xbc000 and 0xbd000 are already + * used for the CPU Measurement Sampling facility. + */ +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *paicrypt_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group paicrypt_events_group = { + .name = "events", + .attrs = NULL /* Filled in attr_event_init() */ +}; + +static struct attribute_group paicrypt_format_group = { + .name = "format", + .attrs = paicrypt_format_attr, +}; + +static const struct attribute_group *paicrypt_attr_groups[] = { + &paicrypt_events_group, + &paicrypt_format_group, + NULL, +}; + +/* Performance monitoring unit for mapped counters */ +static struct pmu paicrypt = { + .task_ctx_nr = perf_invalid_context, + .event_init = paicrypt_event_init, + .add = paicrypt_add, + .del = paicrypt_del, + .start = paicrypt_start, + .stop = paicrypt_stop, + .read = paicrypt_read, + .sched_task = paicrypt_sched_task, + .attr_groups = paicrypt_attr_groups +}; + +/* List of symbolic PAI counter names. 
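The PERF_SAMPLE_RAW payload written by paicrypt_copy() above is a packed stream of (2-byte counter number, 8-byte counter value) records. A hypothetical user-space decoder sketch follows; struct pai_sample_rec, dump_pai_raw() and the sample records are inventions for illustration, and obtaining the raw area from the perf ring buffer is out of scope.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct pai_sample_rec {		/* mirrors struct pai_userdata */
	uint16_t num;
	uint64_t value;
} __attribute__((packed));

static void dump_pai_raw(const void *raw, size_t size)
{
	struct pai_sample_rec rec;
	size_t off;

	for (off = 0; off + sizeof(rec) <= size; off += sizeof(rec)) {
		memcpy(&rec, (const char *)raw + off, sizeof(rec));
		printf("counter %u = %llu\n", rec.num,
		       (unsigned long long)rec.value);
	}
}

int main(void)
{
	/* Two fabricated records: counter 7 (KM_AES_128) and 81 (KIMD_GHASH). */
	struct pai_sample_rec demo[2] = { { 7, 12 }, { 81, 3 } };

	dump_pai_raw(demo, sizeof(demo));
	return 0;
}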
*/ +static const char * const paicrypt_ctrnames[] = { + [0] = "CRYPTO_ALL", + [1] = "KM_DEA", + [2] = "KM_TDEA_128", + [3] = "KM_TDEA_192", + [4] = "KM_ENCRYPTED_DEA", + [5] = "KM_ENCRYPTED_TDEA_128", + [6] = "KM_ENCRYPTED_TDEA_192", + [7] = "KM_AES_128", + [8] = "KM_AES_192", + [9] = "KM_AES_256", + [10] = "KM_ENCRYPTED_AES_128", + [11] = "KM_ENCRYPTED_AES_192", + [12] = "KM_ENCRYPTED_AES_256", + [13] = "KM_XTS_AES_128", + [14] = "KM_XTS_AES_256", + [15] = "KM_XTS_ENCRYPTED_AES_128", + [16] = "KM_XTS_ENCRYPTED_AES_256", + [17] = "KMC_DEA", + [18] = "KMC_TDEA_128", + [19] = "KMC_TDEA_192", + [20] = "KMC_ENCRYPTED_DEA", + [21] = "KMC_ENCRYPTED_TDEA_128", + [22] = "KMC_ENCRYPTED_TDEA_192", + [23] = "KMC_AES_128", + [24] = "KMC_AES_192", + [25] = "KMC_AES_256", + [26] = "KMC_ENCRYPTED_AES_128", + [27] = "KMC_ENCRYPTED_AES_192", + [28] = "KMC_ENCRYPTED_AES_256", + [29] = "KMC_PRNG", + [30] = "KMA_GCM_AES_128", + [31] = "KMA_GCM_AES_192", + [32] = "KMA_GCM_AES_256", + [33] = "KMA_GCM_ENCRYPTED_AES_128", + [34] = "KMA_GCM_ENCRYPTED_AES_192", + [35] = "KMA_GCM_ENCRYPTED_AES_256", + [36] = "KMF_DEA", + [37] = "KMF_TDEA_128", + [38] = "KMF_TDEA_192", + [39] = "KMF_ENCRYPTED_DEA", + [40] = "KMF_ENCRYPTED_TDEA_128", + [41] = "KMF_ENCRYPTED_TDEA_192", + [42] = "KMF_AES_128", + [43] = "KMF_AES_192", + [44] = "KMF_AES_256", + [45] = "KMF_ENCRYPTED_AES_128", + [46] = "KMF_ENCRYPTED_AES_192", + [47] = "KMF_ENCRYPTED_AES_256", + [48] = "KMCTR_DEA", + [49] = "KMCTR_TDEA_128", + [50] = "KMCTR_TDEA_192", + [51] = "KMCTR_ENCRYPTED_DEA", + [52] = "KMCTR_ENCRYPTED_TDEA_128", + [53] = "KMCTR_ENCRYPTED_TDEA_192", + [54] = "KMCTR_AES_128", + [55] = "KMCTR_AES_192", + [56] = "KMCTR_AES_256", + [57] = "KMCTR_ENCRYPTED_AES_128", + [58] = "KMCTR_ENCRYPTED_AES_192", + [59] = "KMCTR_ENCRYPTED_AES_256", + [60] = "KMO_DEA", + [61] = "KMO_TDEA_128", + [62] = "KMO_TDEA_192", + [63] = "KMO_ENCRYPTED_DEA", + [64] = "KMO_ENCRYPTED_TDEA_128", + [65] = "KMO_ENCRYPTED_TDEA_192", + [66] = "KMO_AES_128", + [67] = "KMO_AES_192", + [68] = "KMO_AES_256", + [69] = "KMO_ENCRYPTED_AES_128", + [70] = "KMO_ENCRYPTED_AES_192", + [71] = "KMO_ENCRYPTED_AES_256", + [72] = "KIMD_SHA_1", + [73] = "KIMD_SHA_256", + [74] = "KIMD_SHA_512", + [75] = "KIMD_SHA3_224", + [76] = "KIMD_SHA3_256", + [77] = "KIMD_SHA3_384", + [78] = "KIMD_SHA3_512", + [79] = "KIMD_SHAKE_128", + [80] = "KIMD_SHAKE_256", + [81] = "KIMD_GHASH", + [82] = "KLMD_SHA_1", + [83] = "KLMD_SHA_256", + [84] = "KLMD_SHA_512", + [85] = "KLMD_SHA3_224", + [86] = "KLMD_SHA3_256", + [87] = "KLMD_SHA3_384", + [88] = "KLMD_SHA3_512", + [89] = "KLMD_SHAKE_128", + [90] = "KLMD_SHAKE_256", + [91] = "KMAC_DEA", + [92] = "KMAC_TDEA_128", + [93] = "KMAC_TDEA_192", + [94] = "KMAC_ENCRYPTED_DEA", + [95] = "KMAC_ENCRYPTED_TDEA_128", + [96] = "KMAC_ENCRYPTED_TDEA_192", + [97] = "KMAC_AES_128", + [98] = "KMAC_AES_192", + [99] = "KMAC_AES_256", + [100] = "KMAC_ENCRYPTED_AES_128", + [101] = "KMAC_ENCRYPTED_AES_192", + [102] = "KMAC_ENCRYPTED_AES_256", + [103] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_DEA", + [104] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_128", + [105] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_TDEA_192", + [106] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_DEA", + [107] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_128", + [108] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_TDEA_192", + [109] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_128", + [110] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_192", + [111] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_AES_256", + [112] = 
"PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_128", + [113] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_192", + [114] = "PCC_COMPUTE_LAST_BLOCK_CMAC_USING_ENCRYPTED_AES_256A", + [115] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_128", + [116] = "PCC_COMPUTE_XTS_PARAMETER_USING_AES_256", + [117] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_128", + [118] = "PCC_COMPUTE_XTS_PARAMETER_USING_ENCRYPTED_AES_256", + [119] = "PCC_SCALAR_MULTIPLY_P256", + [120] = "PCC_SCALAR_MULTIPLY_P384", + [121] = "PCC_SCALAR_MULTIPLY_P521", + [122] = "PCC_SCALAR_MULTIPLY_ED25519", + [123] = "PCC_SCALAR_MULTIPLY_ED448", + [124] = "PCC_SCALAR_MULTIPLY_X25519", + [125] = "PCC_SCALAR_MULTIPLY_X448", + [126] = "PRNO_SHA_512_DRNG", + [127] = "PRNO_TRNG_QUERY_RAW_TO_CONDITIONED_RATIO", + [128] = "PRNO_TRNG", + [129] = "KDSA_ECDSA_VERIFY_P256", + [130] = "KDSA_ECDSA_VERIFY_P384", + [131] = "KDSA_ECDSA_VERIFY_P521", + [132] = "KDSA_ECDSA_SIGN_P256", + [133] = "KDSA_ECDSA_SIGN_P384", + [134] = "KDSA_ECDSA_SIGN_P521", + [135] = "KDSA_ENCRYPTED_ECDSA_SIGN_P256", + [136] = "KDSA_ENCRYPTED_ECDSA_SIGN_P384", + [137] = "KDSA_ENCRYPTED_ECDSA_SIGN_P521", + [138] = "KDSA_EDDSA_VERIFY_ED25519", + [139] = "KDSA_EDDSA_VERIFY_ED448", + [140] = "KDSA_EDDSA_SIGN_ED25519", + [141] = "KDSA_EDDSA_SIGN_ED448", + [142] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED25519", + [143] = "KDSA_ENCRYPTED_EDDSA_SIGN_ED448", + [144] = "PCKMO_ENCRYPT_DEA_KEY", + [145] = "PCKMO_ENCRYPT_TDEA_128_KEY", + [146] = "PCKMO_ENCRYPT_TDEA_192_KEY", + [147] = "PCKMO_ENCRYPT_AES_128_KEY", + [148] = "PCKMO_ENCRYPT_AES_192_KEY", + [149] = "PCKMO_ENCRYPT_AES_256_KEY", + [150] = "PCKMO_ENCRYPT_ECC_P256_KEY", + [151] = "PCKMO_ENCRYPT_ECC_P384_KEY", + [152] = "PCKMO_ENCRYPT_ECC_P521_KEY", + [153] = "PCKMO_ENCRYPT_ECC_ED25519_KEY", + [154] = "PCKMO_ENCRYPT_ECC_ED448_KEY", + [155] = "IBM_RESERVED_155", + [156] = "IBM_RESERVED_156", +}; + +static void __init attr_event_free(struct attribute **attrs, int num) +{ + struct perf_pmu_events_attr *pa; + int i; + + for (i = 0; i < num; i++) { + struct device_attribute *dap; + + dap = container_of(attrs[i], struct device_attribute, attr); + pa = container_of(dap, struct perf_pmu_events_attr, attr); + kfree(pa); + } + kfree(attrs); +} + +static int __init attr_event_init_one(struct attribute **attrs, int num) +{ + struct perf_pmu_events_attr *pa; + + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return -ENOMEM; + + sysfs_attr_init(&pa->attr.attr); + pa->id = PAI_CRYPTO_BASE + num; + pa->attr.attr.name = paicrypt_ctrnames[num]; + pa->attr.attr.mode = 0444; + pa->attr.show = cpumf_events_sysfs_show; + pa->attr.store = NULL; + attrs[num] = &pa->attr.attr; + return 0; +} + +/* Create PMU sysfs event attributes on the fly. 
*/ +static int __init attr_event_init(void) +{ + struct attribute **attrs; + int ret, i; + + attrs = kmalloc_array(ARRAY_SIZE(paicrypt_ctrnames) + 1, sizeof(*attrs), + GFP_KERNEL); + if (!attrs) + return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(paicrypt_ctrnames); i++) { + ret = attr_event_init_one(attrs, i); + if (ret) { + attr_event_free(attrs, i - 1); + return ret; + } + } + attrs[i] = NULL; + paicrypt_events_group.attrs = attrs; + return 0; +} + +static int __init paicrypt_init(void) +{ + struct qpaci_info_block ib; + int rc; + + if (!test_facility(196)) + return 0; + + qpaci(&ib); + paicrypt_cnt = ib.num_cc; + if (paicrypt_cnt == 0) + return 0; + if (paicrypt_cnt >= PAI_CRYPTO_MAXCTR) + paicrypt_cnt = PAI_CRYPTO_MAXCTR - 1; + + rc = attr_event_init(); /* Export known PAI crypto events */ + if (rc) { + pr_err("Creation of PMU pai_crypto /sysfs failed\n"); + return rc; + } + + /* Setup s390dbf facility */ + cfm_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); + if (!cfm_dbg) { + pr_err("Registration of s390dbf pai_crypto failed\n"); + return -ENOMEM; + } + debug_register_view(cfm_dbg, &debug_sprintf_view); + + rc = perf_pmu_register(&paicrypt, "pai_crypto", -1); + if (rc) { + pr_err("Registering the pai_crypto PMU failed with rc=%i\n", + rc); + debug_unregister_view(cfm_dbg, &debug_sprintf_view); + debug_unregister(cfm_dbg); + return rc; + } + return 0; +} + +device_initcall(paicrypt_init); diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c new file mode 100644 index 0000000000..c57c1a2032 --- /dev/null +++ b/arch/s390/kernel/perf_pai_ext.c @@ -0,0 +1,667 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Performance event support - Processor Activity Instrumentation Extension + * Facility + * + * Copyright IBM Corp. 2022 + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + */ +#define KMSG_COMPONENT "pai_ext" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/percpu.h> +#include <linux/notifier.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/io.h> +#include <linux/perf_event.h> + +#include <asm/ctl_reg.h> +#include <asm/pai.h> +#include <asm/debug.h> + +#define PAIE1_CB_SZ 0x200 /* Size of PAIE1 control block */ +#define PAIE1_CTRBLOCK_SZ 0x400 /* Size of PAIE1 counter blocks */ + +static debug_info_t *paiext_dbg; +static unsigned int paiext_cnt; /* Extracted with QPACI instruction */ + +struct pai_userdata { + u16 num; + u64 value; +} __packed; + +/* Create the PAI extension 1 control block area. + * The PAI extension control block 1 is pointed to by lowcore + * address 0x1508 for each CPU. This control block is 512 bytes in size + * and requires a 512 byte boundary alignment. 
+ */ +struct paiext_cb { /* PAI extension 1 control block */ + u64 header; /* Not used */ + u64 reserved1; + u64 acc; /* Addr to analytics counter control block */ + u8 reserved2[488]; +} __packed; + +struct paiext_map { + unsigned long *area; /* Area for CPU to store counters */ + struct pai_userdata *save; /* Area to store non-zero counters */ + enum paievt_mode mode; /* Type of event */ + unsigned int active_events; /* # of PAI Extension users */ + refcount_t refcnt; + struct perf_event *event; /* Perf event for sampling */ + struct paiext_cb *paiext_cb; /* PAI extension control block area */ +}; + +struct paiext_mapptr { + struct paiext_map *mapptr; +}; + +static struct paiext_root { /* Anchor to per CPU data */ + refcount_t refcnt; /* Overall active events */ + struct paiext_mapptr __percpu *mapptr; +} paiext_root; + +/* Free per CPU data when the last event is removed. */ +static void paiext_root_free(void) +{ + if (refcount_dec_and_test(&paiext_root.refcnt)) { + free_percpu(paiext_root.mapptr); + paiext_root.mapptr = NULL; + } +} + +/* On initialization of first event also allocate per CPU data dynamically. + * Start with an array of pointers, the array size is the maximum number of + * CPUs possible, which might be larger than the number of CPUs currently + * online. + */ +static int paiext_root_alloc(void) +{ + if (!refcount_inc_not_zero(&paiext_root.refcnt)) { + /* The memory is already zeroed. */ + paiext_root.mapptr = alloc_percpu(struct paiext_mapptr); + if (!paiext_root.mapptr) { + /* Returning without refcnt adjustment is ok. The + * error code is handled by paiext_alloc() which + * decrements refcnt when an event can not be + * created. + */ + return -ENOMEM; + } + refcount_set(&paiext_root.refcnt, 1); + } + return 0; +} + +/* Protects against concurrent increment of sampler and counter member + * increments at the same time and prohibits concurrent execution of + * counting and sampling events. + * Ensures that analytics counter block is deallocated only when the + * sampling and counting on that cpu is zero. + * For details see paiext_alloc(). + */ +static DEFINE_MUTEX(paiext_reserve_mutex); + +/* Free all memory allocated for event counting/sampling setup */ +static void paiext_free(struct paiext_mapptr *mp) +{ + kfree(mp->mapptr->area); + kfree(mp->mapptr->paiext_cb); + kvfree(mp->mapptr->save); + kfree(mp->mapptr); + mp->mapptr = NULL; +} + +/* Release the PMU if event is the last perf event */ +static void paiext_event_destroy(struct perf_event *event) +{ + struct paiext_mapptr *mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); + struct paiext_map *cpump = mp->mapptr; + + mutex_lock(&paiext_reserve_mutex); + cpump->event = NULL; + if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ + paiext_free(mp); + paiext_root_free(); + mutex_unlock(&paiext_reserve_mutex); + debug_sprintf_event(paiext_dbg, 4, "%s cpu %d mapptr %p\n", __func__, + event->cpu, mp->mapptr); + +} + +/* Used to avoid races in checking concurrent access of counting and + * sampling for pai_extension events. + * + * Only one instance of event pai_ext/NNPA_ALL/ for sampling is + * allowed and when this event is running, no counting event is allowed. + * Several counting events are allowed in parallel, but no sampling event + * is allowed while one (or more) counting events are running. + * + * This function is called in process context and it is safe to block. + * When the event initialization functions fails, no other call back will + * be invoked. 
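As an editorial cross-check of the layout described for struct paiext_cb above (512 bytes total, with the analytics counter control block address in the third doubleword), a tiny compile-time sketch; demo_paiext_cb merely mirrors that structure and is not part of the driver.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct demo_paiext_cb {		/* same layout as struct paiext_cb */
	uint64_t header;
	uint64_t reserved1;
	uint64_t acc;
	uint8_t reserved2[488];
} __attribute__((packed));

static_assert(sizeof(struct demo_paiext_cb) == 512, "PAIE1 CB must be 512 bytes");
static_assert(offsetof(struct demo_paiext_cb, acc) == 16, "acc lives at offset 16");

int main(void) { return 0; }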
+ * + * Allocate the memory for the event. + */ +static int paiext_alloc(struct perf_event_attr *a, struct perf_event *event) +{ + struct paiext_mapptr *mp; + struct paiext_map *cpump; + int rc; + + mutex_lock(&paiext_reserve_mutex); + + rc = paiext_root_alloc(); + if (rc) + goto unlock; + + mp = per_cpu_ptr(paiext_root.mapptr, event->cpu); + cpump = mp->mapptr; + if (!cpump) { /* Paiext_map allocated? */ + rc = -ENOMEM; + cpump = kzalloc(sizeof(*cpump), GFP_KERNEL); + if (!cpump) + goto undo; + + /* Allocate memory for counter area and counter extraction. + * These are + * - a 512 byte block and requires 512 byte boundary alignment. + * - a 1KB byte block and requires 1KB boundary alignment. + * Only the first counting event has to allocate the area. + * + * Note: This works with commit 59bb47985c1d by default. + * Backporting this to kernels without this commit might + * need adjustment. + */ + mp->mapptr = cpump; + cpump->area = kzalloc(PAIE1_CTRBLOCK_SZ, GFP_KERNEL); + cpump->paiext_cb = kzalloc(PAIE1_CB_SZ, GFP_KERNEL); + cpump->save = kvmalloc_array(paiext_cnt + 1, + sizeof(struct pai_userdata), + GFP_KERNEL); + if (!cpump->save || !cpump->area || !cpump->paiext_cb) { + paiext_free(mp); + goto undo; + } + refcount_set(&cpump->refcnt, 1); + cpump->mode = a->sample_period ? PAI_MODE_SAMPLING + : PAI_MODE_COUNTING; + } else { + /* Multiple invocation, check what is active. + * Supported are multiple counter events or only one sampling + * event concurrently at any one time. + */ + if (cpump->mode == PAI_MODE_SAMPLING || + (cpump->mode == PAI_MODE_COUNTING && a->sample_period)) { + rc = -EBUSY; + goto undo; + } + refcount_inc(&cpump->refcnt); + } + + rc = 0; + cpump->event = event; + +undo: + if (rc) { + /* Error in allocation of event, decrement anchor. Since + * the event in not created, its destroy() function is never + * invoked. Adjust the reference counter for the anchor. + */ + paiext_root_free(); + } +unlock: + mutex_unlock(&paiext_reserve_mutex); + /* If rc is non-zero, no increment of counter/sampler was done. */ + return rc; +} + +/* The PAI extension 1 control block supports up to 128 entries. Return + * the index within PAIE1_CB given the event number. Also validate event + * number. + */ +static int paiext_event_valid(struct perf_event *event) +{ + u64 cfg = event->attr.config; + + if (cfg >= PAI_NNPA_BASE && cfg <= PAI_NNPA_BASE + paiext_cnt) { + /* Offset NNPA in paiext_cb */ + event->hw.config_base = offsetof(struct paiext_cb, acc); + return 0; + } + return -EINVAL; +} + +/* Might be called on different CPU than the one the event is intended for. */ +static int paiext_event_init(struct perf_event *event) +{ + struct perf_event_attr *a = &event->attr; + int rc; + + /* PMU pai_ext registered as PERF_TYPE_RAW, check event type */ + if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type) + return -ENOENT; + /* PAI extension event must be valid and in supported range */ + rc = paiext_event_valid(event); + if (rc) + return rc; + /* Allow only CPU wide operation, no process context for now. */ + if (event->hw.target || event->cpu == -1) + return -ENOENT; + /* Allow only event NNPA_ALL for sampling. 
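The admission policy enforced by paiext_alloc() above (and by paicrypt_busy() for the crypto PMU) can be summarised as: a sampling event excludes everything else, while counting events may stack. A small illustrative sketch, with all names hypothetical:

#include <stdbool.h>
#include <stdio.h>

enum demo_mode { DEMO_NONE, DEMO_SAMPLING, DEMO_COUNTING };

static bool demo_may_add(enum demo_mode active, bool wants_sampling)
{
	if (active == DEMO_SAMPLING)
		return false;		/* a sampler already owns the CPU */
	if (active == DEMO_COUNTING && wants_sampling)
		return false;		/* no sampler while counters run */
	return true;
}

int main(void)
{
	printf("%d\n", demo_may_add(DEMO_NONE, true));		/* 1: first sampler ok */
	printf("%d\n", demo_may_add(DEMO_COUNTING, false));	/* 1: more counters ok */
	printf("%d\n", demo_may_add(DEMO_COUNTING, true));	/* 0: rejected (-EBUSY) */
	return 0;
}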
*/ + if (a->sample_period && a->config != PAI_NNPA_BASE) + return -EINVAL; + /* Prohibit exclude_user event selection */ + if (a->exclude_user) + return -EINVAL; + + rc = paiext_alloc(a, event); + if (rc) + return rc; + event->hw.last_tag = 0; + event->destroy = paiext_event_destroy; + + if (a->sample_period) { + a->sample_period = 1; + a->freq = 0; + /* Register for paicrypt_sched_task() to be called */ + event->attach_state |= PERF_ATTACH_SCHED_CB; + /* Add raw data which are the memory mapped counters */ + a->sample_type |= PERF_SAMPLE_RAW; + /* Turn off inheritance */ + a->inherit = 0; + } + + return 0; +} + +static u64 paiext_getctr(struct paiext_map *cpump, int nr) +{ + return cpump->area[nr]; +} + +/* Read the counter values. Return value from location in buffer. For event + * NNPA_ALL sum up all events. + */ +static u64 paiext_getdata(struct perf_event *event) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + u64 sum = 0; + int i; + + if (event->attr.config != PAI_NNPA_BASE) + return paiext_getctr(cpump, event->attr.config - PAI_NNPA_BASE); + + for (i = 1; i <= paiext_cnt; i++) + sum += paiext_getctr(cpump, i); + + return sum; +} + +static u64 paiext_getall(struct perf_event *event) +{ + return paiext_getdata(event); +} + +static void paiext_read(struct perf_event *event) +{ + u64 prev, new, delta; + + prev = local64_read(&event->hw.prev_count); + new = paiext_getall(event); + local64_set(&event->hw.prev_count, new); + delta = new - prev; + local64_add(delta, &event->count); +} + +static void paiext_start(struct perf_event *event, int flags) +{ + u64 sum; + + if (event->hw.last_tag) + return; + event->hw.last_tag = 1; + sum = paiext_getall(event); /* Get current value */ + local64_set(&event->hw.prev_count, sum); + local64_set(&event->count, 0); +} + +static int paiext_add(struct perf_event *event, int flags) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct paiext_cb *pcb = cpump->paiext_cb; + + if (++cpump->active_events == 1) { + S390_lowcore.aicd = virt_to_phys(cpump->paiext_cb); + pcb->acc = virt_to_phys(cpump->area) | 0x1; + /* Enable CPU instruction lookup for PAIE1 control block */ + __ctl_set_bit(0, 49); + debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", + __func__, S390_lowcore.aicd, pcb->acc); + } + if (flags & PERF_EF_START && !event->attr.sample_period) { + /* Only counting needs initial counter value */ + paiext_start(event, PERF_EF_RELOAD); + } + event->hw.state = 0; + if (event->attr.sample_period) { + cpump->event = event; + perf_sched_cb_inc(event->pmu); + } + return 0; +} + +static void paiext_stop(struct perf_event *event, int flags) +{ + paiext_read(event); + event->hw.state = PERF_HES_STOPPED; +} + +static void paiext_del(struct perf_event *event, int flags) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct paiext_cb *pcb = cpump->paiext_cb; + + if (event->attr.sample_period) + perf_sched_cb_dec(event->pmu); + if (!event->attr.sample_period) { + /* Only counting needs to read counter */ + paiext_stop(event, PERF_EF_UPDATE); + } + if (--cpump->active_events == 0) { + /* Disable CPU instruction lookup for PAIE1 control block */ + __ctl_clear_bit(0, 49); + pcb->acc = 0; + S390_lowcore.aicd = 0; + debug_sprintf_event(paiext_dbg, 4, "%s 1508 %llx acc %llx\n", + __func__, S390_lowcore.aicd, pcb->acc); + } +} + +/* Create raw data and save it in buffer. 
Returns number of bytes copied. + * Saves only positive counter entries of the form + * 2 bytes: Number of counter + * 8 bytes: Value of counter + */ +static size_t paiext_copy(struct paiext_map *cpump) +{ + struct pai_userdata *userdata = cpump->save; + int i, outidx = 0; + + for (i = 1; i <= paiext_cnt; i++) { + u64 val = paiext_getctr(cpump, i); + + if (val) { + userdata[outidx].num = i; + userdata[outidx].value = val; + outidx++; + } + } + return outidx * sizeof(*userdata); +} + +/* Write sample when one or more counters values are nonzero. + * + * Note: The function paiext_sched_task() and paiext_push_sample() are not + * invoked after function paiext_del() has been called because of function + * perf_sched_cb_dec(). + * The function paiext_sched_task() and paiext_push_sample() are only + * called when sampling is active. Function perf_sched_cb_inc() + * has been invoked to install function paiext_sched_task() as call back + * to run at context switch time (see paiext_add()). + * + * This causes function perf_event_context_sched_out() and + * perf_event_context_sched_in() to check whether the PMU has installed an + * sched_task() callback. That callback is not active after paiext_del() + * returns and has deleted the event on that CPU. + */ +static int paiext_push_sample(void) +{ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + struct perf_event *event = cpump->event; + struct perf_sample_data data; + struct perf_raw_record raw; + struct pt_regs regs; + size_t rawsize; + int overflow; + + rawsize = paiext_copy(cpump); + if (!rawsize) /* No incremented counters */ + return 0; + + /* Setup perf sample */ + memset(®s, 0, sizeof(regs)); + memset(&raw, 0, sizeof(raw)); + memset(&data, 0, sizeof(data)); + perf_sample_data_init(&data, 0, event->hw.last_period); + if (event->attr.sample_type & PERF_SAMPLE_TID) { + data.tid_entry.pid = task_tgid_nr(current); + data.tid_entry.tid = task_pid_nr(current); + } + if (event->attr.sample_type & PERF_SAMPLE_TIME) + data.time = event->clock(); + if (event->attr.sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) + data.id = event->id; + if (event->attr.sample_type & PERF_SAMPLE_CPU) + data.cpu_entry.cpu = smp_processor_id(); + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.frag.size = rawsize; + raw.frag.data = cpump->save; + perf_sample_save_raw_data(&data, &raw); + } + + overflow = perf_event_overflow(event, &data, ®s); + perf_event_update_userpage(event); + /* Clear lowcore area after read */ + memset(cpump->area, 0, PAIE1_CTRBLOCK_SZ); + return overflow; +} + +/* Called on schedule-in and schedule-out. No access to event structure, + * but for sampling only event NNPA_ALL is allowed. + */ +static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) +{ + /* We started with a clean page on event installation. So read out + * results on schedule_out and if page was dirty, clear values. + */ + if (!sched_in) + paiext_push_sample(); +} + +/* Attribute definitions for pai extension1 interface. As with other CPU + * Measurement Facilities, there is one attribute per mapped counter. + * The number of mapped counters may vary per machine generation. Use + * the QUERY PROCESSOR ACTIVITY COUNTER INFORMATION (QPACI) instruction + * to determine the number of mapped counters. The instructions returns + * a positive number, which is the highest number of supported counters. + * All counters less than this number are also supported, there are no + * holes. 
A returned number of zero means no support for mapped counters. + * + * The identification of the counter is a unique number. The chosen range + * is 0x1800 + offset in mapped kernel page. + * All CPU Measurement Facility counters identifiers must be unique and + * the numbers from 0 to 496 are already used for the CPU Measurement + * Counter facility. Number 0x1000 to 0x103e are used for PAI cryptography + * counters. + * Numbers 0xb0000, 0xbc000 and 0xbd000 are already + * used for the CPU Measurement Sampling facility. + */ +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *paiext_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group paiext_events_group = { + .name = "events", + .attrs = NULL, /* Filled in attr_event_init() */ +}; + +static struct attribute_group paiext_format_group = { + .name = "format", + .attrs = paiext_format_attr, +}; + +static const struct attribute_group *paiext_attr_groups[] = { + &paiext_events_group, + &paiext_format_group, + NULL, +}; + +/* Performance monitoring unit for mapped counters */ +static struct pmu paiext = { + .task_ctx_nr = perf_invalid_context, + .event_init = paiext_event_init, + .add = paiext_add, + .del = paiext_del, + .start = paiext_start, + .stop = paiext_stop, + .read = paiext_read, + .sched_task = paiext_sched_task, + .attr_groups = paiext_attr_groups, +}; + +/* List of symbolic PAI extension 1 NNPA counter names. */ +static const char * const paiext_ctrnames[] = { + [0] = "NNPA_ALL", + [1] = "NNPA_ADD", + [2] = "NNPA_SUB", + [3] = "NNPA_MUL", + [4] = "NNPA_DIV", + [5] = "NNPA_MIN", + [6] = "NNPA_MAX", + [7] = "NNPA_LOG", + [8] = "NNPA_EXP", + [9] = "NNPA_IBM_RESERVED_9", + [10] = "NNPA_RELU", + [11] = "NNPA_TANH", + [12] = "NNPA_SIGMOID", + [13] = "NNPA_SOFTMAX", + [14] = "NNPA_BATCHNORM", + [15] = "NNPA_MAXPOOL2D", + [16] = "NNPA_AVGPOOL2D", + [17] = "NNPA_LSTMACT", + [18] = "NNPA_GRUACT", + [19] = "NNPA_CONVOLUTION", + [20] = "NNPA_MATMUL_OP", + [21] = "NNPA_MATMUL_OP_BCAST23", + [22] = "NNPA_SMALLBATCH", + [23] = "NNPA_LARGEDIM", + [24] = "NNPA_SMALLTENSOR", + [25] = "NNPA_1MFRAME", + [26] = "NNPA_2GFRAME", + [27] = "NNPA_ACCESSEXCEPT", +}; + +static void __init attr_event_free(struct attribute **attrs, int num) +{ + struct perf_pmu_events_attr *pa; + struct device_attribute *dap; + int i; + + for (i = 0; i < num; i++) { + dap = container_of(attrs[i], struct device_attribute, attr); + pa = container_of(dap, struct perf_pmu_events_attr, attr); + kfree(pa); + } + kfree(attrs); +} + +static int __init attr_event_init_one(struct attribute **attrs, int num) +{ + struct perf_pmu_events_attr *pa; + + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return -ENOMEM; + + sysfs_attr_init(&pa->attr.attr); + pa->id = PAI_NNPA_BASE + num; + pa->attr.attr.name = paiext_ctrnames[num]; + pa->attr.attr.mode = 0444; + pa->attr.show = cpumf_events_sysfs_show; + pa->attr.store = NULL; + attrs[num] = &pa->attr.attr; + return 0; +} + +/* Create PMU sysfs event attributes on the fly. 
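Tying together the numbering scheme described above: a PAI NNPA event's perf config value is the NNPA base (0x1800) plus the counter index from the name table, with index 0 being NNPA_ALL. A small illustrative computation; the perf invocation in the comment is only an example of how the sysfs event would typically be used.

#include <stdio.h>

#define DEMO_CRYPTO_BASE	0x1000	/* mirrors PAI_CRYPTO_BASE */
#define DEMO_NNPA_BASE		0x1800	/* mirrors PAI_NNPA_BASE */

int main(void)
{
	/* e.g. "perf stat -e pai_ext/NNPA_MATMUL_OP/" would carry config 0x1814 */
	unsigned long config = DEMO_NNPA_BASE + 20;

	printf("config=%#lx -> NNPA counter index %lu\n",
	       config, config - DEMO_NNPA_BASE);
	return 0;
}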
*/ +static int __init attr_event_init(void) +{ + struct attribute **attrs; + int ret, i; + + attrs = kmalloc_array(ARRAY_SIZE(paiext_ctrnames) + 1, sizeof(*attrs), + GFP_KERNEL); + if (!attrs) + return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(paiext_ctrnames); i++) { + ret = attr_event_init_one(attrs, i); + if (ret) { + attr_event_free(attrs, i - 1); + return ret; + } + } + attrs[i] = NULL; + paiext_events_group.attrs = attrs; + return 0; +} + +static int __init paiext_init(void) +{ + struct qpaci_info_block ib; + int rc = -ENOMEM; + + if (!test_facility(197)) + return 0; + + qpaci(&ib); + paiext_cnt = ib.num_nnpa; + if (paiext_cnt >= PAI_NNPA_MAXCTR) + paiext_cnt = PAI_NNPA_MAXCTR; + if (!paiext_cnt) + return 0; + + rc = attr_event_init(); + if (rc) { + pr_err("Creation of PMU " KMSG_COMPONENT " /sysfs failed\n"); + return rc; + } + + /* Setup s390dbf facility */ + paiext_dbg = debug_register(KMSG_COMPONENT, 2, 256, 128); + if (!paiext_dbg) { + pr_err("Registration of s390dbf " KMSG_COMPONENT " failed\n"); + rc = -ENOMEM; + goto out_init; + } + debug_register_view(paiext_dbg, &debug_sprintf_view); + + rc = perf_pmu_register(&paiext, KMSG_COMPONENT, -1); + if (rc) { + pr_err("Registration of " KMSG_COMPONENT " PMU failed with " + "rc=%i\n", rc); + goto out_pmu; + } + + return 0; + +out_pmu: + debug_unregister_view(paiext_dbg, &debug_sprintf_view); + debug_unregister(paiext_dbg); +out_init: + attr_event_free(paiext_events_group.attrs, + ARRAY_SIZE(paiext_ctrnames) + 1); + return rc; +} + +device_initcall(paiext_init); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c new file mode 100644 index 0000000000..6e9e5d5e92 --- /dev/null +++ b/arch/s390/kernel/perf_regs.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/perf_event.h> +#include <linux/perf_regs.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/bug.h> +#include <asm/ptrace.h> +#include <asm/fpu/api.h> +#include <asm/fpu/types.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + freg_t fp; + + if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) + return regs->gprs[idx]; + + if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) { + if (!user_mode(regs)) + return 0; + + idx -= PERF_REG_S390_FP0; + fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx) + : current->thread.fpu.fprs[idx]; + return fp.ui; + } + + if (idx == PERF_REG_S390_MASK) + return regs->psw.mask; + if (idx == PERF_REG_S390_PC) + return regs->psw.addr; + + WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX); + return 0; +} + +#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_31BIT)) + return PERF_SAMPLE_REGS_ABI_32; + + return PERF_SAMPLE_REGS_ABI_64; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + /* + * Use the regs from the first interruption and let + * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()). + * + * Also save FPU registers for user-space tasks only. 
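For completeness, a hypothetical user-space sketch of what perf_reg_validate() above accepts as a sample_regs_user mask: any non-zero mask whose bits all lie below PERF_REG_S390_MAX. The register count of 34 (16 GPRs, 16 FPRs, PSW mask and PC) is an assumption about the uapi enum, and the DEMO_* names are invented.

#include <stdint.h>
#include <stdio.h>

#define DEMO_REG_MAX	34			/* stands in for PERF_REG_S390_MAX */
#define DEMO_RESERVED	(~((1ULL << DEMO_REG_MAX) - 1))

static int demo_validate(uint64_t mask)
{
	return (!mask || (mask & DEMO_RESERVED)) ? -1 : 0;
}

int main(void)
{
	uint64_t gprs_only = (1ULL << 16) - 1;	/* request GPR0..GPR15 */

	printf("gprs_only valid: %d\n", demo_validate(gprs_only) == 0);	/* 1 */
	printf("bit 40 valid:    %d\n", demo_validate(1ULL << 40) == 0);	/* 0 */
	return 0;
}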
+ */ + regs_user->regs = task_pt_regs(current); + if (user_mode(regs_user->regs)) + save_fpu_regs(); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c new file mode 100644 index 0000000000..2580004177 --- /dev/null +++ b/arch/s390/kernel/process.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file handles the architecture dependent parts of process handling. + * + * Copyright IBM Corp. 1999, 2009 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Hartmut Penner <hp@de.ibm.com>, + * Denis Joseph Barrow, + */ + +#include <linux/elf-randomize.h> +#include <linux/compiler.h> +#include <linux/cpu.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/elfcore.h> +#include <linux/smp.h> +#include <linux/slab.h> +#include <linux/interrupt.h> +#include <linux/tick.h> +#include <linux/personality.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <linux/kprobes.h> +#include <linux/random.h> +#include <linux/export.h> +#include <linux/init_task.h> +#include <linux/entry-common.h> +#include <linux/io.h> +#include <asm/cpu_mf.h> +#include <asm/processor.h> +#include <asm/vtimer.h> +#include <asm/exec.h> +#include <asm/irq.h> +#include <asm/nmi.h> +#include <asm/smp.h> +#include <asm/stacktrace.h> +#include <asm/switch_to.h> +#include <asm/runtime_instr.h> +#include <asm/unwind.h> +#include "entry.h" + +void ret_from_fork(void) asm("ret_from_fork"); + +void __ret_from_fork(struct task_struct *prev, struct pt_regs *regs) +{ + void (*func)(void *arg); + + schedule_tail(prev); + + if (!user_mode(regs)) { + /* Kernel thread */ + func = (void *)regs->gprs[9]; + func((void *)regs->gprs[10]); + } + clear_pt_regs_flag(regs, PIF_SYSCALL); + syscall_exit_to_user_mode(regs); +} + +void flush_thread(void) +{ +} + +void arch_setup_new_exec(void) +{ + if (S390_lowcore.current_pid != current->pid) { + S390_lowcore.current_pid = current->pid; + if (test_facility(40)) + lpp(&S390_lowcore.lpp); + } +} + +void arch_release_task_struct(struct task_struct *tsk) +{ + runtime_instr_release(tsk); + guarded_storage_release(tsk); +} + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + /* + * Save the floating-point or vector register state of the current + * task and set the CIF_FPU flag to lazy restore the FPU register + * state when returning to user space. + */ + save_fpu_regs(); + + memcpy(dst, src, arch_task_struct_size); + dst->thread.fpu.regs = dst->thread.fpu.fprs; + + /* + * Don't transfer over the runtime instrumentation or the guarded + * storage control block pointers. These fields are cleared here instead + * of in copy_thread() to avoid premature freeing of associated memory + * on fork() failure. Wait to clear the RI flag because ->stack still + * refers to the source thread. + */ + dst->thread.ri_cb = NULL; + dst->thread.gs_cb = NULL; + dst->thread.gs_bc_cb = NULL; + + return 0; +} + +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) +{ + unsigned long clone_flags = args->flags; + unsigned long new_stackp = args->stack; + unsigned long tls = args->tls; + struct fake_frame + { + struct stack_frame sf; + struct pt_regs childregs; + } *frame; + + frame = container_of(task_pt_regs(p), struct fake_frame, childregs); + p->thread.ksp = (unsigned long) frame; + /* Save access registers to new thread structure. 
*/ + save_access_regs(&p->thread.acrs[0]); + /* start new process with ar4 pointing to the correct address space */ + /* Don't copy debug registers */ + memset(&p->thread.per_user, 0, sizeof(p->thread.per_user)); + memset(&p->thread.per_event, 0, sizeof(p->thread.per_event)); + clear_tsk_thread_flag(p, TIF_SINGLE_STEP); + p->thread.per_flags = 0; + /* Initialize per thread user and system timer values */ + p->thread.user_timer = 0; + p->thread.guest_timer = 0; + p->thread.system_timer = 0; + p->thread.hardirq_timer = 0; + p->thread.softirq_timer = 0; + p->thread.last_break = 1; + + frame->sf.back_chain = 0; + frame->sf.gprs[11 - 6] = (unsigned long)&frame->childregs; + frame->sf.gprs[12 - 6] = (unsigned long)p; + /* new return point is ret_from_fork */ + frame->sf.gprs[14 - 6] = (unsigned long)ret_from_fork; + /* fake return stack for resume(), don't go back to schedule */ + frame->sf.gprs[15 - 6] = (unsigned long)frame; + + /* Store access registers to kernel stack of new process. */ + if (unlikely(args->fn)) { + /* kernel thread */ + memset(&frame->childregs, 0, sizeof(struct pt_regs)); + frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | + PSW_MASK_EXT | PSW_MASK_MCHECK; + frame->childregs.gprs[9] = (unsigned long)args->fn; + frame->childregs.gprs[10] = (unsigned long)args->fn_arg; + frame->childregs.orig_gpr2 = -1; + frame->childregs.last_break = 1; + return 0; + } + frame->childregs = *current_pt_regs(); + frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */ + frame->childregs.flags = 0; + if (new_stackp) + frame->childregs.gprs[15] = new_stackp; + /* + * Clear the runtime instrumentation flag after the above childregs + * copy. The CB pointer was already cleared in arch_dup_task_struct(). + */ + frame->childregs.psw.mask &= ~PSW_MASK_RI; + + /* Set a new TLS ? */ + if (clone_flags & CLONE_SETTLS) { + if (is_compat_task()) { + p->thread.acrs[0] = (unsigned int)tls; + } else { + p->thread.acrs[0] = (unsigned int)(tls >> 32); + p->thread.acrs[1] = (unsigned int)tls; + } + } + /* + * s390 stores the svc return address in arch_data when calling + * sigreturn()/restart_syscall() via vdso. 1 means no valid address + * stored. + */ + p->restart_block.arch_data = 1; + return 0; +} + +void execve_tail(void) +{ + current->thread.fpu.fpc = 0; + asm volatile("sfpc %0" : : "d" (0)); +} + +unsigned long __get_wchan(struct task_struct *p) +{ + struct unwind_state state; + unsigned long ip = 0; + + if (!task_stack_page(p)) + return 0; + + if (!try_get_task_stack(p)) + return 0; + + unwind_for_each_frame(&state, p, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) { + ip = 0; + break; + } + + ip = unwind_get_return_address(&state); + if (!ip) + break; + + if (!in_sched_functions(ip)) + break; + } + + put_task_stack(p); + return ip; +} + +unsigned long arch_align_stack(unsigned long sp) +{ + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + sp -= get_random_u32_below(PAGE_SIZE); + return sp & ~0xf; +} + +static inline unsigned long brk_rnd(void) +{ + return (get_random_u16() & BRK_RND_MASK) << PAGE_SHIFT; +} + +unsigned long arch_randomize_brk(struct mm_struct *mm) +{ + unsigned long ret; + + ret = PAGE_ALIGN(mm->brk + brk_rnd()); + return (ret > mm->brk) ? ret : mm->brk; +} diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c new file mode 100644 index 0000000000..0a999c8226 --- /dev/null +++ b/arch/s390/kernel/processor.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 
2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/stop_machine.h> +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/random.h> +#include <linux/sched/mm.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/mm_types.h> +#include <linux/delay.h> +#include <linux/cpu.h> + +#include <asm/diag.h> +#include <asm/facility.h> +#include <asm/elf.h> +#include <asm/lowcore.h> +#include <asm/param.h> +#include <asm/sclp.h> +#include <asm/smp.h> + +unsigned long __read_mostly elf_hwcap; +char elf_platform[ELF_PLATFORM_SIZE]; + +struct cpu_info { + unsigned int cpu_mhz_dynamic; + unsigned int cpu_mhz_static; + struct cpuid cpu_id; +}; + +static DEFINE_PER_CPU(struct cpu_info, cpu_info); +static DEFINE_PER_CPU(int, cpu_relax_retry); + +static bool machine_has_cpu_mhz; + +void __init cpu_detect_mhz_feature(void) +{ + if (test_facility(34) && __ecag(ECAG_CPU_ATTRIBUTE, 0) != -1UL) + machine_has_cpu_mhz = true; +} + +static void update_cpu_mhz(void *arg) +{ + unsigned long mhz; + struct cpu_info *c; + + mhz = __ecag(ECAG_CPU_ATTRIBUTE, 0); + c = this_cpu_ptr(&cpu_info); + c->cpu_mhz_dynamic = mhz >> 32; + c->cpu_mhz_static = mhz & 0xffffffff; +} + +void s390_update_cpu_mhz(void) +{ + s390_adjust_jiffies(); + if (machine_has_cpu_mhz) + on_each_cpu(update_cpu_mhz, NULL, 0); +} + +void notrace stop_machine_yield(const struct cpumask *cpumask) +{ + int cpu, this_cpu; + + this_cpu = smp_processor_id(); + if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) { + __this_cpu_write(cpu_relax_retry, 0); + cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false); + if (cpu >= nr_cpu_ids) + return; + if (arch_vcpu_is_preempted(cpu)) + smp_yield_cpu(cpu); + } +} + +/* + * cpu_init - initializes state that is per-CPU. 
+ */ +void cpu_init(void) +{ + struct cpuid *id = this_cpu_ptr(&cpu_info.cpu_id); + + get_cpu_id(id); + if (machine_has_cpu_mhz) + update_cpu_mhz(NULL); + mmgrab(&init_mm); + current->active_mm = &init_mm; + BUG_ON(current->mm); + enter_lazy_tlb(&init_mm, current); +} + +static void show_facilities(struct seq_file *m) +{ + unsigned int bit; + + seq_puts(m, "facilities :"); + for_each_set_bit_inv(bit, (long *)&stfle_fac_list, MAX_FACILITY_BIT) + seq_printf(m, " %d", bit); + seq_putc(m, '\n'); +} + +static void show_cpu_summary(struct seq_file *m, void *v) +{ + static const char *hwcap_str[] = { + [HWCAP_NR_ESAN3] = "esan3", + [HWCAP_NR_ZARCH] = "zarch", + [HWCAP_NR_STFLE] = "stfle", + [HWCAP_NR_MSA] = "msa", + [HWCAP_NR_LDISP] = "ldisp", + [HWCAP_NR_EIMM] = "eimm", + [HWCAP_NR_DFP] = "dfp", + [HWCAP_NR_HPAGE] = "edat", + [HWCAP_NR_ETF3EH] = "etf3eh", + [HWCAP_NR_HIGH_GPRS] = "highgprs", + [HWCAP_NR_TE] = "te", + [HWCAP_NR_VXRS] = "vx", + [HWCAP_NR_VXRS_BCD] = "vxd", + [HWCAP_NR_VXRS_EXT] = "vxe", + [HWCAP_NR_GS] = "gs", + [HWCAP_NR_VXRS_EXT2] = "vxe2", + [HWCAP_NR_VXRS_PDE] = "vxp", + [HWCAP_NR_SORT] = "sort", + [HWCAP_NR_DFLT] = "dflt", + [HWCAP_NR_VXRS_PDE2] = "vxp2", + [HWCAP_NR_NNPA] = "nnpa", + [HWCAP_NR_PCI_MIO] = "pcimio", + [HWCAP_NR_SIE] = "sie", + }; + int i, cpu; + + BUILD_BUG_ON(ARRAY_SIZE(hwcap_str) != HWCAP_NR_MAX); + seq_printf(m, "vendor_id : IBM/S390\n" + "# processors : %i\n" + "bogomips per cpu: %lu.%02lu\n", + num_online_cpus(), loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ))%100); + seq_printf(m, "max thread id : %d\n", smp_cpu_mtid); + seq_puts(m, "features\t: "); + for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) + if (hwcap_str[i] && (elf_hwcap & (1UL << i))) + seq_printf(m, "%s ", hwcap_str[i]); + seq_puts(m, "\n"); + show_facilities(m); + show_cacheinfo(m); + for_each_online_cpu(cpu) { + struct cpuid *id = &per_cpu(cpu_info.cpu_id, cpu); + + seq_printf(m, "processor %d: " + "version = %02X, " + "identification = %06X, " + "machine = %04X\n", + cpu, id->version, id->ident, id->machine); + } +} + +static int __init setup_hwcaps(void) +{ + /* instructions named N3, "backported" to esa-mode */ + elf_hwcap |= HWCAP_ESAN3; + + /* z/Architecture mode active */ + elf_hwcap |= HWCAP_ZARCH; + + /* store-facility-list-extended */ + if (test_facility(7)) + elf_hwcap |= HWCAP_STFLE; + + /* message-security assist */ + if (test_facility(17)) + elf_hwcap |= HWCAP_MSA; + + /* long-displacement */ + if (test_facility(19)) + elf_hwcap |= HWCAP_LDISP; + + /* extended-immediate */ + elf_hwcap |= HWCAP_EIMM; + + /* extended-translation facility 3 enhancement */ + if (test_facility(22) && test_facility(30)) + elf_hwcap |= HWCAP_ETF3EH; + + /* decimal floating point & perform floating point operation */ + if (test_facility(42) && test_facility(44)) + elf_hwcap |= HWCAP_DFP; + + /* huge page support */ + if (MACHINE_HAS_EDAT1) + elf_hwcap |= HWCAP_HPAGE; + + /* 64-bit register support for 31-bit processes */ + elf_hwcap |= HWCAP_HIGH_GPRS; + + /* transactional execution */ + if (MACHINE_HAS_TE) + elf_hwcap |= HWCAP_TE; + + /* + * Vector extension can be disabled with the "novx" parameter. + * Use MACHINE_HAS_VX instead of facility bit 129. 
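/*
 * [Illustrative sketch, not part of this patch] The HWCAP bits assembled in
 * setup_hwcaps() reach user space through the AT_HWCAP auxiliary vector and
 * also show up, by name, in the "features" line that show_cpu_summary()
 * prints in /proc/cpuinfo ("vx", "te", "gs", ...).  A minimal user-space
 * check by feature name could look like the helper below; the helper name
 * is invented for the example and error handling is kept minimal.
 */
#include <stdio.h>
#include <string.h>

static int cpuinfo_has_feature(const char *name)
{
	char line[1024];
	FILE *f = fopen("/proc/cpuinfo", "r");
	int found = 0;

	if (!f)
		return 0;
	while (fgets(line, sizeof(line), f)) {
		if (strncmp(line, "features", 8))
			continue;
		/* the line reads "features : esan3 zarch stfle ... vx ..." */
		for (char *tok = strtok(line, " \t:\n"); tok; tok = strtok(NULL, " \t:\n")) {
			if (!strcmp(tok, name)) {
				found = 1;
				break;
			}
		}
		break;
	}
	fclose(f);
	return found;	/* e.g. cpuinfo_has_feature("vx") */
}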
+ */ + if (MACHINE_HAS_VX) { + elf_hwcap |= HWCAP_VXRS; + if (test_facility(134)) + elf_hwcap |= HWCAP_VXRS_BCD; + if (test_facility(135)) + elf_hwcap |= HWCAP_VXRS_EXT; + if (test_facility(148)) + elf_hwcap |= HWCAP_VXRS_EXT2; + if (test_facility(152)) + elf_hwcap |= HWCAP_VXRS_PDE; + if (test_facility(192)) + elf_hwcap |= HWCAP_VXRS_PDE2; + } + + if (test_facility(150)) + elf_hwcap |= HWCAP_SORT; + + if (test_facility(151)) + elf_hwcap |= HWCAP_DFLT; + + if (test_facility(165)) + elf_hwcap |= HWCAP_NNPA; + + /* guarded storage */ + if (MACHINE_HAS_GS) + elf_hwcap |= HWCAP_GS; + + if (MACHINE_HAS_PCI_MIO) + elf_hwcap |= HWCAP_PCI_MIO; + + /* virtualization support */ + if (sclp.has_sief2) + elf_hwcap |= HWCAP_SIE; + + return 0; +} +arch_initcall(setup_hwcaps); + +static int __init setup_elf_platform(void) +{ + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + add_device_randomness(&cpu_id, sizeof(cpu_id)); + switch (cpu_id.machine) { + default: /* Use "z10" as default. */ + strcpy(elf_platform, "z10"); + break; + case 0x2817: + case 0x2818: + strcpy(elf_platform, "z196"); + break; + case 0x2827: + case 0x2828: + strcpy(elf_platform, "zEC12"); + break; + case 0x2964: + case 0x2965: + strcpy(elf_platform, "z13"); + break; + case 0x3906: + case 0x3907: + strcpy(elf_platform, "z14"); + break; + case 0x8561: + case 0x8562: + strcpy(elf_platform, "z15"); + break; + case 0x3931: + case 0x3932: + strcpy(elf_platform, "z16"); + break; + } + return 0; +} +arch_initcall(setup_elf_platform); + +static void show_cpu_topology(struct seq_file *m, unsigned long n) +{ +#ifdef CONFIG_SCHED_TOPOLOGY + seq_printf(m, "physical id : %d\n", topology_physical_package_id(n)); + seq_printf(m, "core id : %d\n", topology_core_id(n)); + seq_printf(m, "book id : %d\n", topology_book_id(n)); + seq_printf(m, "drawer id : %d\n", topology_drawer_id(n)); + seq_printf(m, "dedicated : %d\n", topology_cpu_dedicated(n)); + seq_printf(m, "address : %d\n", smp_cpu_get_cpu_address(n)); + seq_printf(m, "siblings : %d\n", cpumask_weight(topology_core_cpumask(n))); + seq_printf(m, "cpu cores : %d\n", topology_booted_cores(n)); +#endif /* CONFIG_SCHED_TOPOLOGY */ +} + +static void show_cpu_ids(struct seq_file *m, unsigned long n) +{ + struct cpuid *id = &per_cpu(cpu_info.cpu_id, n); + + seq_printf(m, "version : %02X\n", id->version); + seq_printf(m, "identification : %06X\n", id->ident); + seq_printf(m, "machine : %04X\n", id->machine); +} + +static void show_cpu_mhz(struct seq_file *m, unsigned long n) +{ + struct cpu_info *c = per_cpu_ptr(&cpu_info, n); + + if (!machine_has_cpu_mhz) + return; + seq_printf(m, "cpu MHz dynamic : %d\n", c->cpu_mhz_dynamic); + seq_printf(m, "cpu MHz static : %d\n", c->cpu_mhz_static); +} + +/* + * show_cpuinfo - Get information on one CPU for use by procfs. + */ +static int show_cpuinfo(struct seq_file *m, void *v) +{ + unsigned long n = (unsigned long) v - 1; + unsigned long first = cpumask_first(cpu_online_mask); + + if (n == first) + show_cpu_summary(m, v); + seq_printf(m, "\ncpu number : %ld\n", n); + show_cpu_topology(m, n); + show_cpu_ids(m, n); + show_cpu_mhz(m, n); + return 0; +} + +static inline void *c_update(loff_t *pos) +{ + if (*pos) + *pos = cpumask_next(*pos - 1, cpu_online_mask); + else + *pos = cpumask_first(cpu_online_mask); + return *pos < nr_cpu_ids ? 
(void *)*pos + 1 : NULL; +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + cpus_read_lock(); + return c_update(pos); +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_update(pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ + cpus_read_unlock(); +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo, +}; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c new file mode 100644 index 0000000000..ea244a73ef --- /dev/null +++ b/arch/s390/kernel/ptrace.c @@ -0,0 +1,1608 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ptrace user space interface. + * + * Copyright IBM Corp. 1999, 2010 + * Author(s): Denis Joseph Barrow + * Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include "asm/ptrace.h" +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/user.h> +#include <linux/security.h> +#include <linux/audit.h> +#include <linux/signal.h> +#include <linux/elf.h> +#include <linux/regset.h> +#include <linux/seccomp.h> +#include <linux/compat.h> +#include <trace/syscall.h> +#include <asm/page.h> +#include <linux/uaccess.h> +#include <asm/unistd.h> +#include <asm/switch_to.h> +#include <asm/runtime_instr.h> +#include <asm/facility.h> + +#include "entry.h" + +#ifdef CONFIG_COMPAT +#include "compat_ptrace.h" +#endif + +void update_cr_regs(struct task_struct *task) +{ + struct pt_regs *regs = task_pt_regs(task); + struct thread_struct *thread = &task->thread; + struct per_regs old, new; + union ctlreg0 cr0_old, cr0_new; + union ctlreg2 cr2_old, cr2_new; + int cr0_changed, cr2_changed; + + __ctl_store(cr0_old.val, 0, 0); + __ctl_store(cr2_old.val, 2, 2); + cr0_new = cr0_old; + cr2_new = cr2_old; + /* Take care of the enable/disable of transactional execution. */ + if (MACHINE_HAS_TE) { + /* Set or clear transaction execution TXC bit 8. */ + cr0_new.tcx = 1; + if (task->thread.per_flags & PER_FLAG_NO_TE) + cr0_new.tcx = 0; + /* Set or clear transaction execution TDC bits 62 and 63. */ + cr2_new.tdc = 0; + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) { + if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND) + cr2_new.tdc = 1; + else + cr2_new.tdc = 2; + } + } + /* Take care of enable/disable of guarded storage. */ + if (MACHINE_HAS_GS) { + cr2_new.gse = 0; + if (task->thread.gs_cb) + cr2_new.gse = 1; + } + /* Load control register 0/2 iff changed */ + cr0_changed = cr0_new.val != cr0_old.val; + cr2_changed = cr2_new.val != cr2_old.val; + if (cr0_changed) + __ctl_load(cr0_new.val, 0, 0); + if (cr2_changed) + __ctl_load(cr2_new.val, 2, 2); + /* Copy user specified PER registers */ + new.control = thread->per_user.control; + new.start = thread->per_user.start; + new.end = thread->per_user.end; + + /* merge TIF_SINGLE_STEP into user specified PER registers. */ + if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) || + test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) { + if (test_tsk_thread_flag(task, TIF_BLOCK_STEP)) + new.control |= PER_EVENT_BRANCH; + else + new.control |= PER_EVENT_IFETCH; + new.control |= PER_CONTROL_SUSPENSION; + new.control |= PER_EVENT_TRANSACTION_END; + if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) + new.control |= PER_EVENT_IFETCH; + new.start = 0; + new.end = -1UL; + } + + /* Take care of the PER enablement bit in the PSW. 
*/ + if (!(new.control & PER_EVENT_MASK)) { + regs->psw.mask &= ~PSW_MASK_PER; + return; + } + regs->psw.mask |= PSW_MASK_PER; + __ctl_store(old, 9, 11); + if (memcmp(&new, &old, sizeof(struct per_regs)) != 0) + __ctl_load(new, 9, 11); +} + +void user_enable_single_step(struct task_struct *task) +{ + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); + set_tsk_thread_flag(task, TIF_SINGLE_STEP); +} + +void user_disable_single_step(struct task_struct *task) +{ + clear_tsk_thread_flag(task, TIF_BLOCK_STEP); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); +} + +void user_enable_block_step(struct task_struct *task) +{ + set_tsk_thread_flag(task, TIF_SINGLE_STEP); + set_tsk_thread_flag(task, TIF_BLOCK_STEP); +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Clear all debugging related fields. + */ +void ptrace_disable(struct task_struct *task) +{ + memset(&task->thread.per_user, 0, sizeof(task->thread.per_user)); + memset(&task->thread.per_event, 0, sizeof(task->thread.per_event)); + clear_tsk_thread_flag(task, TIF_SINGLE_STEP); + clear_tsk_thread_flag(task, TIF_PER_TRAP); + task->thread.per_flags = 0; +} + +#define __ADDR_MASK 7 + +static inline unsigned long __peek_user_per(struct task_struct *child, + addr_t addr) +{ + if (addr == offsetof(struct per_struct_kernel, cr9)) + /* Control bits of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == offsetof(struct per_struct_kernel, cr10)) + /* Start address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == offsetof(struct per_struct_kernel, cr11)) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + -1UL : child->thread.per_user.end; + else if (addr == offsetof(struct per_struct_kernel, bits)) + /* Single-step bit. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + (1UL << (BITS_PER_LONG - 1)) : 0; + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) + /* Start address of the user specified per set. */ + return child->thread.per_user.start; + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) + /* End address of the user specified per set. */ + return child->thread.per_user.end; + else if (addr == offsetof(struct per_struct_kernel, perc_atmid)) + /* PER code, ATMID and AI of the last PER trap */ + return (unsigned long) + child->thread.per_event.cause << (BITS_PER_LONG - 16); + else if (addr == offsetof(struct per_struct_kernel, address)) + /* Address of the last PER trap */ + return child->thread.per_event.address; + else if (addr == offsetof(struct per_struct_kernel, access_id)) + /* Access id of the last PER trap */ + return (unsigned long) + child->thread.per_event.paid << (BITS_PER_LONG - 8); + return 0; +} + +/* + * Read the word at offset addr from the user area of a process. The + * trouble here is that the information is littered over different + * locations. The process registers are found on the kernel stack, + * the floating point stuff and the trace settings are stored in + * the task structure. In addition the different structures in + * struct user contain pad bytes that should be read as zeroes. + * Lovely... 
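/*
 * [Illustrative sketch, not part of this patch] Seen from a tracer, the
 * layout described above is addressed as byte offsets into struct user.
 * A single-word read of the PSW mask with the classic PTRACE_PEEKUSER
 * request (spelled PTRACE_PEEKUSR on the kernel side and handled by
 * peek_user() below) could look like this.  The helper name is invented
 * for the example, the struct user definition is assumed to come from the
 * architecture's user.h, and error handling is reduced to the usual errno
 * convention of PTRACE_PEEKUSER.
 */
#include <errno.h>
#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

static long peek_psw_mask(pid_t pid, unsigned long *mask)
{
	errno = 0;
	*mask = ptrace(PTRACE_PEEKUSER, pid,
		       (void *) offsetof(struct user, regs.psw.mask), NULL);
	return errno ? -1 : 0;
}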
+ */ +static unsigned long __peek_user(struct task_struct *child, addr_t addr) +{ + addr_t offset, tmp; + + if (addr < offsetof(struct user, regs.acrs)) { + /* + * psw and gprs are stored on the stack + */ + tmp = *(addr_t *)((addr_t) &task_pt_regs(child)->psw + addr); + if (addr == offsetof(struct user, regs.psw.mask)) { + /* Return a clean psw mask. */ + tmp &= PSW_MASK_USER | PSW_MASK_RI; + tmp |= PSW_USER_BITS; + } + + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { + /* + * access registers are stored in the thread structure + */ + offset = addr - offsetof(struct user, regs.acrs); + /* + * Very special case: old & broken 64 bit gdb reading + * from acrs[15]. Result is a 64 bit value. Read the + * 32 bit acrs[15] value and shift it by 32. Sick... + */ + if (addr == offsetof(struct user, regs.acrs[15])) + tmp = ((unsigned long) child->thread.acrs[15]) << 32; + else + tmp = *(addr_t *)((addr_t) &child->thread.acrs + offset); + + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { + /* + * orig_gpr2 is stored on the kernel stack + */ + tmp = (addr_t) task_pt_regs(child)->orig_gpr2; + + } else if (addr < offsetof(struct user, regs.fp_regs)) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { + /* + * floating point control reg. is in the thread structure + */ + tmp = child->thread.fpu.fpc; + tmp <<= BITS_PER_LONG - 32; + + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { + /* + * floating point regs. are either in child->thread.fpu + * or the child->thread.fpu.vxrs array + */ + offset = addr - offsetof(struct user, regs.fp_regs.fprs); + if (MACHINE_HAS_VX) + tmp = *(addr_t *) + ((addr_t) child->thread.fpu.vxrs + 2*offset); + else + tmp = *(addr_t *) + ((addr_t) child->thread.fpu.fprs + offset); + + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { + /* + * Handle access to the per_info structure. + */ + addr -= offsetof(struct user, regs.per_info); + tmp = __peek_user_per(child, addr); + + } else + tmp = 0; + + return tmp; +} + +static int +peek_user(struct task_struct *child, addr_t addr, addr_t data) +{ + addr_t tmp, mask; + + /* + * Stupid gdb peeks/pokes the access registers in 64 bit with + * an alignment of 4. Programmers from hell... + */ + mask = __ADDR_MASK; + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) + mask = 3; + if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) + return -EIO; + + tmp = __peek_user(child, addr); + return put_user(tmp, (addr_t __user *) data); +} + +static inline void __poke_user_per(struct task_struct *child, + addr_t addr, addr_t data) +{ + /* + * There are only three fields in the per_info struct that the + * debugger user can write to. + * 1) cr9: the debugger wants to set a new PER event mask + * 2) starting_addr: the debugger wants to set a new starting + * address to use with the PER event mask. + * 3) ending_addr: the debugger wants to set a new ending + * address to use with the PER event mask. + * The user specified PER event mask and the start and end + * addresses are used only if single stepping is not in effect. + * Writes to any other field in per_info are ignored. + */ + if (addr == offsetof(struct per_struct_kernel, cr9)) + /* PER event mask of the user specified per set. 
*/ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == offsetof(struct per_struct_kernel, starting_addr)) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == offsetof(struct per_struct_kernel, ending_addr)) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* + * Write a word to the user area of a process at location addr. This + * operation does have an additional problem compared to peek_user. + * Stores to the program status word and on the floating point + * control register needs to get checked for validity. + */ +static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) +{ + addr_t offset; + + + if (addr < offsetof(struct user, regs.acrs)) { + struct pt_regs *regs = task_pt_regs(child); + /* + * psw and gprs are stored on the stack + */ + if (addr == offsetof(struct user, regs.psw.mask)) { + unsigned long mask = PSW_MASK_USER; + + mask |= is_ri_task(child) ? PSW_MASK_RI : 0; + if ((data ^ PSW_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW_MASK_ASC) == PSW_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + if ((data & PSW_MASK_EA) && !(data & PSW_MASK_BA)) + /* Invalid addressing mode bits */ + return -EINVAL; + } + + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + + regs->int_code = 0x20000 | (data & 0xffff); + } + *(addr_t *)((addr_t) ®s->psw + addr) = data; + } else if (addr < offsetof(struct user, regs.orig_gpr2)) { + /* + * access registers are stored in the thread structure + */ + offset = addr - offsetof(struct user, regs.acrs); + /* + * Very special case: old & broken 64 bit gdb writing + * to acrs[15] with a 64 bit value. Ignore the lower + * half of the value and write the upper 32 bit to + * acrs[15]. Sick... + */ + if (addr == offsetof(struct user, regs.acrs[15])) + child->thread.acrs[15] = (unsigned int) (data >> 32); + else + *(addr_t *)((addr_t) &child->thread.acrs + offset) = data; + + } else if (addr == offsetof(struct user, regs.orig_gpr2)) { + /* + * orig_gpr2 is stored on the kernel stack + */ + task_pt_regs(child)->orig_gpr2 = data; + + } else if (addr < offsetof(struct user, regs.fp_regs)) { + /* + * prevent writes of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + + } else if (addr == offsetof(struct user, regs.fp_regs.fpc)) { + /* + * floating point control reg. is in the thread structure + */ + if ((unsigned int) data != 0 || + test_fp_ctl(data >> (BITS_PER_LONG - 32))) + return -EINVAL; + child->thread.fpu.fpc = data >> (BITS_PER_LONG - 32); + + } else if (addr < offsetof(struct user, regs.fp_regs) + sizeof(s390_fp_regs)) { + /* + * floating point regs. are either in child->thread.fpu + * or the child->thread.fpu.vxrs array + */ + offset = addr - offsetof(struct user, regs.fp_regs.fprs); + if (MACHINE_HAS_VX) + *(addr_t *)((addr_t) + child->thread.fpu.vxrs + 2*offset) = data; + else + *(addr_t *)((addr_t) + child->thread.fpu.fprs + offset) = data; + + } else if (addr < offsetof(struct user, regs.per_info) + sizeof(per_struct)) { + /* + * Handle access to the per_info structure. 
+ */ + addr -= offsetof(struct user, regs.per_info); + __poke_user_per(child, addr, data); + + } + + return 0; +} + +static int poke_user(struct task_struct *child, addr_t addr, addr_t data) +{ + addr_t mask; + + /* + * Stupid gdb peeks/pokes the access registers in 64 bit with + * an alignment of 4. Programmers from hell indeed... + */ + mask = __ADDR_MASK; + if (addr >= offsetof(struct user, regs.acrs) && + addr < offsetof(struct user, regs.orig_gpr2)) + mask = 3; + if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK) + return -EIO; + + return __poke_user(child, addr, data); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + ptrace_area parea; + int copied, ret; + + switch (request) { + case PTRACE_PEEKUSR: + /* read the word at location addr in the USER area. */ + return peek_user(child, addr, data); + + case PTRACE_POKEUSR: + /* write the word at location addr in the USER area */ + return poke_user(child, addr, data); + + case PTRACE_PEEKUSR_AREA: + case PTRACE_POKEUSR_AREA: + if (copy_from_user(&parea, (void __force __user *) addr, + sizeof(parea))) + return -EFAULT; + addr = parea.kernel_addr; + data = parea.process_addr; + copied = 0; + while (copied < parea.len) { + if (request == PTRACE_PEEKUSR_AREA) + ret = peek_user(child, addr, data); + else { + addr_t utmp; + if (get_user(utmp, + (addr_t __force __user *) data)) + return -EFAULT; + ret = poke_user(child, addr, utmp); + } + if (ret) + return ret; + addr += sizeof(unsigned long); + data += sizeof(unsigned long); + copied += sizeof(unsigned long); + } + return 0; + case PTRACE_GET_LAST_BREAK: + return put_user(child->thread.last_break, (unsigned long __user *)data); + case PTRACE_ENABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags &= ~PER_FLAG_NO_TE; + return 0; + case PTRACE_DISABLE_TE: + if (!MACHINE_HAS_TE) + return -EIO; + child->thread.per_flags |= PER_FLAG_NO_TE; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + return 0; + case PTRACE_TE_ABORT_RAND: + if (!MACHINE_HAS_TE || (child->thread.per_flags & PER_FLAG_NO_TE)) + return -EIO; + switch (data) { + case 0UL: + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND; + break; + case 1UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND_TEND; + break; + case 2UL: + child->thread.per_flags |= PER_FLAG_TE_ABORT_RAND; + child->thread.per_flags &= ~PER_FLAG_TE_ABORT_RAND_TEND; + break; + default: + return -EINVAL; + } + return 0; + default: + return ptrace_request(child, request, addr, data); + } +} + +#ifdef CONFIG_COMPAT +/* + * Now the fun part starts... a 31 bit program running in the + * 31 bit emulation tracing another program. PTRACE_PEEKTEXT, + * PTRACE_PEEKDATA, PTRACE_POKETEXT and PTRACE_POKEDATA are easy + * to handle, the difference to the 64 bit versions of the requests + * is that the access is done in multiples of 4 byte instead of + * 8 bytes (sizeof(unsigned long) on 31/64 bit). + * The ugly part are PTRACE_PEEKUSR, PTRACE_PEEKUSR_AREA, + * PTRACE_POKEUSR and PTRACE_POKEUSR_AREA. If the traced program + * is a 31 bit program too, the content of struct user can be + * emulated. A 31 bit program peeking into the struct user of + * a 64 bit program is a no-no. + */ + +/* + * Same as peek_user_per but for a 31 bit program. + */ +static inline __u32 __peek_user_per_compat(struct task_struct *child, + addr_t addr) +{ + if (addr == offsetof(struct compat_per_struct_kernel, cr9)) + /* Control bits of the active per set. 
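/*
 * [Illustrative sketch, not part of this patch] The PTRACE_PEEKUSR_AREA /
 * PTRACE_POKEUSR_AREA requests handled by arch_ptrace() above take a
 * ptrace_area descriptor through the addr argument: kernel_addr is the
 * offset into struct user, process_addr is a buffer in the tracer, and len
 * is the number of bytes.  Reading all 16 GPRs in one call could look like
 * the sketch below; the helper name is invented for the example, and header
 * include order plus error handling are glossed over.
 */
#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>
#include <asm/ptrace.h>		/* ptrace_area, PTRACE_PEEKUSR_AREA */

static long peek_gprs(pid_t pid, unsigned long gprs[16])
{
	ptrace_area parea = {
		.len          = 16 * sizeof(unsigned long),
		.kernel_addr  = offsetof(struct user, regs.gprs),	/* offset in the user area */
		.process_addr = (unsigned long) gprs,			/* destination in the tracer */
	};

	return ptrace(PTRACE_PEEKUSR_AREA, pid, &parea, NULL);
}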
*/ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + PER_EVENT_IFETCH : child->thread.per_user.control; + else if (addr == offsetof(struct compat_per_struct_kernel, cr10)) + /* Start address of the active per set. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0 : child->thread.per_user.start; + else if (addr == offsetof(struct compat_per_struct_kernel, cr11)) + /* End address of the active per set. */ + return test_thread_flag(TIF_SINGLE_STEP) ? + PSW32_ADDR_INSN : child->thread.per_user.end; + else if (addr == offsetof(struct compat_per_struct_kernel, bits)) + /* Single-step bit. */ + return (__u32) test_thread_flag(TIF_SINGLE_STEP) ? + 0x80000000 : 0; + else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) + /* Start address of the user specified per set. */ + return (__u32) child->thread.per_user.start; + else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) + /* End address of the user specified per set. */ + return (__u32) child->thread.per_user.end; + else if (addr == offsetof(struct compat_per_struct_kernel, perc_atmid)) + /* PER code, ATMID and AI of the last PER trap */ + return (__u32) child->thread.per_event.cause << 16; + else if (addr == offsetof(struct compat_per_struct_kernel, address)) + /* Address of the last PER trap */ + return (__u32) child->thread.per_event.address; + else if (addr == offsetof(struct compat_per_struct_kernel, access_id)) + /* Access id of the last PER trap */ + return (__u32) child->thread.per_event.paid << 24; + return 0; +} + +/* + * Same as peek_user but for a 31 bit program. + */ +static u32 __peek_user_compat(struct task_struct *child, addr_t addr) +{ + addr_t offset; + __u32 tmp; + + if (addr < offsetof(struct compat_user, regs.acrs)) { + struct pt_regs *regs = task_pt_regs(child); + /* + * psw and gprs are stored on the stack + */ + if (addr == offsetof(struct compat_user, regs.psw.mask)) { + /* Fake a 31 bit psw mask. */ + tmp = (__u32)(regs->psw.mask >> 32); + tmp &= PSW32_MASK_USER | PSW32_MASK_RI; + tmp |= PSW32_USER_BITS; + } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { + /* Fake a 31 bit psw address. */ + tmp = (__u32) regs->psw.addr | + (__u32)(regs->psw.mask & PSW_MASK_BA); + } else { + /* gpr 0-15 */ + tmp = *(__u32 *)((addr_t) ®s->psw + addr*2 + 4); + } + } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { + /* + * access registers are stored in the thread structure + */ + offset = addr - offsetof(struct compat_user, regs.acrs); + tmp = *(__u32*)((addr_t) &child->thread.acrs + offset); + + } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { + /* + * orig_gpr2 is stored on the kernel stack + */ + tmp = *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4); + + } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { + /* + * prevent reads of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + tmp = 0; + + } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { + /* + * floating point control reg. is in the thread structure + */ + tmp = child->thread.fpu.fpc; + + } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { + /* + * floating point regs. 
are either in child->thread.fpu + * or the child->thread.fpu.vxrs array + */ + offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); + if (MACHINE_HAS_VX) + tmp = *(__u32 *) + ((addr_t) child->thread.fpu.vxrs + 2*offset); + else + tmp = *(__u32 *) + ((addr_t) child->thread.fpu.fprs + offset); + + } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { + /* + * Handle access to the per_info structure. + */ + addr -= offsetof(struct compat_user, regs.per_info); + tmp = __peek_user_per_compat(child, addr); + + } else + tmp = 0; + + return tmp; +} + +static int peek_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + __u32 tmp; + + if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user) - 3) + return -EIO; + + tmp = __peek_user_compat(child, addr); + return put_user(tmp, (__u32 __user *) data); +} + +/* + * Same as poke_user_per but for a 31 bit program. + */ +static inline void __poke_user_per_compat(struct task_struct *child, + addr_t addr, __u32 data) +{ + if (addr == offsetof(struct compat_per_struct_kernel, cr9)) + /* PER event mask of the user specified per set. */ + child->thread.per_user.control = + data & (PER_EVENT_MASK | PER_CONTROL_MASK); + else if (addr == offsetof(struct compat_per_struct_kernel, starting_addr)) + /* Starting address of the user specified per set. */ + child->thread.per_user.start = data; + else if (addr == offsetof(struct compat_per_struct_kernel, ending_addr)) + /* Ending address of the user specified per set. */ + child->thread.per_user.end = data; +} + +/* + * Same as poke_user but for a 31 bit program. + */ +static int __poke_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + __u32 tmp = (__u32) data; + addr_t offset; + + if (addr < offsetof(struct compat_user, regs.acrs)) { + struct pt_regs *regs = task_pt_regs(child); + /* + * psw, gprs, acrs and orig_gpr2 are stored on the stack + */ + if (addr == offsetof(struct compat_user, regs.psw.mask)) { + __u32 mask = PSW32_MASK_USER; + + mask |= is_ri_task(child) ? PSW32_MASK_RI : 0; + /* Build a 64 bit psw mask from 31 bit mask. */ + if ((tmp ^ PSW32_USER_BITS) & ~mask) + /* Invalid psw mask. */ + return -EINVAL; + if ((data & PSW32_MASK_ASC) == PSW32_ASC_HOME) + /* Invalid address-space-control bits */ + return -EINVAL; + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) | + (regs->psw.mask & PSW_MASK_BA) | + (__u64)(tmp & mask) << 32; + } else if (addr == offsetof(struct compat_user, regs.psw.addr)) { + /* Build a 64 bit psw address from 31 bit address. */ + regs->psw.addr = (__u64) tmp & PSW32_ADDR_INSN; + /* Transfer 31 bit amode bit to psw mask. 
*/ + regs->psw.mask = (regs->psw.mask & ~PSW_MASK_BA) | + (__u64)(tmp & PSW32_ADDR_AMODE); + } else { + if (test_pt_regs_flag(regs, PIF_SYSCALL) && + addr == offsetof(struct compat_user, regs.gprs[2])) { + struct pt_regs *regs = task_pt_regs(child); + + regs->int_code = 0x20000 | (data & 0xffff); + } + /* gpr 0-15 */ + *(__u32*)((addr_t) ®s->psw + addr*2 + 4) = tmp; + } + } else if (addr < offsetof(struct compat_user, regs.orig_gpr2)) { + /* + * access registers are stored in the thread structure + */ + offset = addr - offsetof(struct compat_user, regs.acrs); + *(__u32*)((addr_t) &child->thread.acrs + offset) = tmp; + + } else if (addr == offsetof(struct compat_user, regs.orig_gpr2)) { + /* + * orig_gpr2 is stored on the kernel stack + */ + *(__u32*)((addr_t) &task_pt_regs(child)->orig_gpr2 + 4) = tmp; + + } else if (addr < offsetof(struct compat_user, regs.fp_regs)) { + /* + * prevent writess of padding hole between + * orig_gpr2 and fp_regs on s390. + */ + return 0; + + } else if (addr == offsetof(struct compat_user, regs.fp_regs.fpc)) { + /* + * floating point control reg. is in the thread structure + */ + if (test_fp_ctl(tmp)) + return -EINVAL; + child->thread.fpu.fpc = data; + + } else if (addr < offsetof(struct compat_user, regs.fp_regs) + sizeof(s390_fp_regs)) { + /* + * floating point regs. are either in child->thread.fpu + * or the child->thread.fpu.vxrs array + */ + offset = addr - offsetof(struct compat_user, regs.fp_regs.fprs); + if (MACHINE_HAS_VX) + *(__u32 *)((addr_t) + child->thread.fpu.vxrs + 2*offset) = tmp; + else + *(__u32 *)((addr_t) + child->thread.fpu.fprs + offset) = tmp; + + } else if (addr < offsetof(struct compat_user, regs.per_info) + sizeof(struct compat_per_struct_kernel)) { + /* + * Handle access to the per_info structure. + */ + addr -= offsetof(struct compat_user, regs.per_info); + __poke_user_per_compat(child, addr, data); + } + + return 0; +} + +static int poke_user_compat(struct task_struct *child, + addr_t addr, addr_t data) +{ + if (!is_compat_task() || (addr & 3) || + addr > sizeof(struct compat_user) - 3) + return -EIO; + + return __poke_user_compat(child, addr, data); +} + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + unsigned long addr = caddr; + unsigned long data = cdata; + compat_ptrace_area parea; + int copied, ret; + + switch (request) { + case PTRACE_PEEKUSR: + /* read the word at location addr in the USER area. */ + return peek_user_compat(child, addr, data); + + case PTRACE_POKEUSR: + /* write the word at location addr in the USER area */ + return poke_user_compat(child, addr, data); + + case PTRACE_PEEKUSR_AREA: + case PTRACE_POKEUSR_AREA: + if (copy_from_user(&parea, (void __force __user *) addr, + sizeof(parea))) + return -EFAULT; + addr = parea.kernel_addr; + data = parea.process_addr; + copied = 0; + while (copied < parea.len) { + if (request == PTRACE_PEEKUSR_AREA) + ret = peek_user_compat(child, addr, data); + else { + __u32 utmp; + if (get_user(utmp, + (__u32 __force __user *) data)) + return -EFAULT; + ret = poke_user_compat(child, addr, utmp); + } + if (ret) + return ret; + addr += sizeof(unsigned int); + data += sizeof(unsigned int); + copied += sizeof(unsigned int); + } + return 0; + case PTRACE_GET_LAST_BREAK: + return put_user(child->thread.last_break, (unsigned int __user *)data); + } + return compat_ptrace_request(child, request, addr, data); +} +#endif + +/* + * user_regset definitions. 
+ */ + +static int s390_regs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + unsigned pos; + if (target == current) + save_access_regs(target->thread.acrs); + + for (pos = 0; pos < sizeof(s390_regs); pos += sizeof(long)) + membuf_store(&to, __peek_user(target, pos)); + return 0; +} + +static int s390_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc = 0; + + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + const unsigned long *k = kbuf; + while (count > 0 && !rc) { + rc = __poke_user(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const unsigned long __user *u = ubuf; + while (count > 0 && !rc) { + unsigned long word; + rc = __get_user(word, u++); + if (rc) + break; + rc = __poke_user(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + + if (rc == 0 && target == current) + restore_access_regs(target->thread.acrs); + + return rc; +} + +static int s390_fpregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + _s390_fp_regs fp_regs; + + if (target == current) + save_fpu_regs(); + + fp_regs.fpc = target->thread.fpu.fpc; + fpregs_store(&fp_regs, &target->thread.fpu); + + return membuf_write(&to, &fp_regs, sizeof(fp_regs)); +} + +static int s390_fpregs_set(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + int rc = 0; + freg_t fprs[__NUM_FPRS]; + + if (target == current) + save_fpu_regs(); + + if (MACHINE_HAS_VX) + convert_vx_to_fp(fprs, target->thread.fpu.vxrs); + else + memcpy(&fprs, target->thread.fpu.fprs, sizeof(fprs)); + + /* If setting FPC, must validate it first. 
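/*
 * [Illustrative sketch, not part of this patch] As in __poke_user() above,
 * the classic single-word requests transfer the 32-bit FPC in the upper
 * half of the 64-bit word at offsetof(struct user, regs.fp_regs.fpc); a
 * non-zero lower half is rejected with -EINVAL.  From the tracer (glibc
 * spells the request PTRACE_POKEUSER) this could look like the helper
 * below; the helper name is invented for the example and the struct user
 * layout is assumed to match the architecture's user.h.
 */
#include <stddef.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/user.h>

static long poke_fpc(pid_t pid, unsigned int fpc)
{
	unsigned long word = (unsigned long) fpc << 32;	/* FPC goes in the upper 32 bits */

	return ptrace(PTRACE_POKEUSER, pid,
		      (void *) offsetof(struct user, regs.fp_regs.fpc),
		      (void *) word);
}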
*/ + if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) { + u32 ufpc[2] = { target->thread.fpu.fpc, 0 }; + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ufpc, + 0, offsetof(s390_fp_regs, fprs)); + if (rc) + return rc; + if (ufpc[1] != 0 || test_fp_ctl(ufpc[0])) + return -EINVAL; + target->thread.fpu.fpc = ufpc[0]; + } + + if (rc == 0 && count > 0) + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fprs, offsetof(s390_fp_regs, fprs), -1); + if (rc) + return rc; + + if (MACHINE_HAS_VX) + convert_fp_to_vx(target->thread.fpu.vxrs, fprs); + else + memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs)); + + return rc; +} + +static int s390_last_break_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + return membuf_store(&to, target->thread.last_break); +} + +static int s390_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static int s390_tdb_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct pt_regs *regs = task_pt_regs(target); + size_t size; + + if (!(regs->int_code & 0x200)) + return -ENODATA; + size = sizeof(target->thread.trap_tdb.data); + return membuf_write(&to, target->thread.trap_tdb.data, size); +} + +static int s390_tdb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static int s390_vxrs_low_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + if (!MACHINE_HAS_VX) + return -ENODEV; + if (target == current) + save_fpu_regs(); + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = target->thread.fpu.vxrs[i].low; + return membuf_write(&to, vxrs, sizeof(vxrs)); +} + +static int s390_vxrs_low_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i, rc; + + if (!MACHINE_HAS_VX) + return -ENODEV; + if (target == current) + save_fpu_regs(); + + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = target->thread.fpu.vxrs[i].low; + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); + if (rc == 0) + for (i = 0; i < __NUM_VXRS_LOW; i++) + target->thread.fpu.vxrs[i].low = vxrs[i]; + + return rc; +} + +static int s390_vxrs_high_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + if (!MACHINE_HAS_VX) + return -ENODEV; + if (target == current) + save_fpu_regs(); + return membuf_write(&to, target->thread.fpu.vxrs + __NUM_VXRS_LOW, + __NUM_VXRS_HIGH * sizeof(__vector128)); +} + +static int s390_vxrs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc; + + if (!MACHINE_HAS_VX) + return -ENODEV; + if (target == current) + save_fpu_regs(); + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + target->thread.fpu.vxrs + __NUM_VXRS_LOW, 0, -1); + return rc; +} + +static int s390_system_call_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + return membuf_store(&to, target->thread.system_call); +} + +static int s390_system_call_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int 
count, + const void *kbuf, const void __user *ubuf) +{ + unsigned int *data = &target->thread.system_call; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(unsigned int)); +} + +static int s390_gs_cb_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct gs_cb *data = target->thread.gs_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + if (target == current) + save_gs_cb(data); + return membuf_write(&to, data, sizeof(struct gs_cb)); +} + +static int s390_gs_cb_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb gs_cb = { }, *data = NULL; + int rc; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!target->thread.gs_cb) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + } + if (!target->thread.gs_cb) + gs_cb.gsd = 25; + else if (target == current) + save_gs_cb(&gs_cb); + else + gs_cb = *target->thread.gs_cb; + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &gs_cb, 0, sizeof(gs_cb)); + if (rc) { + kfree(data); + return -EFAULT; + } + preempt_disable(); + if (!target->thread.gs_cb) + target->thread.gs_cb = data; + *target->thread.gs_cb = gs_cb; + if (target == current) { + __ctl_set_bit(2, 4); + restore_gs_cb(target->thread.gs_cb); + } + preempt_enable(); + return rc; +} + +static int s390_gs_bc_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) + return -ENODATA; + return membuf_write(&to, data, sizeof(struct gs_cb)); +} + +static int s390_gs_bc_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct gs_cb *data = target->thread.gs_bc_cb; + + if (!MACHINE_HAS_GS) + return -ENODEV; + if (!data) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + target->thread.gs_bc_cb = data; + } + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + data, 0, sizeof(struct gs_cb)); +} + +static bool is_ri_cb_valid(struct runtime_instr_cb *cb) +{ + return (cb->rca & 0x1f) == 0 && + (cb->roa & 0xfff) == 0 && + (cb->rla & 0xfff) == 0xfff && + cb->s == 1 && + cb->k == 1 && + cb->h == 0 && + cb->reserved1 == 0 && + cb->ps == 1 && + cb->qs == 0 && + cb->pc == 1 && + cb->qc == 0 && + cb->reserved2 == 0 && + cb->reserved3 == 0 && + cb->reserved4 == 0 && + cb->reserved5 == 0 && + cb->reserved6 == 0 && + cb->reserved7 == 0 && + cb->reserved8 == 0 && + cb->rla >= cb->roa && + cb->rca >= cb->roa && + cb->rca <= cb->rla+1 && + cb->m < 3; +} + +static int s390_runtime_instr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct runtime_instr_cb *data = target->thread.ri_cb; + + if (!test_facility(64)) + return -ENODEV; + if (!data) + return -ENODATA; + + return membuf_write(&to, data, sizeof(struct runtime_instr_cb)); +} + +static int s390_runtime_instr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct runtime_instr_cb ri_cb = { }, *data = NULL; + int rc; + + if (!test_facility(64)) + return -ENODEV; + + if (!target->thread.ri_cb) { + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + } + + if 
(target->thread.ri_cb) { + if (target == current) + store_runtime_instr_cb(&ri_cb); + else + ri_cb = *target->thread.ri_cb; + } + + rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &ri_cb, 0, sizeof(struct runtime_instr_cb)); + if (rc) { + kfree(data); + return -EFAULT; + } + + if (!is_ri_cb_valid(&ri_cb)) { + kfree(data); + return -EINVAL; + } + /* + * Override access key in any case, since user space should + * not be able to set it, nor should it care about it. + */ + ri_cb.key = PAGE_DEFAULT_KEY >> 4; + preempt_disable(); + if (!target->thread.ri_cb) + target->thread.ri_cb = data; + *target->thread.ri_cb = ri_cb; + if (target == current) + load_runtime_instr_cb(target->thread.ri_cb); + preempt_enable(); + + return 0; +} + +static const struct user_regset s390_regsets[] = { + { + .core_note_type = NT_PRSTATUS, + .n = sizeof(s390_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .regset_get = s390_regs_get, + .set = s390_regs_set, + }, + { + .core_note_type = NT_PRFPREG, + .n = sizeof(s390_fp_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .regset_get = s390_fpregs_get, + .set = s390_fpregs_set, + }, + { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(unsigned int), + .align = sizeof(unsigned int), + .regset_get = s390_system_call_get, + .set = s390_system_call_set, + }, + { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = s390_last_break_get, + .set = s390_last_break_set, + }, + { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .regset_get = s390_tdb_get, + .set = s390_tdb_set, + }, + { + .core_note_type = NT_S390_VXRS_LOW, + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, + }, + { + .core_note_type = NT_S390_VXRS_HIGH, + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .regset_get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, + }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, + { + .core_note_type = NT_S390_RI_CB, + .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_runtime_instr_get, + .set = s390_runtime_instr_set, + }, +}; + +static const struct user_regset_view user_s390_view = { + .name = "s390x", + .e_machine = EM_S390, + .regsets = s390_regsets, + .n = ARRAY_SIZE(s390_regsets) +}; + +#ifdef CONFIG_COMPAT +static int s390_compat_regs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + unsigned n; + + if (target == current) + save_access_regs(target->thread.acrs); + + for (n = 0; n < sizeof(s390_compat_regs); n += sizeof(compat_ulong_t)) + membuf_store(&to, __peek_user_compat(target, n)); + return 0; +} + +static int s390_compat_regs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int rc = 0; + + if (target == current) + save_access_regs(target->thread.acrs); + + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count > 0 
&& !rc) { + rc = __poke_user_compat(target, pos, *k++); + count -= sizeof(*k); + pos += sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count > 0 && !rc) { + compat_ulong_t word; + rc = __get_user(word, u++); + if (rc) + break; + rc = __poke_user_compat(target, pos, word); + count -= sizeof(*u); + pos += sizeof(*u); + } + } + + if (rc == 0 && target == current) + restore_access_regs(target->thread.acrs); + + return rc; +} + +static int s390_compat_regs_high_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + compat_ulong_t *gprs_high; + int i; + + gprs_high = (compat_ulong_t *)task_pt_regs(target)->gprs; + for (i = 0; i < NUM_GPRS; i++, gprs_high += 2) + membuf_store(&to, *gprs_high); + return 0; +} + +static int s390_compat_regs_high_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + compat_ulong_t *gprs_high; + int rc = 0; + + gprs_high = (compat_ulong_t *) + &task_pt_regs(target)->gprs[pos / sizeof(compat_ulong_t)]; + if (kbuf) { + const compat_ulong_t *k = kbuf; + while (count > 0) { + *gprs_high = *k++; + *gprs_high += 2; + count -= sizeof(*k); + } + } else { + const compat_ulong_t __user *u = ubuf; + while (count > 0 && !rc) { + unsigned long word; + rc = __get_user(word, u++); + if (rc) + break; + *gprs_high = word; + *gprs_high += 2; + count -= sizeof(*u); + } + } + + return rc; +} + +static int s390_compat_last_break_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + compat_ulong_t last_break = target->thread.last_break; + + return membuf_store(&to, (unsigned long)last_break); +} + +static int s390_compat_last_break_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return 0; +} + +static const struct user_regset s390_compat_regsets[] = { + { + .core_note_type = NT_PRSTATUS, + .n = sizeof(s390_compat_regs) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .regset_get = s390_compat_regs_get, + .set = s390_compat_regs_set, + }, + { + .core_note_type = NT_PRFPREG, + .n = sizeof(s390_fp_regs) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .regset_get = s390_fpregs_get, + .set = s390_fpregs_set, + }, + { + .core_note_type = NT_S390_SYSTEM_CALL, + .n = 1, + .size = sizeof(compat_uint_t), + .align = sizeof(compat_uint_t), + .regset_get = s390_system_call_get, + .set = s390_system_call_set, + }, + { + .core_note_type = NT_S390_LAST_BREAK, + .n = 1, + .size = sizeof(long), + .align = sizeof(long), + .regset_get = s390_compat_last_break_get, + .set = s390_compat_last_break_set, + }, + { + .core_note_type = NT_S390_TDB, + .n = 1, + .size = 256, + .align = 1, + .regset_get = s390_tdb_get, + .set = s390_tdb_set, + }, + { + .core_note_type = NT_S390_VXRS_LOW, + .n = __NUM_VXRS_LOW, + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_vxrs_low_get, + .set = s390_vxrs_low_set, + }, + { + .core_note_type = NT_S390_VXRS_HIGH, + .n = __NUM_VXRS_HIGH, + .size = sizeof(__vector128), + .align = sizeof(__vector128), + .regset_get = s390_vxrs_high_get, + .set = s390_vxrs_high_set, + }, + { + .core_note_type = NT_S390_HIGH_GPRS, + .n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t), + .size = sizeof(compat_long_t), + .align = sizeof(compat_long_t), + .regset_get = 
s390_compat_regs_high_get, + .set = s390_compat_regs_high_set, + }, + { + .core_note_type = NT_S390_GS_CB, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_gs_cb_get, + .set = s390_gs_cb_set, + }, + { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, + { + .core_note_type = NT_S390_RI_CB, + .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .regset_get = s390_runtime_instr_get, + .set = s390_runtime_instr_set, + }, +}; + +static const struct user_regset_view user_s390_compat_view = { + .name = "s390", + .e_machine = EM_S390, + .regsets = s390_compat_regsets, + .n = ARRAY_SIZE(s390_compat_regsets) +}; +#endif + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_31BIT)) + return &user_s390_compat_view; +#endif + return &user_s390_view; +} + +static const char *gpr_names[NUM_GPRS] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", +}; + +unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset) +{ + if (offset >= NUM_GPRS) + return 0; + return regs->gprs[offset]; +} + +int regs_query_register_offset(const char *name) +{ + unsigned long offset; + + if (!name || *name != 'r') + return -EINVAL; + if (kstrtoul(name + 1, 10, &offset)) + return -EINVAL; + if (offset >= NUM_GPRS) + return -EINVAL; + return offset; +} + +const char *regs_query_register_name(unsigned int offset) +{ + if (offset >= NUM_GPRS) + return NULL; + return gpr_names[offset]; +} + +static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + unsigned long ksp = kernel_stack_pointer(regs); + + return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs:pt_regs which contains kernel stack pointer. + * @n:stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specifined by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. 
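/*
 * [Illustrative sketch, not part of this patch] The regset tables declared
 * above are what PTRACE_GETREGSET / PTRACE_SETREGSET and the core-dump
 * notes operate on; task_user_regset_view() selects the 64-bit or compat
 * table.  Fetching a single note type from the tracer, e.g.
 * NT_S390_LAST_BREAK (one unsigned long, see s390_last_break_get() above),
 * could look like this; the helper name is invented for the example.
 */
#include <elf.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

static long get_last_break(pid_t pid, unsigned long *last_break)
{
	struct iovec iov = {
		.iov_base = last_break,
		.iov_len  = sizeof(*last_break),
	};

	return ptrace(PTRACE_GETREGSET, pid, (void *) NT_S390_LAST_BREAK, &iov);
}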
+ */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long addr; + + addr = kernel_stack_pointer(regs) + n * sizeof(long); + if (!regs_within_kernel_stack(regs, addr)) + return 0; + return *(unsigned long *)addr; +} diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S new file mode 100644 index 0000000000..88087a32eb --- /dev/null +++ b/arch/s390/kernel/reipl.S @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp 2000, 2011 + * Author(s): Holger Smolinski <Holger.Smolinski@de.ibm.com>, + * Denis Joseph Barrow, + */ + +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/nospec-insn.h> +#include <asm/sigp.h> + + GEN_BR_THUNK %r9 + +# +# Issue "store status" for the current CPU to its prefix page +# and call passed function afterwards +# +# r2 = Function to be called after store status +# r3 = Parameter for function +# +SYM_CODE_START(store_status) + /* Save register one and load save area base */ + stg %r1,__LC_SAVE_AREA_RESTART + /* General purpose registers */ + lghi %r1,__LC_GPREGS_SAVE_AREA + stmg %r0,%r15,0(%r1) + mvc 8(8,%r1),__LC_SAVE_AREA_RESTART + /* Control registers */ + lghi %r1,__LC_CREGS_SAVE_AREA + stctg %c0,%c15,0(%r1) + /* Access registers */ + lghi %r1,__LC_AREGS_SAVE_AREA + stam %a0,%a15,0(%r1) + /* Floating point registers */ + lghi %r1,__LC_FPREGS_SAVE_AREA + std %f0, 0x00(%r1) + std %f1, 0x08(%r1) + std %f2, 0x10(%r1) + std %f3, 0x18(%r1) + std %f4, 0x20(%r1) + std %f5, 0x28(%r1) + std %f6, 0x30(%r1) + std %f7, 0x38(%r1) + std %f8, 0x40(%r1) + std %f9, 0x48(%r1) + std %f10,0x50(%r1) + std %f11,0x58(%r1) + std %f12,0x60(%r1) + std %f13,0x68(%r1) + std %f14,0x70(%r1) + std %f15,0x78(%r1) + /* Floating point control register */ + lghi %r1,__LC_FP_CREG_SAVE_AREA + stfpc 0(%r1) + /* CPU timer */ + lghi %r1,__LC_CPU_TIMER_SAVE_AREA + stpt 0(%r1) + /* Store prefix register */ + lghi %r1,__LC_PREFIX_SAVE_AREA + stpx 0(%r1) + /* Clock comparator - seven bytes */ + lghi %r1,__LC_CLOCK_COMP_SAVE_AREA + larl %r4,clkcmp + stckc 0(%r4) + mvc 1(7,%r1),1(%r4) + /* Program status word */ + lghi %r1,__LC_PSW_SAVE_AREA + epsw %r4,%r5 + st %r4,0(%r1) + st %r5,4(%r1) + stg %r2,8(%r1) + lgr %r9,%r2 + lgr %r2,%r3 + BR_EX %r9 +SYM_CODE_END(store_status) + + .section .bss + .balign 8 +SYM_DATA_LOCAL(clkcmp, .quad 0x0000000000000000) + .previous diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S new file mode 100644 index 0000000000..0ae297c82a --- /dev/null +++ b/arch/s390/kernel/relocate_kernel.S @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2005 + * + * Author(s): Rolf Adelsberger + * + */ + +#include <linux/linkage.h> +#include <asm/page.h> +#include <asm/sigp.h> + +/* + * moves the new kernel to its destination... + * %r2 = pointer to first kimage_entry_t + * %r3 = start address - where to jump to after the job is done... + * %r4 = subcode + * + * %r5 will be used as temp. storage + * %r6 holds the destination address + * %r7 = PAGE_SIZE + * %r8 holds the source address + * %r9 = PAGE_SIZE + * + * 0xf000 is a page_mask + */ + + .text +SYM_CODE_START(relocate_kernel) + basr %r13,0 # base address +.base: + lghi %r7,PAGE_SIZE # load PAGE_SIZE in r7 + lghi %r9,PAGE_SIZE # load PAGE_SIZE in r9 + lg %r5,0(%r2) # read another word for indirection page + aghi %r2,8 # increment pointer + tml %r5,0x1 # is it a destination page? 
+ je .indir_check # NO, goto "indir_check" + lgr %r6,%r5 # r6 = r5 + nill %r6,0xf000 # mask it out and... + j .base # ...next iteration +.indir_check: + tml %r5,0x2 # is it a indirection page? + je .done_test # NO, goto "done_test" + nill %r5,0xf000 # YES, mask out, + lgr %r2,%r5 # move it into the right register, + j .base # and read next... +.done_test: + tml %r5,0x4 # is it the done indicator? + je .source_test # NO! Well, then it should be the source indicator... + j .done # ok, lets finish it here... +.source_test: + tml %r5,0x8 # it should be a source indicator... + je .base # NO, ignore it... + lgr %r8,%r5 # r8 = r5 + nill %r8,0xf000 # masking +0: mvcle %r6,%r8,0x0 # copy PAGE_SIZE bytes from r8 to r6 - pad with 0 + jo 0b + j .base +.done: + lgr %r0,%r4 # subcode + cghi %r3,0 + je .diag + la %r4,load_psw-.base(%r13) # load psw-address into the register + o %r3,4(%r4) # or load address into psw + st %r3,4(%r4) + mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0 +.diag: + diag %r0,%r0,0x308 +SYM_CODE_END(relocate_kernel) + + .balign 8 +SYM_DATA_START_LOCAL(load_psw) + .long 0x00080000,0x80000000 +SYM_DATA_END_LABEL(load_psw, SYM_L_LOCAL, relocate_kernel_end) + .balign 8 +SYM_DATA(relocate_kernel_len, .quad relocate_kernel_end - relocate_kernel) diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c new file mode 100644 index 0000000000..af10e6bdd3 --- /dev/null +++ b/arch/s390/kernel/rethook.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/rethook.h> +#include <linux/kprobes.h> +#include "rethook.h" + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + rh->ret_addr = regs->gprs[14]; + rh->frame = regs->gprs[15]; + + /* Replace the return addr with trampoline addr */ + regs->gprs[14] = (unsigned long)&arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); + +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr) +{ + /* Replace fake return address with real one. */ + regs->gprs[14] = correct_ret_addr; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +/* + * Called from arch_rethook_trampoline + */ +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + return rethook_trampoline_handler(regs, regs->gprs[15]); +} +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +/* assembler function that handles the rethook must not be probed itself */ +NOKPROBE_SYMBOL(arch_rethook_trampoline); diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h new file mode 100644 index 0000000000..32f069eed3 --- /dev/null +++ b/arch/s390/kernel/rethook.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __S390_RETHOOK_H +#define __S390_RETHOOK_H + +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs); + +#endif diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c new file mode 100644 index 0000000000..1788a5454b --- /dev/null +++ b/arch/s390/kernel/runtime_instr.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 
2012 + * Author(s): Jan Glauber <jang@linux.vnet.ibm.com> + */ + +#include <linux/kernel.h> +#include <linux/syscalls.h> +#include <linux/signal.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/kernel_stat.h> +#include <linux/sched/task_stack.h> + +#include <asm/runtime_instr.h> +#include <asm/cpu_mf.h> +#include <asm/irq.h> + +#include "entry.h" + +/* empty control block to disable RI by loading it */ +struct runtime_instr_cb runtime_instr_empty_cb; + +void runtime_instr_release(struct task_struct *tsk) +{ + kfree(tsk->thread.ri_cb); +} + +static void disable_runtime_instr(void) +{ + struct task_struct *task = current; + struct pt_regs *regs; + + if (!task->thread.ri_cb) + return; + regs = task_pt_regs(task); + preempt_disable(); + load_runtime_instr_cb(&runtime_instr_empty_cb); + kfree(task->thread.ri_cb); + task->thread.ri_cb = NULL; + preempt_enable(); + + /* + * Make sure the RI bit is deleted from the PSW. If the user did not + * switch off RI before the system call the process will get a + * specification exception otherwise. + */ + regs->psw.mask &= ~PSW_MASK_RI; +} + +static void init_runtime_instr_cb(struct runtime_instr_cb *cb) +{ + cb->rla = 0xfff; + cb->s = 1; + cb->k = 1; + cb->ps = 1; + cb->pc = 1; + cb->key = PAGE_DEFAULT_KEY >> 4; + cb->v = 1; +} + +/* + * The signum argument is unused. In older kernels it was used to + * specify a real-time signal. For backwards compatibility user space + * should pass a valid real-time signal number (the signum argument + * was checked in older kernels). + */ +SYSCALL_DEFINE2(s390_runtime_instr, int, command, int, signum) +{ + struct runtime_instr_cb *cb; + + if (!test_facility(64)) + return -EOPNOTSUPP; + + if (command == S390_RUNTIME_INSTR_STOP) { + disable_runtime_instr(); + return 0; + } + + if (command != S390_RUNTIME_INSTR_START) + return -EINVAL; + + if (!current->thread.ri_cb) { + cb = kzalloc(sizeof(*cb), GFP_KERNEL); + if (!cb) + return -ENOMEM; + } else { + cb = current->thread.ri_cb; + memset(cb, 0, sizeof(*cb)); + } + + init_runtime_instr_cb(cb); + + /* now load the control block to make it available */ + preempt_disable(); + current->thread.ri_cb = cb; + load_runtime_instr_cb(cb); + preempt_enable(); + return 0; +} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c new file mode 100644 index 0000000000..de6ad0fb23 --- /dev/null +++ b/arch/s390/kernel/setup.c @@ -0,0 +1,1012 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 
1999, 2012 + * Author(s): Hartmut Penner (hp@de.ibm.com), + * Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Derived from "arch/i386/kernel/setup.c" + * Copyright (C) 1995, Linus Torvalds + */ + +/* + * This file handles the architecture-dependent parts of initialization + */ + +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/cpu.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/random.h> +#include <linux/user.h> +#include <linux/tty.h> +#include <linux/ioport.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/initrd.h> +#include <linux/root_dev.h> +#include <linux/console.h> +#include <linux/kernel_stat.h> +#include <linux/dma-map-ops.h> +#include <linux/device.h> +#include <linux/notifier.h> +#include <linux/pfn.h> +#include <linux/ctype.h> +#include <linux/reboot.h> +#include <linux/topology.h> +#include <linux/kexec.h> +#include <linux/crash_dump.h> +#include <linux/memory.h> +#include <linux/compat.h> +#include <linux/start_kernel.h> +#include <linux/hugetlb.h> +#include <linux/kmemleak.h> + +#include <asm/archrandom.h> +#include <asm/boot_data.h> +#include <asm/ipl.h> +#include <asm/facility.h> +#include <asm/smp.h> +#include <asm/mmu_context.h> +#include <asm/cpcmd.h> +#include <asm/abs_lowcore.h> +#include <asm/nmi.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/ptrace.h> +#include <asm/sections.h> +#include <asm/ebcdic.h> +#include <asm/diag.h> +#include <asm/os_info.h> +#include <asm/sclp.h> +#include <asm/stacktrace.h> +#include <asm/sysinfo.h> +#include <asm/numa.h> +#include <asm/alternative.h> +#include <asm/nospec-branch.h> +#include <asm/physmem_info.h> +#include <asm/maccess.h> +#include <asm/uv.h> +#include <asm/asm-offsets.h> +#include "entry.h" + +/* + * Machine setup.. + */ +unsigned int console_mode = 0; +EXPORT_SYMBOL(console_mode); + +unsigned int console_devno = -1; +EXPORT_SYMBOL(console_devno); + +unsigned int console_irq = -1; +EXPORT_SYMBOL(console_irq); + +/* + * Some code and data needs to stay below 2 GB, even when the kernel would be + * relocated above 2 GB, because it has to use 31 bit addresses. + * Such code and data is part of the .amode31 section. + */ +char __amode31_ref *__samode31 = _samode31; +char __amode31_ref *__eamode31 = _eamode31; +char __amode31_ref *__stext_amode31 = _stext_amode31; +char __amode31_ref *__etext_amode31 = _etext_amode31; +struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table; +struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table; + +/* + * Control registers CR2, CR5 and CR15 are initialized with addresses + * of tables that must be placed below 2G which is handled by the AMODE31 + * sections. + * Because the AMODE31 sections are relocated below 2G at startup, + * the content of control registers CR2, CR5 and CR15 must be updated + * with new addresses after the relocation. The initial initialization of + * control registers occurs in head64.S and then gets updated again after AMODE31 + * relocation. We must access the relevant AMODE31 tables indirectly via + * pointers placed in the .amode31.refs linker section. 
Those pointers get + * updated automatically during AMODE31 relocation and always contain a valid + * address within AMODE31 sections. + */ + +static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64); + +static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = { + [1] = 0xffffffffffffffff +}; + +static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = { + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0x80000000, 0, 0, 0 +}; + +static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = { + 0, 0, 0x89000000, 0, + 0, 0, 0x8a000000, 0 +}; + +static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31; +static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31; +static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; +static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; + +unsigned long __bootdata_preserved(max_mappable); +unsigned long __bootdata(ident_map_size); +struct physmem_info __bootdata(physmem_info); + +unsigned long __bootdata_preserved(__kaslr_offset); +int __bootdata_preserved(__kaslr_enabled); +unsigned int __bootdata_preserved(zlib_dfltcc_support); +EXPORT_SYMBOL(zlib_dfltcc_support); +u64 __bootdata_preserved(stfle_fac_list[16]); +EXPORT_SYMBOL(stfle_fac_list); +u64 __bootdata_preserved(alt_stfle_fac_list[16]); +struct oldmem_data __bootdata_preserved(oldmem_data); + +unsigned long VMALLOC_START; +EXPORT_SYMBOL(VMALLOC_START); + +unsigned long VMALLOC_END; +EXPORT_SYMBOL(VMALLOC_END); + +struct page *vmemmap; +EXPORT_SYMBOL(vmemmap); +unsigned long vmemmap_size; + +unsigned long MODULES_VADDR; +unsigned long MODULES_END; + +/* An array with a pointer to the lowcore of every CPU. */ +struct lowcore *lowcore_ptr[NR_CPUS]; +EXPORT_SYMBOL(lowcore_ptr); + +DEFINE_STATIC_KEY_FALSE(cpu_has_bear); + +/* + * The Write Back bit position in the physaddr is given by the SLPC PCI. + * Leaving the mask zero always uses write through which is safe + */ +unsigned long mio_wb_bit_mask __ro_after_init; + +/* + * This is set up by the setup-routine at boot-time + * for S390 need to find out, what we have to setup + * using address 0x10400 ... + */ + +#include <asm/setup.h> + +/* + * condev= and conmode= setup parameter. 
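+ *
+ * Illustrative example (the device number is made up): booting with
+ *
+ *	condev=0x001f conmode=3270
+ *
+ * makes condev_setup() below record device 0x001f for the console and
+ * conmode_setup() select the 3270 console, provided the corresponding
+ * console driver is configured. Accepted conmode values are "hwc",
+ * "sclp", "3215" and "3270".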
+ */ + +static int __init condev_setup(char *str) +{ + int vdev; + + vdev = simple_strtoul(str, &str, 0); + if (vdev >= 0 && vdev < 65536) { + console_devno = vdev; + console_irq = -1; + } + return 1; +} + +__setup("condev=", condev_setup); + +static void __init set_preferred_console(void) +{ + if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + add_preferred_console("ttyS", 0, NULL); + else if (CONSOLE_IS_3270) + add_preferred_console("tty3270", 0, NULL); + else if (CONSOLE_IS_VT220) + add_preferred_console("ttysclp", 0, NULL); + else if (CONSOLE_IS_HVC) + add_preferred_console("hvc", 0, NULL); +} + +static int __init conmode_setup(char *str) +{ +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + if (!strcmp(str, "hwc") || !strcmp(str, "sclp")) + SET_CONSOLE_SCLP; +#endif +#if defined(CONFIG_TN3215_CONSOLE) + if (!strcmp(str, "3215")) + SET_CONSOLE_3215; +#endif +#if defined(CONFIG_TN3270_CONSOLE) + if (!strcmp(str, "3270")) + SET_CONSOLE_3270; +#endif + set_preferred_console(); + return 1; +} + +__setup("conmode=", conmode_setup); + +static void __init conmode_default(void) +{ + char query_buffer[1024]; + char *ptr; + + if (MACHINE_IS_VM) { + cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL); + console_devno = simple_strtoul(query_buffer + 5, NULL, 16); + ptr = strstr(query_buffer, "SUBCHANNEL ="); + console_irq = simple_strtoul(ptr + 13, NULL, 16); + cpcmd("QUERY TERM", query_buffer, 1024, NULL); + ptr = strstr(query_buffer, "CONMODE"); + /* + * Set the conmode to 3215 so that the device recognition + * will set the cu_type of the console to 3215. If the + * conmode is 3270 and we don't set it back then both + * 3215 and the 3270 driver will try to access the console + * device (3215 as console and 3270 as normal tty). + */ + cpcmd("TERM CONMODE 3215", NULL, 0, NULL); + if (ptr == NULL) { +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + SET_CONSOLE_SCLP; +#endif + return; + } + if (str_has_prefix(ptr + 8, "3270")) { +#if defined(CONFIG_TN3270_CONSOLE) + SET_CONSOLE_3270; +#elif defined(CONFIG_TN3215_CONSOLE) + SET_CONSOLE_3215; +#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + SET_CONSOLE_SCLP; +#endif + } else if (str_has_prefix(ptr + 8, "3215")) { +#if defined(CONFIG_TN3215_CONSOLE) + SET_CONSOLE_3215; +#elif defined(CONFIG_TN3270_CONSOLE) + SET_CONSOLE_3270; +#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + SET_CONSOLE_SCLP; +#endif + } + } else if (MACHINE_IS_KVM) { + if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) + SET_CONSOLE_VT220; + else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) + SET_CONSOLE_SCLP; + else + SET_CONSOLE_HVC; + } else { +#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) + SET_CONSOLE_SCLP; +#endif + } +} + +#ifdef CONFIG_CRASH_DUMP +static void __init setup_zfcpdump(void) +{ + if (!is_ipl_type_dump()) + return; + if (oldmem_data.start) + return; + strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev"); + console_loglevel = 2; +} +#else +static inline void setup_zfcpdump(void) {} +#endif /* CONFIG_CRASH_DUMP */ + + /* + * Reboot, halt and power_off stubs. They just call _machine_restart, + * _machine_halt or _machine_power_off. + */ + +void machine_restart(char *command) +{ + if ((!in_interrupt() && !in_atomic()) || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. 
+ */ + console_unblank(); + _machine_restart(command); +} + +void machine_halt(void) +{ + if (!in_interrupt() || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. + */ + console_unblank(); + _machine_halt(); +} + +void machine_power_off(void) +{ + if (!in_interrupt() || oops_in_progress) + /* + * Only unblank the console if we are called in enabled + * context or a bust_spinlocks cleared the way for us. + */ + console_unblank(); + _machine_power_off(); +} + +/* + * Dummy power off function. + */ +void (*pm_power_off)(void) = machine_power_off; +EXPORT_SYMBOL_GPL(pm_power_off); + +void *restart_stack; + +unsigned long stack_alloc(void) +{ +#ifdef CONFIG_VMAP_STACK + void *ret; + + ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(ret); + return (unsigned long)ret; +#else + return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); +#endif +} + +void stack_free(unsigned long stack) +{ +#ifdef CONFIG_VMAP_STACK + vfree((void *) stack); +#else + free_pages(stack, THREAD_SIZE_ORDER); +#endif +} + +void __init __noreturn arch_call_rest_init(void) +{ + smp_reinit_ipl_cpu(); + rest_init(); +} + +static unsigned long __init stack_alloc_early(void) +{ + unsigned long stack; + + stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); + if (!stack) { + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, THREAD_SIZE, THREAD_SIZE); + } + return stack; +} + +static void __init setup_lowcore(void) +{ + struct lowcore *lc, *abs_lc; + + /* + * Setup lowcore for boot cpu + */ + BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE); + lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc)); + if (!lc) + panic("%s: Failed to allocate %zu bytes align=%zx\n", + __func__, sizeof(*lc), sizeof(*lc)); + + lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT; + lc->restart_psw.addr = __pa(restart_int_handler); + lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->external_new_psw.addr = (unsigned long) ext_int_handler; + lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->svc_new_psw.addr = (unsigned long) system_call; + lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->program_new_psw.addr = (unsigned long) pgm_check_handler; + lc->mcck_new_psw.mask = PSW_KERNEL_BITS; + lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; + lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; + lc->io_new_psw.addr = (unsigned long) io_int_handler; + lc->clock_comparator = clock_comparator_max; + lc->current_task = (unsigned long)&init_task; + lc->lpp = LPP_MAGIC; + lc->machine_flags = S390_lowcore.machine_flags; + lc->preempt_count = S390_lowcore.preempt_count; + nmi_alloc_mcesa_early(&lc->mcesad); + lc->sys_enter_timer = S390_lowcore.sys_enter_timer; + lc->exit_timer = S390_lowcore.exit_timer; + lc->user_timer = S390_lowcore.user_timer; + lc->system_timer = S390_lowcore.system_timer; + lc->steal_timer = S390_lowcore.steal_timer; + lc->last_update_timer = S390_lowcore.last_update_timer; + lc->last_update_clock = S390_lowcore.last_update_clock; + /* + * Allocate the global restart stack which is the same for + * all CPUs in case *one* of them does a PSW restart. 
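+ * (Each stack allocated below is a THREAD_SIZE area; adding
+ * STACK_INIT_OFFSET points near the top of that area, since the stack
+ * grows downwards towards lower addresses.)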
+ */ + restart_stack = (void *)(stack_alloc_early() + STACK_INIT_OFFSET); + lc->mcck_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->async_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->nodat_stack = stack_alloc_early() + STACK_INIT_OFFSET; + lc->kernel_stack = S390_lowcore.kernel_stack; + /* + * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant + * restart data to the absolute zero lowcore. This is necessary if + * PSW restart is done on an offline CPU that has lowcore zero. + */ + lc->restart_stack = (unsigned long) restart_stack; + lc->restart_fn = (unsigned long) do_restart; + lc->restart_data = 0; + lc->restart_source = -1U; + __ctl_store(lc->cregs_save_area, 0, 15); + lc->spinlock_lockval = arch_spin_lockval(0); + lc->spinlock_index = 0; + arch_spin_lock_setup(0); + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); + lc->preempt_count = PREEMPT_DISABLED; + lc->kernel_asce = S390_lowcore.kernel_asce; + lc->user_asce = S390_lowcore.user_asce; + + abs_lc = get_abs_lowcore(); + abs_lc->restart_stack = lc->restart_stack; + abs_lc->restart_fn = lc->restart_fn; + abs_lc->restart_data = lc->restart_data; + abs_lc->restart_source = lc->restart_source; + abs_lc->restart_psw = lc->restart_psw; + abs_lc->restart_flags = RESTART_FLAG_CTLREGS; + memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area)); + abs_lc->program_new_psw = lc->program_new_psw; + abs_lc->mcesad = lc->mcesad; + put_abs_lowcore(abs_lc); + + set_prefix(__pa(lc)); + lowcore_ptr[0] = lc; + if (abs_lowcore_map(0, lowcore_ptr[0], false)) + panic("Couldn't setup absolute lowcore"); +} + +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, +}; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, +}; + +static struct resource __initdata *standard_resources[] = { + &code_resource, + &data_resource, + &bss_resource, +}; + +static void __init setup_resources(void) +{ + struct resource *res, *std_res, *sub_res; + phys_addr_t start, end; + int j; + u64 i; + + code_resource.start = (unsigned long) _text; + code_resource.end = (unsigned long) _etext - 1; + data_resource.start = (unsigned long) _etext; + data_resource.end = (unsigned long) _edata - 1; + bss_resource.start = (unsigned long) __bss_start; + bss_resource.end = (unsigned long) __bss_stop - 1; + + for_each_mem_range(i, &start, &end) { + res = memblock_alloc(sizeof(*res), 8); + if (!res) + panic("%s: Failed to allocate %zu bytes align=0x%x\n", + __func__, sizeof(*res), 8); + res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; + + res->name = "System RAM"; + res->start = start; + /* + * In memblock, end points to the first byte after the + * range while in resources, end points to the last byte in + * the range. 
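+ * For example (illustrative numbers): a memblock range covering
+ * [0x100000, 0x200000) becomes a resource with .start == 0x100000 and
+ * .end == 0x1fffff, hence the "end - 1" below.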
+ */ + res->end = end - 1; + request_resource(&iomem_resource, res); + + for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { + std_res = standard_resources[j]; + if (std_res->start < res->start || + std_res->start > res->end) + continue; + if (std_res->end > res->end) { + sub_res = memblock_alloc(sizeof(*sub_res), 8); + if (!sub_res) + panic("%s: Failed to allocate %zu bytes align=0x%x\n", + __func__, sizeof(*sub_res), 8); + *sub_res = *std_res; + sub_res->end = res->end; + std_res->start = res->end + 1; + request_resource(res, sub_res); + } else { + request_resource(res, std_res); + } + } + } +#ifdef CONFIG_CRASH_DUMP + /* + * Re-add removed crash kernel memory as reserved memory. This makes + * sure it will be mapped with the identity mapping and struct pages + * will be created, so it can be resized later on. + * However add it later since the crash kernel resource should not be + * part of the System RAM resource. + */ + if (crashk_res.end) { + memblock_add_node(crashk_res.start, resource_size(&crashk_res), + 0, MEMBLOCK_NONE); + memblock_reserve(crashk_res.start, resource_size(&crashk_res)); + insert_resource(&iomem_resource, &crashk_res); + } +#endif +} + +static void __init setup_memory_end(void) +{ + max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); + pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); +} + +#ifdef CONFIG_CRASH_DUMP + +/* + * When kdump is enabled, we have to ensure that no memory from the area + * [0 - crashkernel memory size] is set offline - it will be exchanged with + * the crashkernel memory region when kdump is triggered. The crashkernel + * memory region can never get offlined (pages are unmovable). + */ +static int kdump_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct memory_notify *arg = data; + + if (action != MEM_GOING_OFFLINE) + return NOTIFY_OK; + if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res))) + return NOTIFY_BAD; + return NOTIFY_OK; +} + +static struct notifier_block kdump_mem_nb = { + .notifier_call = kdump_mem_notifier, +}; + +#endif + +/* + * Reserve page tables created by decompressor + */ +static void __init reserve_pgtables(void) +{ + unsigned long start, end; + struct reserved_range *range; + + for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end) + memblock_reserve(start, end - start); +} + +/* + * Reserve memory for kdump kernel to be loaded with kexec + */ +static void __init reserve_crashkernel(void) +{ +#ifdef CONFIG_CRASH_DUMP + unsigned long long crash_base, crash_size; + phys_addr_t low, high; + int rc; + + rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size, + &crash_base); + + crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN); + crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN); + if (rc || crash_size == 0) + return; + + if (memblock.memory.regions[0].size < crash_size) { + pr_info("crashkernel reservation failed: %s\n", + "first memory chunk must be at least crashkernel size"); + return; + } + + low = crash_base ?: oldmem_data.start; + high = low + crash_size; + if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) { + /* The crashkernel fits into OLDMEM, reuse OLDMEM */ + crash_base = low; + } else { + /* Find suitable area in free memory */ + low = max_t(unsigned long, crash_size, sclp.hsa_size); + high = crash_base ? 
crash_base + crash_size : ULONG_MAX; + + if (crash_base && crash_base < low) { + pr_info("crashkernel reservation failed: %s\n", + "crash_base too low"); + return; + } + low = crash_base ?: low; + crash_base = memblock_phys_alloc_range(crash_size, + KEXEC_CRASH_MEM_ALIGN, + low, high); + } + + if (!crash_base) { + pr_info("crashkernel reservation failed: %s\n", + "no suitable area found"); + return; + } + + if (register_memory_notifier(&kdump_mem_nb)) { + memblock_phys_free(crash_base, crash_size); + return; + } + + if (!oldmem_data.start && MACHINE_IS_VM) + diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + memblock_remove(crash_base, crash_size); + pr_info("Reserving %lluMB of memory at %lluMB " + "for crashkernel (System RAM: %luMB)\n", + crash_size >> 20, crash_base >> 20, + (unsigned long)memblock.memory.total_size >> 20); + os_info_crashkernel_add(crash_base, crash_size); +#endif +} + +/* + * Reserve the initrd from being used by memblock + */ +static void __init reserve_initrd(void) +{ + unsigned long addr, size; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size)) + return; + initrd_start = (unsigned long)__va(addr); + initrd_end = initrd_start + size; + memblock_reserve(addr, size); +} + +/* + * Reserve the memory area used to pass the certificate lists + */ +static void __init reserve_certificate_list(void) +{ + if (ipl_cert_list_addr) + memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size); +} + +static void __init reserve_physmem_info(void) +{ + unsigned long addr, size; + + if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + memblock_reserve(addr, size); +} + +static void __init free_physmem_info(void) +{ + unsigned long addr, size; + + if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size)) + memblock_phys_free(addr, size); +} + +static void __init memblock_add_physmem_info(void) +{ + unsigned long start, end; + int i; + + pr_debug("physmem info source: %s (%hhd)\n", + get_physmem_info_source(), physmem_info.info_source); + /* keep memblock lists close to the kernel */ + memblock_set_bottom_up(true); + for_each_physmem_usable_range(i, &start, &end) + memblock_add(start, end - start); + for_each_physmem_online_range(i, &start, &end) + memblock_physmem_add(start, end - start); + memblock_set_bottom_up(false); + memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); +} + +/* + * Reserve memory used for lowcore/command line/kernel image. 
+ */ +static void __init reserve_kernel(void) +{ + memblock_reserve(0, STARTUP_NORMAL_OFFSET); + memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); + memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); + memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31); + memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); + memblock_reserve(__pa(_stext), _end - _stext); +} + +static void __init setup_memory(void) +{ + phys_addr_t start, end; + u64 i; + + /* + * Init storage key for present memory + */ + for_each_mem_range(i, &start, &end) + storage_key_init_range(start, end); + + psw_set_key(PAGE_DEFAULT_KEY); +} + +static void __init relocate_amode31_section(void) +{ + unsigned long amode31_size = __eamode31 - __samode31; + long amode31_offset, *ptr; + + amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31; + pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); + + /* Move original AMODE31 section to the new one */ + memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size); + /* Zero out the old AMODE31 section to catch invalid accesses within it */ + memset(__samode31, 0, amode31_size); + + /* Update all AMODE31 region references */ + for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++) + *ptr += amode31_offset; +} + +/* This must be called after AMODE31 relocation */ +static void __init setup_cr(void) +{ + union ctlreg2 cr2; + union ctlreg5 cr5; + union ctlreg15 cr15; + + __ctl_duct[1] = (unsigned long)__ctl_aste; + __ctl_duct[2] = (unsigned long)__ctl_aste; + __ctl_duct[4] = (unsigned long)__ctl_duald; + + /* Update control registers CR2, CR5 and CR15 */ + __ctl_store(cr2.val, 2, 2); + __ctl_store(cr5.val, 5, 5); + __ctl_store(cr15.val, 15, 15); + cr2.ducto = (unsigned long)__ctl_duct >> 6; + cr5.pasteo = (unsigned long)__ctl_duct >> 6; + cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3; + __ctl_load(cr2.val, 2, 2); + __ctl_load(cr5.val, 5, 5); + __ctl_load(cr15.val, 15, 15); +} + +/* + * Add system information as device randomness + */ +static void __init setup_randomness(void) +{ + struct sysinfo_3_2_2 *vmms; + + vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!vmms) + panic("Failed to allocate memory for sysinfo structure\n"); + if (stsi(vmms, 3, 2, 2) == 0 && vmms->count) + add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count); + memblock_free(vmms, PAGE_SIZE); + + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + static_branch_enable(&s390_arch_random_available); +} + +/* + * Find the correct size for the task_struct. This depends on + * the size of the struct fpu at the end of the thread_struct + * which is embedded in the task_struct. + */ +static void __init setup_task_size(void) +{ + int task_size = sizeof(struct task_struct); + + if (!MACHINE_HAS_VX) { + task_size -= sizeof(__vector128) * __NUM_VXRS; + task_size += sizeof(freg_t) * __NUM_FPRS; + } + arch_task_struct_size = task_size; +} + +/* + * Issue diagnose 318 to set the control program name and + * version codes. 
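+ * (As the code below shows, the name code (cpnc) and the version code
+ * (cpvc) are packed into one 64-bit value and passed to DIAGNOSE X'318'
+ * in a general register; the diagnose is issued only if sclp.has_diag318
+ * is set.)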
+ */ +static void __init setup_control_program_code(void) +{ + union diag318_info diag318_info = { + .cpnc = CPNC_LINUX, + .cpvc = 0, + }; + + if (!sclp.has_diag318) + return; + + diag_stat_inc(DIAG_STAT_X318); + asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val)); +} + +/* + * Print the component list from the IPL report + */ +static void __init log_component_list(void) +{ + struct ipl_rb_component_entry *ptr, *end; + char *str; + + if (!early_ipl_comp_list_addr) + return; + if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL) + pr_info("Linux is running with Secure-IPL enabled\n"); + else + pr_info("Linux is running with Secure-IPL disabled\n"); + ptr = __va(early_ipl_comp_list_addr); + end = (void *) ptr + early_ipl_comp_list_size; + pr_info("The IPL report contains the following components:\n"); + while (ptr < end) { + if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) { + if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED) + str = "signed, verified"; + else + str = "signed, verification failed"; + } else { + str = "not signed"; + } + pr_info("%016llx - %016llx (%s)\n", + ptr->addr, ptr->addr + ptr->len, str); + ptr++; + } +} + +/* + * Setup function called from init/main.c just after the banner + * was printed. + */ + +void __init setup_arch(char **cmdline_p) +{ + /* + * print what head.S has found out about the machine + */ + if (MACHINE_IS_VM) + pr_info("Linux is running as a z/VM " + "guest operating system in 64-bit mode\n"); + else if (MACHINE_IS_KVM) + pr_info("Linux is running under KVM in 64-bit mode\n"); + else if (MACHINE_IS_LPAR) + pr_info("Linux is running natively in 64-bit mode\n"); + else + pr_info("Linux is running as a guest in 64-bit mode\n"); + + log_component_list(); + + /* Have one command line that is parsed and saved in /proc/cmdline */ + /* boot_command_line has been already set up in early.c */ + *cmdline_p = boot_command_line; + + ROOT_DEV = Root_RAM0; + + setup_initial_init_mm(_text, _etext, _edata, _end); + + if (IS_ENABLED(CONFIG_EXPOLINE_AUTO)) + nospec_auto_detect(); + + jump_label_init(); + parse_early_param(); +#ifdef CONFIG_CRASH_DUMP + /* Deactivate elfcorehdr= kernel parameter */ + elfcorehdr_addr = ELFCORE_ADDR_MAX; +#endif + + os_info_init(); + setup_ipl(); + setup_task_size(); + setup_control_program_code(); + + /* Do some memory reservations *before* memory is added to memblock */ + reserve_pgtables(); + reserve_kernel(); + reserve_initrd(); + reserve_certificate_list(); + reserve_physmem_info(); + memblock_set_current_limit(ident_map_size); + memblock_allow_resize(); + + /* Get information about *all* installed memory */ + memblock_add_physmem_info(); + + free_physmem_info(); + setup_memory_end(); + memblock_dump_all(); + setup_memory(); + + relocate_amode31_section(); + setup_cr(); + setup_uv(); + dma_contiguous_reserve(ident_map_size); + vmcp_cma_reserve(); + if (MACHINE_HAS_EDAT2) + hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); + + reserve_crashkernel(); +#ifdef CONFIG_CRASH_DUMP + /* + * Be aware that smp_save_dump_secondary_cpus() triggers a system reset. + * Therefore CPU and device initialization should be done afterwards. + */ + smp_save_dump_secondary_cpus(); +#endif + + setup_resources(); + setup_lowcore(); + smp_fill_possible_mask(); + cpu_detect_mhz_feature(); + cpu_init(); + numa_setup(); + smp_detect_cpus(); + topology_init_early(); + + if (test_facility(193)) + static_branch_enable(&cpu_has_bear); + + /* + * Create kernel page tables. 
+ */ + paging_init(); + + /* + * After paging_init created the kernel page table, the new PSWs + * in lowcore can now run with DAT enabled. + */ +#ifdef CONFIG_CRASH_DUMP + smp_save_dump_ipl_cpu(); +#endif + + /* Setup default console */ + conmode_default(); + set_preferred_console(); + + apply_alternative_instructions(); + if (IS_ENABLED(CONFIG_EXPOLINE)) + nospec_init_branches(); + + /* Setup zfcp/nvme dump support */ + setup_zfcpdump(); + + /* Add system specific data to the random pool */ + setup_randomness(); +} diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c new file mode 100644 index 0000000000..d63557d386 --- /dev/null +++ b/arch/s390/kernel/signal.c @@ -0,0 +1,534 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 1999, 2006 + * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * + * Based on Intel version + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson + */ + +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/entry-common.h> +#include <linux/errno.h> +#include <linux/wait.h> +#include <linux/ptrace.h> +#include <linux/unistd.h> +#include <linux/stddef.h> +#include <linux/tty.h> +#include <linux/personality.h> +#include <linux/binfmts.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <asm/ucontext.h> +#include <linux/uaccess.h> +#include <asm/lowcore.h> +#include <asm/switch_to.h> +#include <asm/vdso.h> +#include "entry.h" + +/* + * Layout of an old-style signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | struct sigcontext | + * | oldmask | + * | _sigregs * | + * ----------------------------------------- + * | _sigregs with | + * | _s390_regs_common | + * | _s390_fp_regs | + * ----------------------------------------- + * | int signo | + * ----------------------------------------- + * | _sigregs_ext with | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt) | + * | reserved 128 byte (opt) | + * ----------------------------------------- + * | __u16 svc_insn | + * ----------------------------------------- + * The svc_insn entry with the sigreturn system call opcode does not + * have a fixed position and moves if gprs_high or vxrs exist. + * Future extensions will be added to _sigregs_ext. + */ +struct sigframe +{ + __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; + struct sigcontext sc; + _sigregs sregs; + int signo; + _sigregs_ext sregs_ext; + __u16 svc_insn; /* Offset of svc_insn is NOT fixed! 
*/ +}; + +/* + * Layout of an rt signal-frame: + * ----------------------------------------- + * | save area (_SIGNAL_FRAMESIZE) | + * ----------------------------------------- + * | svc __NR_rt_sigreturn 2 byte | + * ----------------------------------------- + * | struct siginfo | + * ----------------------------------------- + * | struct ucontext_extended with | + * | unsigned long uc_flags | + * | struct ucontext *uc_link | + * | stack_t uc_stack | + * | _sigregs uc_mcontext with | + * | _s390_regs_common | + * | _s390_fp_regs | + * | sigset_t uc_sigmask | + * | _sigregs_ext uc_mcontext_ext | + * | gprs_high 64 byte (opt) | + * | vxrs_low 128 byte (opt) | + * | vxrs_high 256 byte (opt)| + * | reserved 128 byte (opt) | + * ----------------------------------------- + * Future extensions will be added to _sigregs_ext. + */ +struct rt_sigframe +{ + __u8 callee_used_stack[__SIGNAL_FRAMESIZE]; + __u16 svc_insn; + struct siginfo info; + struct ucontext_extended uc; +}; + +/* Store registers needed to create the signal frame */ +static void store_sigregs(void) +{ + save_access_regs(current->thread.acrs); + save_fpu_regs(); +} + +/* Load registers after signal return */ +static void load_sigregs(void) +{ + restore_access_regs(current->thread.acrs); +} + +/* Returns non-zero on fault. */ +static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) +{ + _sigregs user_sregs; + + /* Copy a 'clean' PSW mask to the user to avoid leaking + information about whether PER is currently on. */ + user_sregs.regs.psw.mask = PSW_USER_BITS | + (regs->psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); + user_sregs.regs.psw.addr = regs->psw.addr; + memcpy(&user_sregs.regs.gprs, ®s->gprs, sizeof(sregs->regs.gprs)); + memcpy(&user_sregs.regs.acrs, current->thread.acrs, + sizeof(user_sregs.regs.acrs)); + fpregs_store(&user_sregs.fpregs, ¤t->thread.fpu); + if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs))) + return -EFAULT; + return 0; +} + +static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) +{ + _sigregs user_sregs; + + /* Always make any pending restarted system call return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + if (__copy_from_user(&user_sregs, sregs, sizeof(user_sregs))) + return -EFAULT; + + if (!is_ri_task(current) && (user_sregs.regs.psw.mask & PSW_MASK_RI)) + return -EINVAL; + + /* Test the floating-point-control word. */ + if (test_fp_ctl(user_sregs.fpregs.fpc)) + return -EINVAL; + + /* Use regs->psw.mask instead of PSW_USER_BITS to preserve PER bit. */ + regs->psw.mask = (regs->psw.mask & ~(PSW_MASK_USER | PSW_MASK_RI)) | + (user_sregs.regs.psw.mask & (PSW_MASK_USER | PSW_MASK_RI)); + /* Check for invalid user address space control. */ + if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_HOME) + regs->psw.mask = PSW_ASC_PRIMARY | + (regs->psw.mask & ~PSW_MASK_ASC); + /* Check for invalid amode */ + if (regs->psw.mask & PSW_MASK_EA) + regs->psw.mask |= PSW_MASK_BA; + regs->psw.addr = user_sregs.regs.psw.addr; + memcpy(®s->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs)); + memcpy(¤t->thread.acrs, &user_sregs.regs.acrs, + sizeof(current->thread.acrs)); + + fpregs_load(&user_sregs.fpregs, ¤t->thread.fpu); + + clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */ + return 0; +} + +/* Returns non-zero on fault. 
*/ +static int save_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Save vector registers to signal stack */ + if (MACHINE_HAS_VX) { + for (i = 0; i < __NUM_VXRS_LOW; i++) + vxrs[i] = current->thread.fpu.vxrs[i].low; + if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, + sizeof(sregs_ext->vxrs_low)) || + __copy_to_user(&sregs_ext->vxrs_high, + current->thread.fpu.vxrs + __NUM_VXRS_LOW, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + } + return 0; +} + +static int restore_sigregs_ext(struct pt_regs *regs, + _sigregs_ext __user *sregs_ext) +{ + __u64 vxrs[__NUM_VXRS_LOW]; + int i; + + /* Restore vector registers from signal stack */ + if (MACHINE_HAS_VX) { + if (__copy_from_user(vxrs, &sregs_ext->vxrs_low, + sizeof(sregs_ext->vxrs_low)) || + __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW, + &sregs_ext->vxrs_high, + sizeof(sregs_ext->vxrs_high))) + return -EFAULT; + for (i = 0; i < __NUM_VXRS_LOW; i++) + current->thread.fpu.vxrs[i].low = vxrs[i]; + } + return 0; +} + +SYSCALL_DEFINE0(sigreturn) +{ + struct pt_regs *regs = task_pt_regs(current); + struct sigframe __user *frame = + (struct sigframe __user *) regs->gprs[15]; + sigset_t set; + + if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE)) + goto badframe; + set_current_blocked(&set); + save_fpu_regs(); + if (restore_sigregs(regs, &frame->sregs)) + goto badframe; + if (restore_sigregs_ext(regs, &frame->sregs_ext)) + goto badframe; + load_sigregs(); + return regs->gprs[2]; +badframe: + force_sig(SIGSEGV); + return 0; +} + +SYSCALL_DEFINE0(rt_sigreturn) +{ + struct pt_regs *regs = task_pt_regs(current); + struct rt_sigframe __user *frame = + (struct rt_sigframe __user *)regs->gprs[15]; + sigset_t set; + + if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + set_current_blocked(&set); + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + save_fpu_regs(); + if (restore_sigregs(regs, &frame->uc.uc_mcontext)) + goto badframe; + if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) + goto badframe; + load_sigregs(); + return regs->gprs[2]; +badframe: + force_sig(SIGSEGV); + return 0; +} + +/* + * Determine which stack to use.. + */ +static inline void __user * +get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) +{ + unsigned long sp; + + /* Default to using normal stack */ + sp = regs->gprs[15]; + + /* Overflow on alternate signal stack gives SIGSEGV. */ + if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL)) + return (void __user *) -1UL; + + /* This is the X/Open sanctioned signal stack switching. */ + if (ka->sa.sa_flags & SA_ONSTACK) { + if (! sas_ss_flags(sp)) + sp = current->sas_ss_sp + current->sas_ss_size; + } + + return (void __user *)((sp - frame_size) & -8ul); +} + +static int setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) +{ + struct sigframe __user *frame; + struct sigcontext sc; + unsigned long restorer; + size_t frame_size; + + /* + * gprs_high are only present for a 31-bit task running on + * a 64-bit kernel (see compat_signal.c) but the space for + * gprs_high need to be allocated if vector registers are + * included in the signal frame on a 31-bit system. + */ + frame_size = sizeof(*frame) - sizeof(frame->sregs_ext); + if (MACHINE_HAS_VX) + frame_size += sizeof(frame->sregs_ext); + frame = get_sigframe(ka, regs, frame_size); + if (frame == (void __user *) -1UL) + return -EFAULT; + + /* Set up backchain. 
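+ * (The first slot of an s390 stack frame is the back chain: it holds
+ * the caller's stack pointer. Storing regs->gprs[15] there lets a
+ * backtrace walk from the signal handler's frame back into the
+ * interrupted context.)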
*/ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + return -EFAULT; + + /* Create struct sigcontext on the signal stack */ + memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE); + sc.sregs = (_sigregs __user __force *) &frame->sregs; + if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create _sigregs on the signal stack */ + if (save_sigregs(regs, &frame->sregs)) + return -EFAULT; + + /* Place signal number on stack to allow backtrace from handler. */ + if (__put_user(regs->gprs[2], (int __user *) &frame->signo)) + return -EFAULT; + + /* Create _sigregs_ext on the signal stack */ + if (save_sigregs_ext(regs, &frame->sregs_ext)) + return -EFAULT; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ka->sa.sa_flags & SA_RESTORER) + restorer = (unsigned long) ka->sa.sa_restorer; + else + restorer = VDSO64_SYMBOL(current, sigreturn); + + /* Set up registers for signal handler */ + regs->gprs[14] = restorer; + regs->gprs[15] = (unsigned long) frame; + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (unsigned long) ka->sa.sa_handler; + + regs->gprs[2] = sig; + regs->gprs[3] = (unsigned long) &frame->sc; + + /* We forgot to include these in the sigcontext. + To avoid breaking binary compatibility, they are passed as args. */ + if (sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || + sig == SIGTRAP || sig == SIGFPE) { + /* set extra registers only for synchronous signals */ + regs->gprs[4] = regs->int_code & 127; + regs->gprs[5] = regs->int_parm_long; + regs->gprs[6] = current->thread.last_break; + } + return 0; +} + +static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + unsigned long uc_flags, restorer; + size_t frame_size; + + frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext); + /* + * gprs_high are only present for a 31-bit task running on + * a 64-bit kernel (see compat_signal.c) but the space for + * gprs_high need to be allocated if vector registers are + * included in the signal frame on a 31-bit system. + */ + uc_flags = 0; + if (MACHINE_HAS_VX) { + frame_size += sizeof(_sigregs_ext); + uc_flags |= UC_VXRS; + } + frame = get_sigframe(&ksig->ka, regs, frame_size); + if (frame == (void __user *) -1UL) + return -EFAULT; + + /* Set up backchain. */ + if (__put_user(regs->gprs[15], (addr_t __user *) frame)) + return -EFAULT; + + /* Set up to return from userspace. If provided, use a stub + already in userspace. */ + if (ksig->ka.sa.sa_flags & SA_RESTORER) + restorer = (unsigned long) ksig->ka.sa.sa_restorer; + else + restorer = VDSO64_SYMBOL(current, rt_sigreturn); + + /* Create siginfo on the signal stack */ + if (copy_siginfo_to_user(&frame->info, &ksig->info)) + return -EFAULT; + + /* Store registers needed to create the signal frame */ + store_sigregs(); + + /* Create ucontext on the signal stack. 
*/ + if (__put_user(uc_flags, &frame->uc.uc_flags) || + __put_user(NULL, &frame->uc.uc_link) || + __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || + save_sigregs(regs, &frame->uc.uc_mcontext) || + __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext)) + return -EFAULT; + + /* Set up registers for signal handler */ + regs->gprs[14] = restorer; + regs->gprs[15] = (unsigned long) frame; + /* Force default amode and default user address space control. */ + regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA | + (PSW_USER_BITS & PSW_MASK_ASC) | + (regs->psw.mask & ~PSW_MASK_ASC); + regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler; + + regs->gprs[2] = ksig->sig; + regs->gprs[3] = (unsigned long) &frame->info; + regs->gprs[4] = (unsigned long) &frame->uc; + regs->gprs[5] = current->thread.last_break; + return 0; +} + +static void handle_signal(struct ksignal *ksig, sigset_t *oldset, + struct pt_regs *regs) +{ + int ret; + + /* Set up the stack frame */ + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame(ksig, oldset, regs); + else + ret = setup_frame(ksig->sig, &ksig->ka, oldset, regs); + + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP)); +} + +/* + * Note that 'init' is a special process: it doesn't get signals it doesn't + * want to handle. Thus you cannot kill init even with a SIGKILL even by + * mistake. + * + * Note that we go through the signals twice: once to check the signals that + * the kernel can handle, and then we build all the user-level signal handling + * stack-frames in one go after that. + */ + +void arch_do_signal_or_restart(struct pt_regs *regs) +{ + struct ksignal ksig; + sigset_t *oldset = sigmask_to_save(); + + /* + * Get signal to deliver. When running under ptrace, at this point + * the debugger may change all our registers, including the system + * call information. + */ + current->thread.system_call = + test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0; + + if (get_signal(&ksig)) { + /* Whee! Actually deliver the signal. */ + if (current->thread.system_call) { + regs->int_code = current->thread.system_call; + /* Check for system call restarting. 
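+ * (Restarting works by putting the saved original argument back into
+ * %r2 and rewinding the PSW by the instruction length stored in the
+ * upper half of int_code, so the system call instruction is executed
+ * again once control returns to user space.)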
*/ + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->gprs[2] = -EINTR; + break; + case -ERESTARTSYS: + if (!(ksig.ka.sa.sa_flags & SA_RESTART)) { + regs->gprs[2] = -EINTR; + break; + } + fallthrough; + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = + __rewind_psw(regs->psw, + regs->int_code >> 16); + break; + } + } + /* No longer in a system call */ + clear_pt_regs_flag(regs, PIF_SYSCALL); + + rseq_signal_deliver(&ksig, regs); + if (is_compat_task()) + handle_signal32(&ksig, oldset, regs); + else + handle_signal(&ksig, oldset, regs); + return; + } + + /* No handlers present - check for system call restart */ + clear_pt_regs_flag(regs, PIF_SYSCALL); + if (current->thread.system_call) { + regs->int_code = current->thread.system_call; + switch (regs->gprs[2]) { + case -ERESTART_RESTARTBLOCK: + /* Restart with sys_restart_syscall */ + regs->gprs[2] = regs->orig_gpr2; + current->restart_block.arch_data = regs->psw.addr; + if (is_compat_task()) + regs->psw.addr = VDSO32_SYMBOL(current, restart_syscall); + else + regs->psw.addr = VDSO64_SYMBOL(current, restart_syscall); + if (test_thread_flag(TIF_SINGLE_STEP)) + clear_thread_flag(TIF_PER_TRAP); + break; + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->gprs[2] = regs->orig_gpr2; + regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16); + if (test_thread_flag(TIF_SINGLE_STEP)) + clear_thread_flag(TIF_PER_TRAP); + break; + } + } + + /* + * If there's no signal to deliver, we just put the saved sigmask back. + */ + restore_saved_sigmask(); +} diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c new file mode 100644 index 0000000000..a4edb7ea66 --- /dev/null +++ b/arch/s390/kernel/smp.c @@ -0,0 +1,1317 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SMP related functions + * + * Copyright IBM Corp. 1999, 2012 + * Author(s): Denis Joseph Barrow, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * + * based on other smp stuff by + * (c) 1995 Alan Cox, CymruNET Ltd <alan@cymru.net> + * (c) 1998 Ingo Molnar + * + * The code outside of smp.c uses logical cpu numbers, only smp.c does + * the translation of logical to physical cpu ids. All new code that + * operates on physical cpu numbers needs to go into smp.c. 
+ */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/workqueue.h> +#include <linux/memblock.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/spinlock.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/irqflags.h> +#include <linux/irq_work.h> +#include <linux/cpu.h> +#include <linux/slab.h> +#include <linux/sched/hotplug.h> +#include <linux/sched/task_stack.h> +#include <linux/crash_dump.h> +#include <linux/kprobes.h> +#include <asm/asm-offsets.h> +#include <asm/pfault.h> +#include <asm/diag.h> +#include <asm/switch_to.h> +#include <asm/facility.h> +#include <asm/ipl.h> +#include <asm/setup.h> +#include <asm/irq.h> +#include <asm/tlbflush.h> +#include <asm/vtimer.h> +#include <asm/abs_lowcore.h> +#include <asm/sclp.h> +#include <asm/debug.h> +#include <asm/os_info.h> +#include <asm/sigp.h> +#include <asm/idle.h> +#include <asm/nmi.h> +#include <asm/stacktrace.h> +#include <asm/topology.h> +#include <asm/vdso.h> +#include <asm/maccess.h> +#include "entry.h" + +enum { + ec_schedule = 0, + ec_call_function_single, + ec_stop_cpu, + ec_mcck_pending, + ec_irq_work, +}; + +enum { + CPU_STATE_STANDBY, + CPU_STATE_CONFIGURED, +}; + +static DEFINE_PER_CPU(struct cpu *, cpu_device); + +struct pcpu { + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + unsigned long ec_clk; /* sigp timestamp for ec_xxx */ + signed char state; /* physical cpu state */ + signed char polarization; /* physical polarization */ + u16 address; /* physical cpu address */ +}; + +static u8 boot_core_type; +static struct pcpu pcpu_devices[NR_CPUS]; + +unsigned int smp_cpu_mt_shift; +EXPORT_SYMBOL(smp_cpu_mt_shift); + +unsigned int smp_cpu_mtid; +EXPORT_SYMBOL(smp_cpu_mtid); + +#ifdef CONFIG_CRASH_DUMP +__vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS]; +#endif + +static unsigned int smp_max_threads __initdata = -1U; +cpumask_t cpu_setup_mask; + +static int __init early_nosmt(char *s) +{ + smp_max_threads = 1; + return 0; +} +early_param("nosmt", early_nosmt); + +static int __init early_smt(char *s) +{ + get_option(&s, &smp_max_threads); + return 0; +} +early_param("smt", early_smt); + +/* + * The smp_cpu_state_mutex must be held when changing the state or polarization + * member of a pcpu data structure within the pcpu_devices array. + */ +DEFINE_MUTEX(smp_cpu_state_mutex); + +/* + * Signal processor helper functions. + */ +static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm) +{ + int cc; + + while (1) { + cc = __pcpu_sigp(addr, order, parm, NULL); + if (cc != SIGP_CC_BUSY) + return cc; + cpu_relax(); + } +} + +static int pcpu_sigp_retry(struct pcpu *pcpu, u8 order, u32 parm) +{ + int cc, retry; + + for (retry = 0; ; retry++) { + cc = __pcpu_sigp(pcpu->address, order, parm, NULL); + if (cc != SIGP_CC_BUSY) + break; + if (retry >= 3) + udelay(10); + } + return cc; +} + +static inline int pcpu_stopped(struct pcpu *pcpu) +{ + u32 status; + + if (__pcpu_sigp(pcpu->address, SIGP_SENSE, + 0, &status) != SIGP_CC_STATUS_STORED) + return 0; + return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); +} + +static inline int pcpu_running(struct pcpu *pcpu) +{ + if (__pcpu_sigp(pcpu->address, SIGP_SENSE_RUNNING, + 0, NULL) != SIGP_CC_STATUS_STORED) + return 1; + /* Status stored condition code is equivalent to cpu not running. */ + return 0; +} + +/* + * Find struct pcpu by cpu address. 
+ */ +static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) +{ + int cpu; + + for_each_cpu(cpu, mask) + if (pcpu_devices[cpu].address == address) + return pcpu_devices + cpu; + return NULL; +} + +static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit) +{ + int order; + + if (test_and_set_bit(ec_bit, &pcpu->ec_mask)) + return; + order = pcpu_running(pcpu) ? SIGP_EXTERNAL_CALL : SIGP_EMERGENCY_SIGNAL; + pcpu->ec_clk = get_tod_clock_fast(); + pcpu_sigp_retry(pcpu, order, 0); +} + +static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) +{ + unsigned long async_stack, nodat_stack, mcck_stack; + struct lowcore *lc; + + lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); + async_stack = stack_alloc(); + mcck_stack = stack_alloc(); + if (!lc || !nodat_stack || !async_stack || !mcck_stack) + goto out; + memcpy(lc, &S390_lowcore, 512); + memset((char *) lc + 512, 0, sizeof(*lc) - 512); + lc->async_stack = async_stack + STACK_INIT_OFFSET; + lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET; + lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET; + lc->cpu_nr = cpu; + lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->spinlock_index = 0; + lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); + lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); + lc->preempt_count = PREEMPT_DISABLED; + if (nmi_alloc_mcesa(&lc->mcesad)) + goto out; + if (abs_lowcore_map(cpu, lc, true)) + goto out_mcesa; + lowcore_ptr[cpu] = lc; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, __pa(lc)); + return 0; + +out_mcesa: + nmi_free_mcesa(&lc->mcesad); +out: + stack_free(mcck_stack); + stack_free(async_stack); + free_pages(nodat_stack, THREAD_SIZE_ORDER); + free_pages((unsigned long) lc, LC_ORDER); + return -ENOMEM; +} + +static void pcpu_free_lowcore(struct pcpu *pcpu) +{ + unsigned long async_stack, nodat_stack, mcck_stack; + struct lowcore *lc; + int cpu; + + cpu = pcpu - pcpu_devices; + lc = lowcore_ptr[cpu]; + nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET; + async_stack = lc->async_stack - STACK_INIT_OFFSET; + mcck_stack = lc->mcck_stack - STACK_INIT_OFFSET; + pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0); + lowcore_ptr[cpu] = NULL; + abs_lowcore_unmap(cpu); + nmi_free_mcesa(&lc->mcesad); + stack_free(async_stack); + stack_free(mcck_stack); + free_pages(nodat_stack, THREAD_SIZE_ORDER); + free_pages((unsigned long) lc, LC_ORDER); +} + +static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) +{ + struct lowcore *lc, *abs_lc; + + lc = lowcore_ptr[cpu]; + cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); + lc->cpu_nr = cpu; + lc->restart_flags = RESTART_FLAG_CTLREGS; + lc->spinlock_lockval = arch_spin_lockval(cpu); + lc->spinlock_index = 0; + lc->percpu_offset = __per_cpu_offset[cpu]; + lc->kernel_asce = S390_lowcore.kernel_asce; + lc->user_asce = s390_invalid_asce; + lc->machine_flags = S390_lowcore.machine_flags; + lc->user_timer = lc->system_timer = + lc->steal_timer = lc->avg_steal_timer = 0; + abs_lc = get_abs_lowcore(); + memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area)); + put_abs_lowcore(abs_lc); + lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[7] = lc->user_asce; + save_access_regs((unsigned int *) lc->access_regs_save_area); + arch_spin_lock_setup(cpu); +} + +static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +{ + struct lowcore *lc; + int cpu; + + cpu = pcpu - pcpu_devices; + lc = 
lowcore_ptr[cpu]; + lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET; + lc->current_task = (unsigned long)tsk; + lc->lpp = LPP_MAGIC; + lc->current_pid = tsk->pid; + lc->user_timer = tsk->thread.user_timer; + lc->guest_timer = tsk->thread.guest_timer; + lc->system_timer = tsk->thread.system_timer; + lc->hardirq_timer = tsk->thread.hardirq_timer; + lc->softirq_timer = tsk->thread.softirq_timer; + lc->steal_timer = 0; +} + +static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +{ + struct lowcore *lc; + int cpu; + + cpu = pcpu - pcpu_devices; + lc = lowcore_ptr[cpu]; + lc->restart_stack = lc->kernel_stack; + lc->restart_fn = (unsigned long) func; + lc->restart_data = (unsigned long) data; + lc->restart_source = -1U; + pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); +} + +typedef void (pcpu_delegate_fn)(void *); + +/* + * Call function via PSW restart on pcpu and stop the current cpu. + */ +static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) +{ + func(data); /* should not return */ +} + +static void pcpu_delegate(struct pcpu *pcpu, + pcpu_delegate_fn *func, + void *data, unsigned long stack) +{ + struct lowcore *lc, *abs_lc; + unsigned int source_cpu; + + lc = lowcore_ptr[pcpu - pcpu_devices]; + source_cpu = stap(); + + if (pcpu->address == source_cpu) { + call_on_stack(2, stack, void, __pcpu_delegate, + pcpu_delegate_fn *, func, void *, data); + } + /* Stop target cpu (if func returns this stops the current cpu). */ + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + pcpu_sigp_retry(pcpu, SIGP_CPU_RESET, 0); + /* Restart func on the target cpu and stop the current cpu. */ + if (lc) { + lc->restart_stack = stack; + lc->restart_fn = (unsigned long)func; + lc->restart_data = (unsigned long)data; + lc->restart_source = source_cpu; + } else { + abs_lc = get_abs_lowcore(); + abs_lc->restart_stack = stack; + abs_lc->restart_fn = (unsigned long)func; + abs_lc->restart_data = (unsigned long)data; + abs_lc->restart_source = source_cpu; + put_abs_lowcore(abs_lc); + } + asm volatile( + "0: sigp 0,%0,%2 # sigp restart to target cpu\n" + " brc 2,0b # busy, try again\n" + "1: sigp 0,%1,%3 # sigp stop to current cpu\n" + " brc 2,1b # busy, try again\n" + : : "d" (pcpu->address), "d" (source_cpu), + "K" (SIGP_RESTART), "K" (SIGP_STOP) + : "0", "1", "cc"); + for (;;) ; +} + +/* + * Enable additional logical cpus for multi-threading. + */ +static int pcpu_set_smt(unsigned int mtid) +{ + int cc; + + if (smp_cpu_mtid == mtid) + return 0; + cc = __pcpu_sigp(0, SIGP_SET_MULTI_THREADING, mtid, NULL); + if (cc == 0) { + smp_cpu_mtid = mtid; + smp_cpu_mt_shift = 0; + while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) + smp_cpu_mt_shift++; + pcpu_devices[0].address = stap(); + } + return cc; +} + +/* + * Call function on an online CPU. + */ +void smp_call_online_cpu(void (*func)(void *), void *data) +{ + struct pcpu *pcpu; + + /* Use the current cpu if it is online. */ + pcpu = pcpu_find_address(cpu_online_mask, stap()); + if (!pcpu) + /* Use the first online cpu. */ + pcpu = pcpu_devices + cpumask_first(cpu_online_mask); + pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); +} + +/* + * Call function on the ipl CPU. 
+ */ +void smp_call_ipl_cpu(void (*func)(void *), void *data) +{ + struct lowcore *lc = lowcore_ptr[0]; + + if (pcpu_devices[0].address == stap()) + lc = &S390_lowcore; + + pcpu_delegate(&pcpu_devices[0], func, data, + lc->nodat_stack); +} + +int smp_find_processor_id(u16 address) +{ + int cpu; + + for_each_present_cpu(cpu) + if (pcpu_devices[cpu].address == address) + return cpu; + return -1; +} + +void schedule_mcck_handler(void) +{ + pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending); +} + +bool notrace arch_vcpu_is_preempted(int cpu) +{ + if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) + return false; + if (pcpu_running(pcpu_devices + cpu)) + return false; + return true; +} +EXPORT_SYMBOL(arch_vcpu_is_preempted); + +void notrace smp_yield_cpu(int cpu) +{ + if (!MACHINE_HAS_DIAG9C) + return; + diag_stat_inc_norecursion(DIAG_STAT_X09C); + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); +} +EXPORT_SYMBOL_GPL(smp_yield_cpu); + +/* + * Send cpus emergency shutdown signal. This gives the cpus the + * opportunity to complete outstanding interrupts. + */ +void notrace smp_emergency_stop(void) +{ + static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; + static cpumask_t cpumask; + u64 end; + int cpu; + + arch_spin_lock(&lock); + cpumask_copy(&cpumask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &cpumask); + + end = get_tod_clock() + (1000000UL << 12); + for_each_cpu(cpu, &cpumask) { + struct pcpu *pcpu = pcpu_devices + cpu; + set_bit(ec_stop_cpu, &pcpu->ec_mask); + while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, + 0, NULL) == SIGP_CC_BUSY && + get_tod_clock() < end) + cpu_relax(); + } + while (get_tod_clock() < end) { + for_each_cpu(cpu, &cpumask) + if (pcpu_stopped(pcpu_devices + cpu)) + cpumask_clear_cpu(cpu, &cpumask); + if (cpumask_empty(&cpumask)) + break; + cpu_relax(); + } + arch_spin_unlock(&lock); +} +NOKPROBE_SYMBOL(smp_emergency_stop); + +/* + * Stop all cpus but the current one. + */ +void smp_send_stop(void) +{ + int cpu; + + /* Disable all interrupts/machine checks */ + __load_psw_mask(PSW_KERNEL_BITS); + trace_hardirqs_off(); + + debug_set_critical(); + + if (oops_in_progress) + smp_emergency_stop(); + + /* stop all processors */ + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu_devices + cpu)) + cpu_relax(); + } +} + +/* + * This is the main routine where commands issued by other + * cpus are handled. + */ +static void smp_handle_ext_call(void) +{ + unsigned long bits; + + /* handle bit signal external calls */ + bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + if (test_bit(ec_stop_cpu, &bits)) + smp_stop_cpu(); + if (test_bit(ec_schedule, &bits)) + scheduler_ipi(); + if (test_bit(ec_call_function_single, &bits)) + generic_smp_call_function_single_interrupt(); + if (test_bit(ec_mcck_pending, &bits)) + s390_handle_mcck(); + if (test_bit(ec_irq_work, &bits)) + irq_work_run(); +} + +static void do_ext_call_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + inc_irq_stat(ext_code.code == 0x1202 ? 
IRQEXT_EXC : IRQEXT_EMS); + smp_handle_ext_call(); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + int cpu; + + for_each_cpu(cpu, mask) + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); +} + +/* + * this function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... + */ +void arch_smp_send_reschedule(int cpu) +{ + pcpu_ec_call(pcpu_devices + cpu, ec_schedule); +} + +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work); +} +#endif + +/* + * parameter area for the set/clear control bit callbacks + */ +struct ec_creg_mask_parms { + unsigned long orval; + unsigned long andval; + int cr; +}; + +/* + * callback for setting/clearing control bits + */ +static void smp_ctl_bit_callback(void *info) +{ + struct ec_creg_mask_parms *pp = info; + unsigned long cregs[16]; + + __ctl_store(cregs, 0, 15); + cregs[pp->cr] = (cregs[pp->cr] & pp->andval) | pp->orval; + __ctl_load(cregs, 0, 15); +} + +static DEFINE_SPINLOCK(ctl_lock); + +void smp_ctl_set_clear_bit(int cr, int bit, bool set) +{ + struct ec_creg_mask_parms parms = { .cr = cr, }; + struct lowcore *abs_lc; + u64 ctlreg; + + if (set) { + parms.orval = 1UL << bit; + parms.andval = -1UL; + } else { + parms.orval = 0; + parms.andval = ~(1UL << bit); + } + spin_lock(&ctl_lock); + abs_lc = get_abs_lowcore(); + ctlreg = abs_lc->cregs_save_area[cr]; + ctlreg = (ctlreg & parms.andval) | parms.orval; + abs_lc->cregs_save_area[cr] = ctlreg; + put_abs_lowcore(abs_lc); + on_each_cpu(smp_ctl_bit_callback, &parms, 1); + spin_unlock(&ctl_lock); +} +EXPORT_SYMBOL(smp_ctl_set_clear_bit); + +#ifdef CONFIG_CRASH_DUMP + +int smp_store_status(int cpu) +{ + struct lowcore *lc; + struct pcpu *pcpu; + unsigned long pa; + + pcpu = pcpu_devices + cpu; + lc = lowcore_ptr[cpu]; + pa = __pa(&lc->floating_pt_save_area); + if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, + pa) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + if (!MACHINE_HAS_VX && !MACHINE_HAS_GS) + return 0; + pa = lc->mcesad & MCESA_ORIGIN_MASK; + if (MACHINE_HAS_GS) + pa |= lc->mcesad & MCESA_LC_MASK; + if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS, + pa) != SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + return 0; +} + +/* + * Collect CPU state of the previous, crashed system. + * There are four cases: + * 1) standard zfcp/nvme dump + * condition: OLDMEM_BASE == NULL && is_ipl_type_dump() == true + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The boot CPU state is located in + * the absolute lowcore of the memory stored in the HSA. The zcore code + * will copy the boot CPU state from the HSA. + * 2) stand-alone kdump for SCSI/NVMe (zfcp/nvme dump with swapped memory) + * condition: OLDMEM_BASE != NULL && is_ipl_type_dump() == true + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The firmware or the boot-loader + * stored the registers of the boot CPU in the absolute lowcore in the + * memory of the old system. 
+ * 3) kdump and the old kernel did not store the CPU state, + * or stand-alone kdump for DASD + * condition: OLDMEM_BASE != NULL && !is_kdump_kernel() + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The kexec code or the boot-loader + * stored the registers of the boot CPU in the memory of the old system. + * 4) kdump and the old kernel stored the CPU state + * condition: OLDMEM_BASE != NULL && is_kdump_kernel() + * This case does not exist for s390 anymore, setup_arch explicitly + * deactivates the elfcorehdr= kernel parameter + */ +static bool dump_available(void) +{ + return oldmem_data.start || is_ipl_type_dump(); +} + +void __init smp_save_dump_ipl_cpu(void) +{ + struct save_area *sa; + void *regs; + + if (!dump_available()) + return; + sa = save_area_alloc(true); + regs = memblock_alloc(512, 8); + if (!sa || !regs) + panic("could not allocate memory for boot CPU save area\n"); + copy_oldmem_kernel(regs, __LC_FPREGS_SAVE_AREA, 512); + save_area_add_regs(sa, regs); + memblock_free(regs, 512); + if (MACHINE_HAS_VX) + save_area_add_vxrs(sa, boot_cpu_vector_save_area); +} + +void __init smp_save_dump_secondary_cpus(void) +{ + int addr, boot_cpu_addr, max_cpu_addr; + struct save_area *sa; + void *page; + + if (!dump_available()) + return; + /* Allocate a page as dumping area for the store status sigps */ + page = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); + if (!page) + panic("ERROR: Failed to allocate %lx bytes below %lx\n", + PAGE_SIZE, 1UL << 31); + + /* Set multi-threading state to the previous system. */ + pcpu_set_smt(sclp.mtid_prev); + boot_cpu_addr = stap(); + max_cpu_addr = SCLP_MAX_CORES << sclp.mtid_prev; + for (addr = 0; addr <= max_cpu_addr; addr++) { + if (addr == boot_cpu_addr) + continue; + if (__pcpu_sigp_relax(addr, SIGP_SENSE, 0) == + SIGP_CC_NOT_OPERATIONAL) + continue; + sa = save_area_alloc(false); + if (!sa) + panic("could not allocate memory for save area\n"); + __pcpu_sigp_relax(addr, SIGP_STORE_STATUS_AT_ADDRESS, __pa(page)); + save_area_add_regs(sa, page); + if (MACHINE_HAS_VX) { + __pcpu_sigp_relax(addr, SIGP_STORE_ADDITIONAL_STATUS, __pa(page)); + save_area_add_vxrs(sa, page); + } + } + memblock_free(page, PAGE_SIZE); + diag_amode31_ops.diag308_reset(); + pcpu_set_smt(0); +} +#endif /* CONFIG_CRASH_DUMP */ + +void smp_cpu_set_polarization(int cpu, int val) +{ + pcpu_devices[cpu].polarization = val; +} + +int smp_cpu_get_polarization(int cpu) +{ + return pcpu_devices[cpu].polarization; +} + +int smp_cpu_get_cpu_address(int cpu) +{ + return pcpu_devices[cpu].address; +} + +static void __ref smp_get_core_info(struct sclp_core_info *info, int early) +{ + static int use_sigp_detection; + int address; + + if (use_sigp_detection || sclp_get_core_info(info, early)) { + use_sigp_detection = 1; + for (address = 0; + address < (SCLP_MAX_CORES << smp_cpu_mt_shift); + address += (1U << smp_cpu_mt_shift)) { + if (__pcpu_sigp_relax(address, SIGP_SENSE, 0) == + SIGP_CC_NOT_OPERATIONAL) + continue; + info->core[info->configured].core_id = + address >> smp_cpu_mt_shift; + info->configured++; + } + info->combined = info->configured; + } +} + +static int smp_add_present_cpu(int cpu); + +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) +{ + struct pcpu *pcpu; + int cpu, nr, i; + u16 address; + + nr = 0; + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= 
smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + static cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + cpus_read_lock(); + mutex_lock(&smp_cpu_state_mutex); + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); + break; + } + } + } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } + mutex_unlock(&smp_cpu_state_mutex); + cpus_read_unlock(); + return nr; +} + +void __init smp_detect_cpus(void) +{ + unsigned int cpu, mtid, c_cpus, s_cpus; + struct sclp_core_info *info; + u16 address; + + /* Get CPU information */ + info = memblock_alloc(sizeof(*info), 8); + if (!info) + panic("%s: Failed to allocate %zu bytes align=0x%x\n", + __func__, sizeof(*info), 8); + smp_get_core_info(info, 1); + /* Find boot CPU type */ + if (sclp.has_core_type) { + address = stap(); + for (cpu = 0; cpu < info->combined; cpu++) + if (info->core[cpu].core_id == address) { + /* The boot cpu dictates the cpu type. */ + boot_core_type = info->core[cpu].type; + break; + } + if (cpu >= info->combined) + panic("Could not find boot CPU type"); + } + + /* Set multi-threading state for the current system */ + mtid = boot_core_type ? sclp.mtid : sclp.mtid_cp; + mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; + pcpu_set_smt(mtid); + + /* Print number of CPUs */ + c_cpus = s_cpus = 0; + for (cpu = 0; cpu < info->combined; cpu++) { + if (sclp.has_core_type && + info->core[cpu].type != boot_core_type) + continue; + if (cpu < info->configured) + c_cpus += smp_cpu_mtid + 1; + else + s_cpus += smp_cpu_mtid + 1; + } + pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); + + /* Add CPUs present at boot */ + __smp_rescan_cpus(info, true); + memblock_free(info, sizeof(*info)); +} + +/* + * Activate a secondary processor. 
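+ * smp_start_secondary() is started on the new CPU via SIGP restart from + * pcpu_start_fn(): it initializes the cpu timer, vtime and vdso, marks the + * CPU online and finally enters the idle loop via cpu_startup_entry().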
+ */ +static void smp_start_secondary(void *cpuvoid) +{ + int cpu = raw_smp_processor_id(); + + S390_lowcore.last_update_clock = get_tod_clock(); + S390_lowcore.restart_stack = (unsigned long)restart_stack; + S390_lowcore.restart_fn = (unsigned long)do_restart; + S390_lowcore.restart_data = 0; + S390_lowcore.restart_source = -1U; + S390_lowcore.restart_flags = 0; + restore_access_regs(S390_lowcore.access_regs_save_area); + cpu_init(); + rcu_cpu_starting(cpu); + init_cpu_timer(); + vtime_init(); + vdso_getcpu_init(); + pfault_init(); + cpumask_set_cpu(cpu, &cpu_setup_mask); + update_cpu_masks(); + notify_cpu_starting(cpu); + if (topology_cpu_dedicated(cpu)) + set_cpu_flag(CIF_DEDICATED_CPU); + else + clear_cpu_flag(CIF_DEDICATED_CPU); + set_cpu_online(cpu, true); + inc_irq_stat(CPU_RST); + local_irq_enable(); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} + +/* Upping and downing of CPUs */ +int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + struct pcpu *pcpu = pcpu_devices + cpu; + int rc; + + if (pcpu->state != CPU_STATE_CONFIGURED) + return -EIO; + if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != + SIGP_CC_ORDER_CODE_ACCEPTED) + return -EIO; + + rc = pcpu_alloc_lowcore(pcpu, cpu); + if (rc) + return rc; + /* + * Make sure global control register contents do not change + * until new CPU has initialized control registers. + */ + spin_lock(&ctl_lock); + pcpu_prepare_secondary(pcpu, cpu); + pcpu_attach_task(pcpu, tidle); + pcpu_start_fn(pcpu, smp_start_secondary, NULL); + /* Wait until cpu puts itself in the online & active maps */ + while (!cpu_online(cpu)) + cpu_relax(); + spin_unlock(&ctl_lock); + return 0; +} + +static unsigned int setup_possible_cpus __initdata; + +static int __init _setup_possible_cpus(char *s) +{ + get_option(&s, &setup_possible_cpus); + return 0; +} +early_param("possible_cpus", _setup_possible_cpus); + +int __cpu_disable(void) +{ + unsigned long cregs[16]; + int cpu; + + /* Handle possible pending IPIs */ + smp_handle_ext_call(); + cpu = smp_processor_id(); + set_cpu_online(cpu, false); + cpumask_clear_cpu(cpu, &cpu_setup_mask); + update_cpu_masks(); + /* Disable pseudo page faults on this cpu. */ + pfault_fini(); + /* Disable interrupt sources via control register. 
*/ + __ctl_store(cregs, 0, 15); + cregs[0] &= ~0x0000ee70UL; /* disable all external interrupts */ + cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */ + cregs[14] &= ~0x1f000000UL; /* disable most machine checks */ + __ctl_load(cregs, 0, 15); + clear_cpu_flag(CIF_NOHZ_DELAY); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + struct pcpu *pcpu; + + /* Wait until target cpu is down */ + pcpu = pcpu_devices + cpu; + while (!pcpu_stopped(pcpu)) + cpu_relax(); + pcpu_free_lowcore(pcpu); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); +} + +void __noreturn cpu_die(void) +{ + idle_task_exit(); + pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + for (;;) ; +} + +void __init smp_fill_possible_mask(void) +{ + unsigned int possible, sclp_max, cpu; + + sclp_max = max(sclp.mtid, sclp.mtid_cp) + 1; + sclp_max = min(smp_max_threads, sclp_max); + sclp_max = (sclp.max_cores * sclp_max) ?: nr_cpu_ids; + possible = setup_possible_cpus ?: nr_cpu_ids; + possible = min(possible, sclp_max); + for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) + set_cpu_possible(cpu, true); +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + /* request the 0x1201 emergency signal external interrupt */ + if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt)) + panic("Couldn't request external interrupt 0x1201"); + /* request the 0x1202 external call external interrupt */ + if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt)) + panic("Couldn't request external interrupt 0x1202"); +} + +void __init smp_prepare_boot_cpu(void) +{ + struct pcpu *pcpu = pcpu_devices; + + WARN_ON(!cpu_present(0) || !cpu_online(0)); + pcpu->state = CPU_STATE_CONFIGURED; + S390_lowcore.percpu_offset = __per_cpu_offset[0]; + smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); +} + +void __init smp_setup_processor_id(void) +{ + pcpu_devices[0].address = stap(); + S390_lowcore.cpu_nr = 0; + S390_lowcore.spinlock_lockval = arch_spin_lockval(0); + S390_lowcore.spinlock_index = 0; +} + +/* + * the frequency of the profiling timer can be changed + * by writing a multiplier value into /proc/profile. 
+ * + * usually you want to run this on all CPUs ;) + */ +int setup_profiling_timer(unsigned int multiplier) +{ + return 0; +} + +static ssize_t cpu_configure_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t cpu_configure_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct pcpu *pcpu; + int cpu, val, rc, i; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + cpus_read_lock(); + mutex_lock(&smp_cpu_state_mutex); + rc = -EBUSY; + /* disallow configuration changes of online cpus and cpu 0 */ + cpu = dev->id; + cpu = smp_get_base_cpu(cpu); + if (cpu == 0) + goto out; + for (i = 0; i <= smp_cpu_mtid; i++) + if (cpu_online(cpu + i)) + goto out; + pcpu = pcpu_devices + cpu; + rc = 0; + switch (val) { + case 0: + if (pcpu->state != CPU_STATE_CONFIGURED) + break; + rc = sclp_core_deconfigure(pcpu->address >> smp_cpu_mt_shift); + if (rc) + break; + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + pcpu[i].state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } + topology_expect_change(); + break; + case 1: + if (pcpu->state != CPU_STATE_STANDBY) + break; + rc = sclp_core_configure(pcpu->address >> smp_cpu_mt_shift); + if (rc) + break; + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + pcpu[i].state = CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } + topology_expect_change(); + break; + default: + break; + } +out: + mutex_unlock(&smp_cpu_state_mutex); + cpus_read_unlock(); + return rc ? 
rc : count; +} +static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store); + +static ssize_t show_cpu_address(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); +} +static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); + +static struct attribute *cpu_common_attrs[] = { + &dev_attr_configure.attr, + &dev_attr_address.attr, + NULL, +}; + +static struct attribute_group cpu_common_attr_group = { + .attrs = cpu_common_attrs, +}; + +static struct attribute *cpu_online_attrs[] = { + &dev_attr_idle_count.attr, + &dev_attr_idle_time_us.attr, + NULL, +}; + +static struct attribute_group cpu_online_attr_group = { + .attrs = cpu_online_attrs, +}; + +static int smp_cpu_online(unsigned int cpu) +{ + struct device *s = &per_cpu(cpu_device, cpu)->dev; + + return sysfs_create_group(&s->kobj, &cpu_online_attr_group); +} + +static int smp_cpu_pre_down(unsigned int cpu) +{ + struct device *s = &per_cpu(cpu_device, cpu)->dev; + + sysfs_remove_group(&s->kobj, &cpu_online_attr_group); + return 0; +} + +static int smp_add_present_cpu(int cpu) +{ + struct device *s; + struct cpu *c; + int rc; + + c = kzalloc(sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + per_cpu(cpu_device, cpu) = c; + s = &c->dev; + c->hotpluggable = 1; + rc = register_cpu(c, cpu); + if (rc) + goto out; + rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group); + if (rc) + goto out_cpu; + rc = topology_cpu_init(c); + if (rc) + goto out_topology; + return 0; + +out_topology: + sysfs_remove_group(&s->kobj, &cpu_common_attr_group); +out_cpu: + unregister_cpu(c); +out: + return rc; +} + +int __ref smp_rescan_cpus(void) +{ + struct sclp_core_info *info; + int nr; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + smp_get_core_info(info, 0); + nr = __smp_rescan_cpus(info, false); + kfree(info); + if (nr) + topology_schedule_update(); + return 0; +} + +static ssize_t __ref rescan_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int rc; + + rc = lock_device_hotplug_sysfs(); + if (rc) + return rc; + rc = smp_rescan_cpus(); + unlock_device_hotplug(); + return rc ? rc : count; +} +static DEVICE_ATTR_WO(rescan); + +static int __init s390_smp_init(void) +{ + struct device *dev_root; + int cpu, rc = 0; + + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + rc = device_create_file(dev_root, &dev_attr_rescan); + put_device(dev_root); + if (rc) + return rc; + } + + for_each_present_cpu(cpu) { + rc = smp_add_present_cpu(cpu); + if (rc) + goto out; + } + + rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "s390/smp:online", + smp_cpu_online, smp_cpu_pre_down); + rc = rc <= 0 ? 
rc : 0; +out: + return rc; +} +subsys_initcall(s390_smp_init); + +static __always_inline void set_new_lowcore(struct lowcore *lc) +{ + union register_pair dst, src; + u32 pfx; + + src.even = (unsigned long) &S390_lowcore; + src.odd = sizeof(S390_lowcore); + dst.even = (unsigned long) lc; + dst.odd = sizeof(*lc); + pfx = __pa(lc); + + asm volatile( + " mvcl %[dst],%[src]\n" + " spx %[pfx]\n" + : [dst] "+&d" (dst.pair), [src] "+&d" (src.pair) + : [pfx] "Q" (pfx) + : "memory", "cc"); +} + +int __init smp_reinit_ipl_cpu(void) +{ + unsigned long async_stack, nodat_stack, mcck_stack; + struct lowcore *lc, *lc_ipl; + unsigned long flags, cr0; + u64 mcesad; + + lc_ipl = lowcore_ptr[0]; + lc = (struct lowcore *) __get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER); + nodat_stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); + async_stack = stack_alloc(); + mcck_stack = stack_alloc(); + if (!lc || !nodat_stack || !async_stack || !mcck_stack || nmi_alloc_mcesa(&mcesad)) + panic("Couldn't allocate memory"); + + local_irq_save(flags); + local_mcck_disable(); + set_new_lowcore(lc); + S390_lowcore.nodat_stack = nodat_stack + STACK_INIT_OFFSET; + S390_lowcore.async_stack = async_stack + STACK_INIT_OFFSET; + S390_lowcore.mcck_stack = mcck_stack + STACK_INIT_OFFSET; + __ctl_store(cr0, 0, 0); + __ctl_clear_bit(0, 28); /* disable lowcore protection */ + S390_lowcore.mcesad = mcesad; + __ctl_load(cr0, 0, 0); + if (abs_lowcore_map(0, lc, false)) + panic("Couldn't remap absolute lowcore"); + lowcore_ptr[0] = lc; + local_mcck_enable(); + local_irq_restore(flags); + + memblock_free_late(__pa(lc_ipl->mcck_stack - STACK_INIT_OFFSET), THREAD_SIZE); + memblock_free_late(__pa(lc_ipl->async_stack - STACK_INIT_OFFSET), THREAD_SIZE); + memblock_free_late(__pa(lc_ipl->nodat_stack - STACK_INIT_OFFSET), THREAD_SIZE); + memblock_free_late(__pa(lc_ipl), sizeof(*lc_ipl)); + return 0; +} diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c new file mode 100644 index 0000000000..0787010139 --- /dev/null +++ b/arch/s390/kernel/stacktrace.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Stack trace management functions + * + * Copyright IBM Corp. 2006 + */ + +#include <linux/stacktrace.h> +#include <asm/stacktrace.h> +#include <asm/unwind.h> +#include <asm/kprobes.h> + +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) +{ + struct unwind_state state; + unsigned long addr; + + unwind_for_each_frame(&state, task, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || !consume_entry(cookie, addr)) + break; + } +} + +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) +{ + struct unwind_state state; + unsigned long addr; + + unwind_for_each_frame(&state, task, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) + return -EINVAL; + + if (state.regs) + return -EINVAL; + + addr = unwind_get_return_address(&state); + if (!addr) + return -EINVAL; + +#ifdef CONFIG_RETHOOK + /* + * Mark stacktraces with krethook functions on them + * as unreliable. 
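+ * The return address on the stack was replaced by the rethook + * trampoline, so the real caller is not visible to the unwinder.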
+ */ + if (state.ip == (unsigned long)arch_rethook_trampoline) + return -EINVAL; +#endif + + if (!consume_entry(cookie, addr)) + return -EINVAL; + } + + /* Check for stack corruption */ + if (unwind_error(&state)) + return -EINVAL; + return 0; +} diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c new file mode 100644 index 0000000000..30bb20461d --- /dev/null +++ b/arch/s390/kernel/sthyi.c @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * store hypervisor information instruction emulation functions. + * + * Copyright IBM Corp. 2016 + * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> + */ +#include <linux/errno.h> +#include <linux/pagemap.h> +#include <linux/vmalloc.h> +#include <linux/syscalls.h> +#include <linux/mutex.h> +#include <asm/asm-offsets.h> +#include <asm/sclp.h> +#include <asm/diag.h> +#include <asm/sysinfo.h> +#include <asm/ebcdic.h> +#include <asm/facility.h> +#include <asm/sthyi.h> +#include "entry.h" + +#define DED_WEIGHT 0xffff +/* + * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string + * as they are justified with spaces. + */ +#define CP 0xc3d7404040404040UL +#define IFL 0xc9c6d34040404040UL + +enum hdr_flags { + HDR_NOT_LPAR = 0x10, + HDR_STACK_INCM = 0x20, + HDR_STSI_UNAV = 0x40, + HDR_PERF_UNAV = 0x80, +}; + +enum mac_validity { + MAC_NAME_VLD = 0x20, + MAC_ID_VLD = 0x40, + MAC_CNT_VLD = 0x80, +}; + +enum par_flag { + PAR_MT_EN = 0x80, +}; + +enum par_validity { + PAR_GRP_VLD = 0x08, + PAR_ID_VLD = 0x10, + PAR_ABS_VLD = 0x20, + PAR_WGHT_VLD = 0x40, + PAR_PCNT_VLD = 0x80, +}; + +struct hdr_sctn { + u8 infhflg1; + u8 infhflg2; /* reserved */ + u8 infhval1; /* reserved */ + u8 infhval2; /* reserved */ + u8 reserved[3]; + u8 infhygct; + u16 infhtotl; + u16 infhdln; + u16 infmoff; + u16 infmlen; + u16 infpoff; + u16 infplen; + u16 infhoff1; + u16 infhlen1; + u16 infgoff1; + u16 infglen1; + u16 infhoff2; + u16 infhlen2; + u16 infgoff2; + u16 infglen2; + u16 infhoff3; + u16 infhlen3; + u16 infgoff3; + u16 infglen3; + u8 reserved2[4]; +} __packed; + +struct mac_sctn { + u8 infmflg1; /* reserved */ + u8 infmflg2; /* reserved */ + u8 infmval1; + u8 infmval2; /* reserved */ + u16 infmscps; + u16 infmdcps; + u16 infmsifl; + u16 infmdifl; + char infmname[8]; + char infmtype[4]; + char infmmanu[16]; + char infmseq[16]; + char infmpman[4]; + u8 reserved[4]; +} __packed; + +struct par_sctn { + u8 infpflg1; + u8 infpflg2; /* reserved */ + u8 infpval1; + u8 infpval2; /* reserved */ + u16 infppnum; + u16 infpscps; + u16 infpdcps; + u16 infpsifl; + u16 infpdifl; + u16 reserved; + char infppnam[8]; + u32 infpwbcp; + u32 infpabcp; + u32 infpwbif; + u32 infpabif; + char infplgnm[8]; + u32 infplgcp; + u32 infplgif; +} __packed; + +struct sthyi_sctns { + struct hdr_sctn hdr; + struct mac_sctn mac; + struct par_sctn par; +} __packed; + +struct cpu_inf { + u64 lpar_cap; + u64 lpar_grp_cap; + u64 lpar_weight; + u64 all_weight; + int cpu_num_ded; + int cpu_num_shd; +}; + +struct lpar_cpu_inf { + struct cpu_inf cp; + struct cpu_inf ifl; +}; + +/* + * STHYI requires extensive locking in the higher hypervisors + * and is very computational/memory expensive. Therefore we + * cache the retrieved data whose valid period is 1s. 
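+ * The cached copy lives in a single zeroed page protected by sthyi_mutex + * and is refreshed once sthyi_cache.end has passed.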
+ */ +#define CACHE_VALID_JIFFIES HZ + +struct sthyi_info { + void *info; + unsigned long end; +}; + +static DEFINE_MUTEX(sthyi_mutex); +static struct sthyi_info sthyi_cache; + +static inline u64 cpu_id(u8 ctidx, void *diag224_buf) +{ + return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); +} + +/* + * Scales the cpu capping from the lpar range to the one expected in + * sthyi data. + * + * diag204 reports a cap in hundredths of processor units. + * z/VM's range for one core is 0 - 0x10000. + */ +static u32 scale_cap(u32 in) +{ + return (0x10000 * in) / 100; +} + +static void fill_hdr(struct sthyi_sctns *sctns) +{ + sctns->hdr.infhdln = sizeof(sctns->hdr); + sctns->hdr.infmoff = sizeof(sctns->hdr); + sctns->hdr.infmlen = sizeof(sctns->mac); + sctns->hdr.infplen = sizeof(sctns->par); + sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; + sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; +} + +static void fill_stsi_mac(struct sthyi_sctns *sctns, + struct sysinfo_1_1_1 *sysinfo) +{ + sclp_ocf_cpc_name_copy(sctns->mac.infmname); + if (*(u64 *)sctns->mac.infmname != 0) + sctns->mac.infmval1 |= MAC_NAME_VLD; + + if (stsi(sysinfo, 1, 1, 1)) + return; + + memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); + memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); + memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); + memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); + + sctns->mac.infmval1 |= MAC_ID_VLD; +} + +static void fill_stsi_par(struct sthyi_sctns *sctns, + struct sysinfo_2_2_2 *sysinfo) +{ + if (stsi(sysinfo, 2, 2, 2)) + return; + + sctns->par.infppnum = sysinfo->lpar_number; + memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); + + sctns->par.infpval1 |= PAR_ID_VLD; +} + +static void fill_stsi(struct sthyi_sctns *sctns) +{ + void *sysinfo; + + /* Errors are handled through the validity bits in the response. */ + sysinfo = (void *)__get_free_page(GFP_KERNEL); + if (!sysinfo) + return; + + fill_stsi_mac(sctns, sysinfo); + fill_stsi_par(sctns, sysinfo); + + free_pages((unsigned long)sysinfo, 0); +} + +static void fill_diag_mac(struct sthyi_sctns *sctns, + struct diag204_x_phys_block *block, + void *diag224_buf) +{ + int i; + + for (i = 0; i < block->hdr.cpus; i++) { + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdcps++; + else + sctns->mac.infmscps++; + break; + case IFL: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdifl++; + else + sctns->mac.infmsifl++; + break; + } + } + sctns->mac.infmval1 |= MAC_CNT_VLD; +} + +/* Returns a pointer to the next partition block.
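+ * (i.e. the address right behind the last cpu entry of this block)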
*/ +static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, + bool this_lpar, + void *diag224_buf, + struct diag204_x_part_block *block) +{ + int i, capped = 0, weight_cp = 0, weight_ifl = 0; + struct cpu_inf *cpu_inf; + + for (i = 0; i < block->hdr.rcpus; i++) { + if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) + continue; + + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + cpu_inf = &part_inf->cp; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_cp |= block->cpus[i].cur_weight; + break; + case IFL: + cpu_inf = &part_inf->ifl; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_ifl |= block->cpus[i].cur_weight; + break; + default: + continue; + } + + if (!this_lpar) + continue; + + capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; + cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; + cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; + + if (block->cpus[i].weight == DED_WEIGHT) + cpu_inf->cpu_num_ded += 1; + else + cpu_inf->cpu_num_shd += 1; + } + + if (this_lpar && capped) { + part_inf->cp.lpar_weight = weight_cp; + part_inf->ifl.lpar_weight = weight_ifl; + } + part_inf->cp.all_weight += weight_cp; + part_inf->ifl.all_weight += weight_ifl; + return (struct diag204_x_part_block *)&block->cpus[i]; +} + +static void fill_diag(struct sthyi_sctns *sctns) +{ + int i, r, pages; + bool this_lpar; + void *diag204_buf; + void *diag224_buf = NULL; + struct diag204_x_info_blk_hdr *ti_hdr; + struct diag204_x_part_block *part_block; + struct diag204_x_phys_block *phys_block; + struct lpar_cpu_inf lpar_inf = {}; + + /* Errors are handled through the validity bits in the response. */ + pages = diag204((unsigned long)DIAG204_SUBC_RSI | + (unsigned long)DIAG204_INFO_EXT, 0, NULL); + if (pages <= 0) + return; + + diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), + PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!diag204_buf) + return; + + r = diag204((unsigned long)DIAG204_SUBC_STIB7 | + (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); + if (r < 0) + goto out; + + diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); + if (!diag224_buf || diag224(diag224_buf)) + goto out; + + ti_hdr = diag204_buf; + part_block = diag204_buf + sizeof(*ti_hdr); + + for (i = 0; i < ti_hdr->npar; i++) { + /* + * For the calling lpar we also need to get the cpu + * caps and weights. The time information block header + * specifies the offset to the partition block of the + * caller lpar, so we know when we process its data. 
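+ * ti_hdr->this_part is compared as a byte offset from the start of + * the diag204 buffer.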
+ */ + this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; + part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, + part_block); + } + + phys_block = (struct diag204_x_phys_block *)part_block; + part_block = diag204_buf + ti_hdr->this_part; + if (part_block->hdr.mtid) + sctns->par.infpflg1 = PAR_MT_EN; + + sctns->par.infpval1 |= PAR_GRP_VLD; + sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); + sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); + memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, + sizeof(sctns->par.infplgnm)); + + sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; + sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; + sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; + sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; + sctns->par.infpval1 |= PAR_PCNT_VLD; + + sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); + sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); + sctns->par.infpval1 |= PAR_ABS_VLD; + + /* + * Everything below needs global performance data to be + * meaningful. + */ + if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { + sctns->hdr.infhflg1 |= HDR_PERF_UNAV; + goto out; + } + + fill_diag_mac(sctns, phys_block, diag224_buf); + + if (lpar_inf.cp.lpar_weight) { + sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * + lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; + } + + if (lpar_inf.ifl.lpar_weight) { + sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * + lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; + } + sctns->par.infpval1 |= PAR_WGHT_VLD; + +out: + free_page((unsigned long)diag224_buf); + vfree(diag204_buf); +} + +static int sthyi(u64 vaddr, u64 *rc) +{ + union register_pair r1 = { .even = 0, }; /* subcode */ + union register_pair r2 = { .even = vaddr, }; + int cc; + + asm volatile( + ".insn rre,0xB2560000,%[r1],%[r2]\n" + "ipm %[cc]\n" + "srl %[cc],28\n" + : [cc] "=&d" (cc), [r2] "+&d" (r2.pair) + : [r1] "d" (r1.pair) + : "memory", "cc"); + *rc = r2.odd; + return cc; +} + +static int fill_dst(void *dst, u64 *rc) +{ + struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; + + /* + * If the facility is on, we don't want to emulate the instruction. + * We ask the hypervisor to provide the data. + */ + if (test_facility(74)) + return sthyi((u64)dst, rc); + + fill_hdr(sctns); + fill_stsi(sctns); + fill_diag(sctns); + *rc = 0; + return 0; +} + +static int sthyi_init_cache(void) +{ + if (sthyi_cache.info) + return 0; + sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); + if (!sthyi_cache.info) + return -ENOMEM; + sthyi_cache.end = jiffies - 1; /* expired */ + return 0; +} + +static int sthyi_update_cache(u64 *rc) +{ + int r; + + memset(sthyi_cache.info, 0, PAGE_SIZE); + r = fill_dst(sthyi_cache.info, rc); + if (r) + return r; + sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; + return r; +} + +/* + * sthyi_fill - Fill page with data returned by the STHYI instruction + * + * @dst: Pointer to zeroed page + * @rc: Pointer for storing the return code of the instruction + * + * Fills the destination with system information returned by the STHYI + * instruction. The data is generated by emulation or execution of STHYI, + * if available. The return value is either a negative error value or + * the condition code that would be returned, the rc parameter is the + * return code which is passed in register R2 + 1. 
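+ * + * Context: May sleep. Calls are serialized by sthyi_mutex.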
+ */ +int sthyi_fill(void *dst, u64 *rc) +{ + int r; + + mutex_lock(&sthyi_mutex); + r = sthyi_init_cache(); + if (r) + goto out; + + if (time_is_before_jiffies(sthyi_cache.end)) { + /* cache expired */ + r = sthyi_update_cache(rc); + if (r) + goto out; + } + *rc = 0; + memcpy(dst, sthyi_cache.info, PAGE_SIZE); +out: + mutex_unlock(&sthyi_mutex); + return r; +} +EXPORT_SYMBOL_GPL(sthyi_fill); + +SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, + u64 __user *, return_code, unsigned long, flags) +{ + u64 sthyi_rc; + void *info; + int r; + + if (flags) + return -EINVAL; + if (function_code != STHYI_FC_CP_IFL_CAP) + return -EOPNOTSUPP; + info = (void *)get_zeroed_page(GFP_KERNEL); + if (!info) + return -ENOMEM; + r = sthyi_fill(info, &sthyi_rc); + if (r < 0) + goto out; + if (return_code && put_user(sthyi_rc, return_code)) { + r = -EFAULT; + goto out; + } + if (copy_to_user(buffer, info, PAGE_SIZE)) + r = -EFAULT; +out: + free_page((unsigned long)info); + return r; +} diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c new file mode 100644 index 0000000000..dc2355c623 --- /dev/null +++ b/arch/s390/kernel/syscall.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Thomas Spatzier (tspat@de.ibm.com) + * + * Derived from "arch/i386/kernel/sys_i386.c" + * + * This file contains various random system calls that + * have a non-standard calling sequence on the Linux/s390 + * platform. + */ + +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/fs.h> +#include <linux/smp.h> +#include <linux/sem.h> +#include <linux/msg.h> +#include <linux/shm.h> +#include <linux/stat.h> +#include <linux/syscalls.h> +#include <linux/mman.h> +#include <linux/file.h> +#include <linux/utsname.h> +#include <linux/personality.h> +#include <linux/unistd.h> +#include <linux/ipc.h> +#include <linux/uaccess.h> +#include <linux/string.h> +#include <linux/thread_info.h> +#include <linux/entry-common.h> + +#include <asm/ptrace.h> +#include <asm/vtime.h> + +#include "entry.h" + +/* + * Perform the mmap() system call. Linux for S/390 isn't able to handle more + * than 5 system call parameters, so this system call uses a memory block + * for parameter passing. + */ + +struct s390_mmap_arg_struct { + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flags; + unsigned long fd; + unsigned long offset; +}; + +SYSCALL_DEFINE1(mmap2, struct s390_mmap_arg_struct __user *, arg) +{ + struct s390_mmap_arg_struct a; + int error = -EFAULT; + + if (copy_from_user(&a, arg, sizeof(a))) + goto out; + error = ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); +out: + return error; +} + +#ifdef CONFIG_SYSVIPC +/* + * sys_ipc() is the de-multiplexer for the SysV IPC calls. + */ +SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr) +{ + if (call >> 16) + return -EINVAL; + /* The s390 sys_ipc variant has only five parameters instead of six + * like the generic variant. The only difference is the handling of + * the SEMTIMEDOP subcall where on s390 the third parameter is used + * as a pointer to a struct timespec where the generic variant uses + * the fifth parameter. + * Therefore we can call the generic variant by simply passing the + * third parameter also as fifth parameter. 
+ */ + return ksys_ipc(call, first, second, third, ptr, third); +} +#endif /* CONFIG_SYSVIPC */ + +SYSCALL_DEFINE1(s390_personality, unsigned int, personality) +{ + unsigned int ret = current->personality; + + if (personality(current->personality) == PER_LINUX32 && + personality(personality) == PER_LINUX) + personality |= PER_LINUX32; + + if (personality != 0xffffffff) + set_personality(personality); + + if (personality(ret) == PER_LINUX32) + ret &= ~PER_LINUX32; + + return ret; +} + +SYSCALL_DEFINE0(ni_syscall) +{ + return -ENOSYS; +} + +static void do_syscall(struct pt_regs *regs) +{ + unsigned long nr; + + nr = regs->int_code & 0xffff; + if (!nr) { + nr = regs->gprs[1] & 0xffff; + regs->int_code &= ~0xffffUL; + regs->int_code |= nr; + } + + regs->gprs[2] = nr; + + if (nr == __NR_restart_syscall && !(current->restart_block.arch_data & 1)) { + regs->psw.addr = current->restart_block.arch_data; + current->restart_block.arch_data = 1; + } + nr = syscall_enter_from_user_mode_work(regs, nr); + + /* + * In the s390 ptrace ABI, both the syscall number and the return value + * use gpr2. However, userspace puts the syscall number either in the + * svc instruction itself, or uses gpr1. To make at least skipping syscalls + * work, the ptrace code sets PIF_SYSCALL_RET_SET, which is checked here + * and if set, the syscall will be skipped. + */ + + if (unlikely(test_and_clear_pt_regs_flag(regs, PIF_SYSCALL_RET_SET))) + goto out; + regs->gprs[2] = -ENOSYS; + if (likely(nr >= NR_syscalls)) + goto out; + do { + regs->gprs[2] = current->thread.sys_call_table[nr](regs); + } while (test_and_clear_pt_regs_flag(regs, PIF_EXECVE_PGSTE_RESTART)); +out: + syscall_exit_to_user_mode_work(regs); +} + +void noinstr __do_syscall(struct pt_regs *regs, int per_trap) +{ + add_random_kstack_offset(); + enter_from_user_mode(regs); + regs->psw = S390_lowcore.svc_old_psw; + regs->int_code = S390_lowcore.svc_int_code; + update_timer_sys(); + if (static_branch_likely(&cpu_has_bear)) + current->thread.last_break = regs->last_break; + + local_irq_enable(); + regs->orig_gpr2 = regs->gprs[2]; + + if (per_trap) + set_thread_flag(TIF_PER_TRAP); + + regs->flags = 0; + set_pt_regs_flag(regs, PIF_SYSCALL); + do_syscall(regs); + exit_to_user_mode(); +} diff --git a/arch/s390/kernel/syscalls/Makefile b/arch/s390/kernel/syscalls/Makefile new file mode 100644 index 0000000000..fb85e79794 --- /dev/null +++ b/arch/s390/kernel/syscalls/Makefile @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0 + +gen := arch/$(ARCH)/include/generated +kapi := $(gen)/asm +uapi := $(gen)/uapi/asm + +syscall := $(srctree)/$(src)/syscall.tbl +systbl := $(srctree)/$(src)/syscalltbl + +gen-y := $(kapi)/syscall_table.h +kapi-hdrs-y := $(kapi)/unistd_nr.h +uapi-hdrs-y := $(uapi)/unistd_32.h +uapi-hdrs-y += $(uapi)/unistd_64.h + +targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y) $(uapi-hdrs-y)) + +PHONY += kapi uapi + +kapi: $(gen-y) $(kapi-hdrs-y) +uapi: $(uapi-hdrs-y) + + +# Create output directory if not already present +$(shell mkdir -p $(uapi) $(kapi)) + +filechk_syshdr = $(CONFIG_SHELL) '$(systbl)' -H -a $(syshdr_abi_$(basetarget)) -f "$2" < $< + +filechk_sysnr = $(CONFIG_SHELL) '$(systbl)' -N -a $(sysnr_abi_$(basetarget)) < $< + +filechk_syscalls = $(CONFIG_SHELL) '$(systbl)' -S < $< + +syshdr_abi_unistd_32 := common,32 +$(uapi)/unistd_32.h: $(syscall) FORCE + $(call filechk,syshdr,$@) + +syshdr_abi_unistd_64 := common,64 +$(uapi)/unistd_64.h: $(syscall) FORCE + $(call filechk,syshdr,$@) + +$(kapi)/syscall_table.h: $(syscall) FORCE + $(call 
filechk,syscalls) + +sysnr_abi_unistd_nr := common,32,64 +$(kapi)/unistd_nr.h: $(syscall) FORCE + $(call filechk,sysnr) diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl new file mode 100644 index 0000000000..0122cc1569 --- /dev/null +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -0,0 +1,457 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# System call table for s390 +# +# Format: +# +# <nr> <abi> <syscall> <entry-64bit> <compat-entry> +# +# where <abi> can be common, 64, or 32 + +1 common exit sys_exit sys_exit +2 common fork sys_fork sys_fork +3 common read sys_read compat_sys_s390_read +4 common write sys_write compat_sys_s390_write +5 common open sys_open compat_sys_open +6 common close sys_close sys_close +7 common restart_syscall sys_restart_syscall sys_restart_syscall +8 common creat sys_creat sys_creat +9 common link sys_link sys_link +10 common unlink sys_unlink sys_unlink +11 common execve sys_execve compat_sys_execve +12 common chdir sys_chdir sys_chdir +13 32 time - sys_time32 +14 common mknod sys_mknod sys_mknod +15 common chmod sys_chmod sys_chmod +16 32 lchown - sys_lchown16 +19 common lseek sys_lseek compat_sys_lseek +20 common getpid sys_getpid sys_getpid +21 common mount sys_mount sys_mount +22 common umount sys_oldumount sys_oldumount +23 32 setuid - sys_setuid16 +24 32 getuid - sys_getuid16 +25 32 stime - sys_stime32 +26 common ptrace sys_ptrace compat_sys_ptrace +27 common alarm sys_alarm sys_alarm +29 common pause sys_pause sys_pause +30 common utime sys_utime sys_utime32 +33 common access sys_access sys_access +34 common nice sys_nice sys_nice +36 common sync sys_sync sys_sync +37 common kill sys_kill sys_kill +38 common rename sys_rename sys_rename +39 common mkdir sys_mkdir sys_mkdir +40 common rmdir sys_rmdir sys_rmdir +41 common dup sys_dup sys_dup +42 common pipe sys_pipe sys_pipe +43 common times sys_times compat_sys_times +45 common brk sys_brk sys_brk +46 32 setgid - sys_setgid16 +47 32 getgid - sys_getgid16 +48 common signal sys_signal sys_signal +49 32 geteuid - sys_geteuid16 +50 32 getegid - sys_getegid16 +51 common acct sys_acct sys_acct +52 common umount2 sys_umount sys_umount +54 common ioctl sys_ioctl compat_sys_ioctl +55 common fcntl sys_fcntl compat_sys_fcntl +57 common setpgid sys_setpgid sys_setpgid +60 common umask sys_umask sys_umask +61 common chroot sys_chroot sys_chroot +62 common ustat sys_ustat compat_sys_ustat +63 common dup2 sys_dup2 sys_dup2 +64 common getppid sys_getppid sys_getppid +65 common getpgrp sys_getpgrp sys_getpgrp +66 common setsid sys_setsid sys_setsid +67 common sigaction sys_sigaction compat_sys_sigaction +70 32 setreuid - sys_setreuid16 +71 32 setregid - sys_setregid16 +72 common sigsuspend sys_sigsuspend sys_sigsuspend +73 common sigpending sys_sigpending compat_sys_sigpending +74 common sethostname sys_sethostname sys_sethostname +75 common setrlimit sys_setrlimit compat_sys_setrlimit +76 32 getrlimit - compat_sys_old_getrlimit +77 common getrusage sys_getrusage compat_sys_getrusage +78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday +79 common settimeofday sys_settimeofday compat_sys_settimeofday +80 32 getgroups - sys_getgroups16 +81 32 setgroups - sys_setgroups16 +83 common symlink sys_symlink sys_symlink +85 common readlink sys_readlink sys_readlink +86 common uselib sys_uselib sys_uselib +87 common swapon sys_swapon sys_swapon +88 common reboot sys_reboot sys_reboot +89 common readdir - compat_sys_old_readdir +90 common mmap sys_old_mmap 
compat_sys_s390_old_mmap +91 common munmap sys_munmap sys_munmap +92 common truncate sys_truncate compat_sys_truncate +93 common ftruncate sys_ftruncate compat_sys_ftruncate +94 common fchmod sys_fchmod sys_fchmod +95 32 fchown - sys_fchown16 +96 common getpriority sys_getpriority sys_getpriority +97 common setpriority sys_setpriority sys_setpriority +99 common statfs sys_statfs compat_sys_statfs +100 common fstatfs sys_fstatfs compat_sys_fstatfs +101 32 ioperm - - +102 common socketcall sys_socketcall compat_sys_socketcall +103 common syslog sys_syslog sys_syslog +104 common setitimer sys_setitimer compat_sys_setitimer +105 common getitimer sys_getitimer compat_sys_getitimer +106 common stat sys_newstat compat_sys_newstat +107 common lstat sys_newlstat compat_sys_newlstat +108 common fstat sys_newfstat compat_sys_newfstat +110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie +111 common vhangup sys_vhangup sys_vhangup +112 common idle - - +114 common wait4 sys_wait4 compat_sys_wait4 +115 common swapoff sys_swapoff sys_swapoff +116 common sysinfo sys_sysinfo compat_sys_sysinfo +117 common ipc sys_s390_ipc compat_sys_s390_ipc +118 common fsync sys_fsync sys_fsync +119 common sigreturn sys_sigreturn compat_sys_sigreturn +120 common clone sys_clone sys_clone +121 common setdomainname sys_setdomainname sys_setdomainname +122 common uname sys_newuname sys_newuname +124 common adjtimex sys_adjtimex sys_adjtimex_time32 +125 common mprotect sys_mprotect sys_mprotect +126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask +127 common create_module - - +128 common init_module sys_init_module sys_init_module +129 common delete_module sys_delete_module sys_delete_module +130 common get_kernel_syms - - +131 common quotactl sys_quotactl sys_quotactl +132 common getpgid sys_getpgid sys_getpgid +133 common fchdir sys_fchdir sys_fchdir +134 common bdflush sys_ni_syscall sys_ni_syscall +135 common sysfs sys_sysfs sys_sysfs +136 common personality sys_s390_personality sys_s390_personality +137 common afs_syscall - - +138 32 setfsuid - sys_setfsuid16 +139 32 setfsgid - sys_setfsgid16 +140 32 _llseek - sys_llseek +141 common getdents sys_getdents compat_sys_getdents +142 32 _newselect - compat_sys_select +142 64 select sys_select - +143 common flock sys_flock sys_flock +144 common msync sys_msync sys_msync +145 common readv sys_readv sys_readv +146 common writev sys_writev sys_writev +147 common getsid sys_getsid sys_getsid +148 common fdatasync sys_fdatasync sys_fdatasync +149 common _sysctl - - +150 common mlock sys_mlock sys_mlock +151 common munlock sys_munlock sys_munlock +152 common mlockall sys_mlockall sys_mlockall +153 common munlockall sys_munlockall sys_munlockall +154 common sched_setparam sys_sched_setparam sys_sched_setparam +155 common sched_getparam sys_sched_getparam sys_sched_getparam +156 common sched_setscheduler sys_sched_setscheduler sys_sched_setscheduler +157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler +158 common sched_yield sys_sched_yield sys_sched_yield +159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max +160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min +161 common sched_rr_get_interval sys_sched_rr_get_interval sys_sched_rr_get_interval_time32 +162 common nanosleep sys_nanosleep sys_nanosleep_time32 +163 common mremap sys_mremap sys_mremap +164 32 setresuid - sys_setresuid16 +165 32 getresuid - sys_getresuid16 +167 common query_module - - +168 common poll 
sys_poll sys_poll +169 common nfsservctl - - +170 32 setresgid - sys_setresgid16 +171 32 getresgid - sys_getresgid16 +172 common prctl sys_prctl sys_prctl +173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn +174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction +175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask +176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending +177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time32 +178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo +179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend +180 common pread64 sys_pread64 compat_sys_s390_pread64 +181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64 +182 32 chown - sys_chown16 +183 common getcwd sys_getcwd sys_getcwd +184 common capget sys_capget sys_capget +185 common capset sys_capset sys_capset +186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack +187 common sendfile sys_sendfile64 compat_sys_sendfile +188 common getpmsg - - +189 common putpmsg - - +190 common vfork sys_vfork sys_vfork +191 32 ugetrlimit - compat_sys_getrlimit +191 64 getrlimit sys_getrlimit - +192 32 mmap2 - compat_sys_s390_mmap2 +193 32 truncate64 - compat_sys_s390_truncate64 +194 32 ftruncate64 - compat_sys_s390_ftruncate64 +195 32 stat64 - compat_sys_s390_stat64 +196 32 lstat64 - compat_sys_s390_lstat64 +197 32 fstat64 - compat_sys_s390_fstat64 +198 32 lchown32 - sys_lchown +198 64 lchown sys_lchown - +199 32 getuid32 - sys_getuid +199 64 getuid sys_getuid - +200 32 getgid32 - sys_getgid +200 64 getgid sys_getgid - +201 32 geteuid32 - sys_geteuid +201 64 geteuid sys_geteuid - +202 32 getegid32 - sys_getegid +202 64 getegid sys_getegid - +203 32 setreuid32 - sys_setreuid +203 64 setreuid sys_setreuid - +204 32 setregid32 - sys_setregid +204 64 setregid sys_setregid - +205 32 getgroups32 - sys_getgroups +205 64 getgroups sys_getgroups - +206 32 setgroups32 - sys_setgroups +206 64 setgroups sys_setgroups - +207 32 fchown32 - sys_fchown +207 64 fchown sys_fchown - +208 32 setresuid32 - sys_setresuid +208 64 setresuid sys_setresuid - +209 32 getresuid32 - sys_getresuid +209 64 getresuid sys_getresuid - +210 32 setresgid32 - sys_setresgid +210 64 setresgid sys_setresgid - +211 32 getresgid32 - sys_getresgid +211 64 getresgid sys_getresgid - +212 32 chown32 - sys_chown +212 64 chown sys_chown - +213 32 setuid32 - sys_setuid +213 64 setuid sys_setuid - +214 32 setgid32 - sys_setgid +214 64 setgid sys_setgid - +215 32 setfsuid32 - sys_setfsuid +215 64 setfsuid sys_setfsuid - +216 32 setfsgid32 - sys_setfsgid +216 64 setfsgid sys_setfsgid - +217 common pivot_root sys_pivot_root sys_pivot_root +218 common mincore sys_mincore sys_mincore +219 common madvise sys_madvise sys_madvise +220 common getdents64 sys_getdents64 sys_getdents64 +221 32 fcntl64 - compat_sys_fcntl64 +222 common readahead sys_readahead compat_sys_s390_readahead +223 32 sendfile64 - compat_sys_sendfile64 +224 common setxattr sys_setxattr sys_setxattr +225 common lsetxattr sys_lsetxattr sys_lsetxattr +226 common fsetxattr sys_fsetxattr sys_fsetxattr +227 common getxattr sys_getxattr sys_getxattr +228 common lgetxattr sys_lgetxattr sys_lgetxattr +229 common fgetxattr sys_fgetxattr sys_fgetxattr +230 common listxattr sys_listxattr sys_listxattr +231 common llistxattr sys_llistxattr sys_llistxattr +232 common flistxattr sys_flistxattr sys_flistxattr +233 common removexattr sys_removexattr sys_removexattr +234 common lremovexattr sys_lremovexattr 
sys_lremovexattr +235 common fremovexattr sys_fremovexattr sys_fremovexattr +236 common gettid sys_gettid sys_gettid +237 common tkill sys_tkill sys_tkill +238 common futex sys_futex sys_futex_time32 +239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity +240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity +241 common tgkill sys_tgkill sys_tgkill +243 common io_setup sys_io_setup compat_sys_io_setup +244 common io_destroy sys_io_destroy sys_io_destroy +245 common io_getevents sys_io_getevents sys_io_getevents_time32 +246 common io_submit sys_io_submit compat_sys_io_submit +247 common io_cancel sys_io_cancel sys_io_cancel +248 common exit_group sys_exit_group sys_exit_group +249 common epoll_create sys_epoll_create sys_epoll_create +250 common epoll_ctl sys_epoll_ctl sys_epoll_ctl +251 common epoll_wait sys_epoll_wait sys_epoll_wait +252 common set_tid_address sys_set_tid_address sys_set_tid_address +253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64 +254 common timer_create sys_timer_create compat_sys_timer_create +255 common timer_settime sys_timer_settime sys_timer_settime32 +256 common timer_gettime sys_timer_gettime sys_timer_gettime32 +257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun +258 common timer_delete sys_timer_delete sys_timer_delete +259 common clock_settime sys_clock_settime sys_clock_settime32 +260 common clock_gettime sys_clock_gettime sys_clock_gettime32 +261 common clock_getres sys_clock_getres sys_clock_getres_time32 +262 common clock_nanosleep sys_clock_nanosleep sys_clock_nanosleep_time32 +264 32 fadvise64_64 - compat_sys_s390_fadvise64_64 +265 common statfs64 sys_statfs64 compat_sys_statfs64 +266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 +267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages +268 common mbind sys_mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy +271 common mq_open sys_mq_open compat_sys_mq_open +272 common mq_unlink sys_mq_unlink sys_mq_unlink +273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 +274 common mq_timedreceive sys_mq_timedreceive sys_mq_timedreceive_time32 +275 common mq_notify sys_mq_notify compat_sys_mq_notify +276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr +277 common kexec_load sys_kexec_load compat_sys_kexec_load +278 common add_key sys_add_key sys_add_key +279 common request_key sys_request_key sys_request_key +280 common keyctl sys_keyctl compat_sys_keyctl +281 common waitid sys_waitid compat_sys_waitid +282 common ioprio_set sys_ioprio_set sys_ioprio_set +283 common ioprio_get sys_ioprio_get sys_ioprio_get +284 common inotify_init sys_inotify_init sys_inotify_init +285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch +286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch +287 common migrate_pages sys_migrate_pages sys_migrate_pages +288 common openat sys_openat compat_sys_openat +289 common mkdirat sys_mkdirat sys_mkdirat +290 common mknodat sys_mknodat sys_mknodat +291 common fchownat sys_fchownat sys_fchownat +292 common futimesat sys_futimesat sys_futimesat_time32 +293 32 fstatat64 - compat_sys_s390_fstatat64 +293 64 newfstatat sys_newfstatat - +294 common unlinkat sys_unlinkat sys_unlinkat +295 common renameat sys_renameat sys_renameat +296 common linkat sys_linkat sys_linkat +297 common symlinkat sys_symlinkat sys_symlinkat +298 common readlinkat 
sys_readlinkat sys_readlinkat +299 common fchmodat sys_fchmodat sys_fchmodat +300 common faccessat sys_faccessat sys_faccessat +301 common pselect6 sys_pselect6 compat_sys_pselect6_time32 +302 common ppoll sys_ppoll compat_sys_ppoll_time32 +303 common unshare sys_unshare sys_unshare +304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list +305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list +306 common splice sys_splice sys_splice +307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range +308 common tee sys_tee sys_tee +309 common vmsplice sys_vmsplice sys_vmsplice +310 common move_pages sys_move_pages sys_move_pages +311 common getcpu sys_getcpu sys_getcpu +312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait +313 common utimes sys_utimes sys_utimes_time32 +314 common fallocate sys_fallocate compat_sys_s390_fallocate +315 common utimensat sys_utimensat sys_utimensat_time32 +316 common signalfd sys_signalfd compat_sys_signalfd +317 common timerfd - - +318 common eventfd sys_eventfd sys_eventfd +319 common timerfd_create sys_timerfd_create sys_timerfd_create +320 common timerfd_settime sys_timerfd_settime sys_timerfd_settime32 +321 common timerfd_gettime sys_timerfd_gettime sys_timerfd_gettime32 +322 common signalfd4 sys_signalfd4 compat_sys_signalfd4 +323 common eventfd2 sys_eventfd2 sys_eventfd2 +324 common inotify_init1 sys_inotify_init1 sys_inotify_init1 +325 common pipe2 sys_pipe2 sys_pipe2 +326 common dup3 sys_dup3 sys_dup3 +327 common epoll_create1 sys_epoll_create1 sys_epoll_create1 +328 common preadv sys_preadv compat_sys_preadv +329 common pwritev sys_pwritev compat_sys_pwritev +330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo +331 common perf_event_open sys_perf_event_open sys_perf_event_open +332 common fanotify_init sys_fanotify_init sys_fanotify_init +333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark +334 common prlimit64 sys_prlimit64 sys_prlimit64 +335 common name_to_handle_at sys_name_to_handle_at sys_name_to_handle_at +336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at +337 common clock_adjtime sys_clock_adjtime sys_clock_adjtime32 +338 common syncfs sys_syncfs sys_syncfs +339 common setns sys_setns sys_setns +340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv +341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev +342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr +343 common kcmp sys_kcmp sys_kcmp +344 common finit_module sys_finit_module sys_finit_module +345 common sched_setattr sys_sched_setattr sys_sched_setattr +346 common sched_getattr sys_sched_getattr sys_sched_getattr +347 common renameat2 sys_renameat2 sys_renameat2 +348 common seccomp sys_seccomp sys_seccomp +349 common getrandom sys_getrandom sys_getrandom +350 common memfd_create sys_memfd_create sys_memfd_create +351 common bpf sys_bpf sys_bpf +352 common s390_pci_mmio_write sys_s390_pci_mmio_write sys_s390_pci_mmio_write +353 common s390_pci_mmio_read sys_s390_pci_mmio_read sys_s390_pci_mmio_read +354 common execveat sys_execveat compat_sys_execveat +355 common userfaultfd sys_userfaultfd sys_userfaultfd +356 common membarrier sys_membarrier sys_membarrier +357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg_time32 +358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg +359 common socket sys_socket sys_socket +360 common socketpair sys_socketpair sys_socketpair +361 common bind sys_bind sys_bind 
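Each row above reads <number> <abi> <name> <64-bit entry point> <31-bit compat entry point>: a "-" means the call is not wired up for that ABI, and a number that appears twice with abi 32 and 64 (for example 199 getuid32/getuid) occupies a single slot whose two entry points come from the two rows. A minimal user-space sketch, assuming only the generic syscall(2) wrapper and the __NR_getuid constant that the build generates from this table:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	/* __NR_getuid comes from the header generated out of this table
	 * (slot 199 for the 64-bit ABI); syscall() enters that slot directly. */
	long uid = syscall(__NR_getuid);

	printf("getuid via raw syscall: %ld\n", uid);
	return 0;
}

Compiled as a normal 64-bit program this reaches the sys_getuid entry; a 31-bit compat binary using the same number would be routed to the entry point in the rightmost column instead.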
+362 common connect sys_connect sys_connect +363 common listen sys_listen sys_listen +364 common accept4 sys_accept4 sys_accept4 +365 common getsockopt sys_getsockopt sys_getsockopt +366 common setsockopt sys_setsockopt sys_setsockopt +367 common getsockname sys_getsockname sys_getsockname +368 common getpeername sys_getpeername sys_getpeername +369 common sendto sys_sendto sys_sendto +370 common sendmsg sys_sendmsg compat_sys_sendmsg +371 common recvfrom sys_recvfrom compat_sys_recvfrom +372 common recvmsg sys_recvmsg compat_sys_recvmsg +373 common shutdown sys_shutdown sys_shutdown +374 common mlock2 sys_mlock2 sys_mlock2 +375 common copy_file_range sys_copy_file_range sys_copy_file_range +376 common preadv2 sys_preadv2 compat_sys_preadv2 +377 common pwritev2 sys_pwritev2 compat_sys_pwritev2 +378 common s390_guarded_storage sys_s390_guarded_storage sys_s390_guarded_storage +379 common statx sys_statx sys_statx +380 common s390_sthyi sys_s390_sthyi sys_s390_sthyi +381 common kexec_file_load sys_kexec_file_load sys_kexec_file_load +382 common io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents +383 common rseq sys_rseq sys_rseq +384 common pkey_mprotect sys_pkey_mprotect sys_pkey_mprotect +385 common pkey_alloc sys_pkey_alloc sys_pkey_alloc +386 common pkey_free sys_pkey_free sys_pkey_free +# room for arch specific syscalls +392 64 semtimedop sys_semtimedop - +393 common semget sys_semget sys_semget +394 common semctl sys_semctl compat_sys_semctl +395 common shmget sys_shmget sys_shmget +396 common shmctl sys_shmctl compat_sys_shmctl +397 common shmat sys_shmat compat_sys_shmat +398 common shmdt sys_shmdt sys_shmdt +399 common msgget sys_msgget sys_msgget +400 common msgsnd sys_msgsnd compat_sys_msgsnd +401 common msgrcv sys_msgrcv compat_sys_msgrcv +402 common msgctl sys_msgctl compat_sys_msgctl +403 32 clock_gettime64 - sys_clock_gettime +404 32 clock_settime64 - sys_clock_settime +405 32 clock_adjtime64 - sys_clock_adjtime +406 32 clock_getres_time64 - sys_clock_getres +407 32 clock_nanosleep_time64 - sys_clock_nanosleep +408 32 timer_gettime64 - sys_timer_gettime +409 32 timer_settime64 - sys_timer_settime +410 32 timerfd_gettime64 - sys_timerfd_gettime +411 32 timerfd_settime64 - sys_timerfd_settime +412 32 utimensat_time64 - sys_utimensat +413 32 pselect6_time64 - compat_sys_pselect6_time64 +414 32 ppoll_time64 - compat_sys_ppoll_time64 +416 32 io_pgetevents_time64 - sys_io_pgetevents +417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 +418 32 mq_timedsend_time64 - sys_mq_timedsend +419 32 mq_timedreceive_time64 - sys_mq_timedreceive +420 32 semtimedop_time64 - sys_semtimedop +421 32 rt_sigtimedwait_time64 - compat_sys_rt_sigtimedwait_time64 +422 32 futex_time64 - sys_futex +423 32 sched_rr_get_interval_time64 - sys_sched_rr_get_interval +424 common pidfd_send_signal sys_pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree sys_open_tree +429 common move_mount sys_move_mount sys_move_mount +430 common fsopen sys_fsopen sys_fsopen +431 common fsconfig sys_fsconfig sys_fsconfig +432 common fsmount sys_fsmount sys_fsmount +433 common fspick sys_fspick sys_fspick +434 common pidfd_open sys_pidfd_open sys_pidfd_open +435 common clone3 sys_clone3 sys_clone3 +436 common close_range sys_close_range sys_close_range +437 common openat2 sys_openat2 sys_openat2 +438 
common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr sys_mount_setattr +443 common quotactl_fd sys_quotactl_fd sys_quotactl_fd +444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self +447 common memfd_secret sys_memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 diff --git a/arch/s390/kernel/syscalls/syscalltbl b/arch/s390/kernel/syscalls/syscalltbl new file mode 100755 index 0000000000..fbac1732f8 --- /dev/null +++ b/arch/s390/kernel/syscalls/syscalltbl @@ -0,0 +1,232 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table and header files +# +# Copyright IBM Corp. 2018 +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> + +# +# File path to the system call table definition. +# You can set the path with the -i option. If omitted, +# system call table definitions are read from standard input. +# +SYSCALL_TBL="" + + +create_syscall_table_entries() +{ + local nr abi name entry64 entry32 _ignore + local temp=$(mktemp ${TMPDIR:-/tmp}/syscalltbl-common.XXXXXXXXX) + + ( + # + # Initialize with 0 to create an NI_SYSCALL for 0 + # + local prev_nr=0 prev_32=sys_ni_syscall prev_64=sys_ni_syscall + while read nr abi name entry64 entry32 _ignore; do + test x$entry32 = x- && entry32=sys_ni_syscall + test x$entry64 = x- && entry64=sys_ni_syscall + + if test $prev_nr -eq $nr; then + # + # Same syscall but different ABI, just update + # the respective entry point + # + case $abi in + 32) + prev_32=$entry32 + ;; + 64) + prev_64=$entry64 + ;; + esac + continue; + else + printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32 + fi + + prev_nr=$nr + prev_64=$entry64 + prev_32=$entry32 + done + printf "%d\t%s\t%s\n" $prev_nr $prev_64 $prev_32 + ) >> $temp + + # + # Check for duplicate syscall numbers + # + if ! cat $temp |cut -f1 |uniq -d 2>&1; then + echo "Error: generated system call table contains duplicate entries: $temp" >&2 + exit 1 + fi + + # + # Generate syscall table + # + prev_nr=0 + while read nr entry64 entry32; do + while test $prev_nr -lt $((nr - 1)); do + printf "NI_SYSCALL\n" + prev_nr=$((prev_nr + 1)) + done + if test x$entry64 = xsys_ni_syscall && + test x$entry32 = xsys_ni_syscall; then + printf "NI_SYSCALL\n" + else + printf "SYSCALL(%s,%s)\n" $entry64 $entry32 + fi + prev_nr=$nr + done < $temp + rm $temp +} + +generate_syscall_table() +{ + cat <<-EoHEADER + /* SPDX-License-Identifier: GPL-2.0 */ + /* + * Definitions for sys_call_table, each line represents an + * entry in the table in the form + * SYSCALL(64 bit syscall, 31 bit emulated syscall) + * + * This file is meant to be included from entry.S. 
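The second pass of create_syscall_table_entries above pads every unassigned number with NI_SYSCALL so that the index into the generated table always equals the syscall number (note the intentional holes in the .tbl file, such as 242, 263 and the arch-reserved range before 392). A stand-alone C sketch of that padding loop, using made-up table data purely for illustration:

#include <stdio.h>
#include <string.h>

struct entry { int nr; const char *e64, *e32; };

int main(void)
{
	/* Hypothetical, already sorted and deduplicated output of the first pass. */
	static const struct entry tbl[] = {
		{ 0, "sys_ni_syscall", "sys_ni_syscall" },
		{ 1, "sys_exit",       "sys_exit"       },
		{ 4, "sys_write",      "sys_write"      },
	};
	int prev_nr = 0;

	for (unsigned i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++) {
		/* pad unassigned numbers so that index == syscall number */
		while (prev_nr < tbl[i].nr - 1) {
			puts("NI_SYSCALL");
			prev_nr++;
		}
		if (!strcmp(tbl[i].e64, "sys_ni_syscall") &&
		    !strcmp(tbl[i].e32, "sys_ni_syscall"))
			puts("NI_SYSCALL");
		else
			printf("SYSCALL(%s,%s)\n", tbl[i].e64, tbl[i].e32);
		prev_nr = tbl[i].nr;
	}
	return 0;
}

For the sample data this prints one line per slot 0..4, with NI_SYSCALL filling slots 2 and 3, which is exactly the shape entry.S expects when it includes the generated file.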
+ */ + + #define NI_SYSCALL SYSCALL(sys_ni_syscall,sys_ni_syscall) + +EoHEADER + grep -Ev '^(#|[[:blank:]]*$)' $SYSCALL_TBL \ + |sort -k1 -n \ + |create_syscall_table_entries +} + +create_header_defines() +{ + local nr abi name _ignore + + while read nr abi name _ignore; do + printf "#define __NR_%s %d\n" $name $nr + done +} + +normalize_fileguard() +{ + local fileguard="$1" + + echo "$1" |tr '[[:lower:]]' '[[:upper:]]' \ + |sed -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g' +} + +generate_syscall_header() +{ + local abis=$(echo "($1)" | tr ',' '|') + local filename="$2" + local fileguard suffix + + if test "$filename"; then + fileguard=$(normalize_fileguard "__UAPI_ASM_S390_$2") + else + case "$abis" in + *64*) suffix=64 ;; + *32*) suffix=32 ;; + esac + fileguard=$(normalize_fileguard "__UAPI_ASM_S390_SYSCALLS_$suffix") + fi + + cat <<-EoHEADER + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef ${fileguard} + #define ${fileguard} + +EoHEADER + + grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \ + |sort -k1 -n \ + |create_header_defines + + cat <<-EoFOOTER + + #endif /* ${fileguard} */ +EoFOOTER +} + +__max_syscall_nr() +{ + local abis=$(echo "($1)" | tr ',' '|') + + grep -E "^[[:digit:]]+[[:space:]]+${abis}" $SYSCALL_TBL \ + |sed -ne 's/^\([[:digit:]]*\)[[:space:]].*/\1/p' \ + |sort -n \ + |tail -1 +} + + +generate_syscall_nr() +{ + local abis="$1" + local max_syscall_nr num_syscalls + + max_syscall_nr=$(__max_syscall_nr "$abis") + num_syscalls=$((max_syscall_nr + 1)) + + cat <<-EoHEADER + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + #ifndef __ASM_S390_SYSCALLS_NR + #define __ASM_S390_SYSCALLS_NR + + #define NR_syscalls ${num_syscalls} + + #endif /* __ASM_S390_SYSCALLS_NR */ +EoHEADER +} + + +# +# Parse command line arguments +# +do_syscall_header="" +do_syscall_table="" +do_syscall_nr="" +output_file="" +abi_list="common,64" +filename="" +while getopts ":HNSXi:a:f:" arg; do + case $arg in + a) + abi_list="$OPTARG" + ;; + i) + SYSCALL_TBL="$OPTARG" + ;; + f) + filename=${OPTARG##*/} + ;; + H) + do_syscall_header=1 + ;; + N) + do_syscall_nr=1 + ;; + S) + do_syscall_table=1 + ;; + X) + set -x + ;; + :) + echo "Missing argument for -$OPTARG" >&2 + exit 1 + ;; + \?) + echo "Invalid option specified" >&2 + exit 1 + ;; + esac +done + +test "$do_syscall_header" && generate_syscall_header "$abi_list" "$filename" +test "$do_syscall_table" && generate_syscall_table +test "$do_syscall_nr" && generate_syscall_nr "$abi_list" + +exit 0 diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c new file mode 100644 index 0000000000..b5e364358c --- /dev/null +++ b/arch/s390/kernel/sysinfo.c @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 
2001, 2009 + * Author(s): Ulrich Weigand <Ulrich.Weigand@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + */ + +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/export.h> +#include <linux/slab.h> +#include <asm/asm-extable.h> +#include <asm/ebcdic.h> +#include <asm/debug.h> +#include <asm/sysinfo.h> +#include <asm/cpcmd.h> +#include <asm/topology.h> +#include <asm/fpu/api.h> + +int topology_max_mnest; + +static inline int __stsi(void *sysinfo, int fc, int sel1, int sel2, int *lvl) +{ + int r0 = (fc << 28) | sel1; + int rc = 0; + + asm volatile( + " lr 0,%[r0]\n" + " lr 1,%[r1]\n" + " stsi 0(%[sysinfo])\n" + "0: jz 2f\n" + "1: lhi %[rc],%[retval]\n" + "2: lr %[r0],0\n" + EX_TABLE(0b, 1b) + : [r0] "+d" (r0), [rc] "+d" (rc) + : [r1] "d" (sel2), + [sysinfo] "a" (sysinfo), + [retval] "K" (-EOPNOTSUPP) + : "cc", "0", "1", "memory"); + *lvl = ((unsigned int) r0) >> 28; + return rc; +} + +/* + * stsi - store system information + * + * Returns the current configuration level if function code 0 was specified. + * Otherwise returns 0 on success or a negative value on error. + */ +int stsi(void *sysinfo, int fc, int sel1, int sel2) +{ + int lvl, rc; + + rc = __stsi(sysinfo, fc, sel1, sel2, &lvl); + if (rc) + return rc; + return fc ? 0 : lvl; +} +EXPORT_SYMBOL(stsi); + +#ifdef CONFIG_PROC_FS + +static bool convert_ext_name(unsigned char encoding, char *name, size_t len) +{ + switch (encoding) { + case 1: /* EBCDIC */ + EBCASC(name, len); + break; + case 2: /* UTF-8 */ + break; + default: + return false; + } + return true; +} + +static void stsi_1_1_1(struct seq_file *m, struct sysinfo_1_1_1 *info) +{ + int i; + + if (stsi(info, 1, 1, 1)) + return; + EBCASC(info->manufacturer, sizeof(info->manufacturer)); + EBCASC(info->type, sizeof(info->type)); + EBCASC(info->model, sizeof(info->model)); + EBCASC(info->sequence, sizeof(info->sequence)); + EBCASC(info->plant, sizeof(info->plant)); + EBCASC(info->model_capacity, sizeof(info->model_capacity)); + EBCASC(info->model_perm_cap, sizeof(info->model_perm_cap)); + EBCASC(info->model_temp_cap, sizeof(info->model_temp_cap)); + seq_printf(m, "Manufacturer: %-16.16s\n", info->manufacturer); + seq_printf(m, "Type: %-4.4s\n", info->type); + if (info->lic) + seq_printf(m, "LIC Identifier: %016lx\n", info->lic); + /* + * Sigh: the model field has been renamed with System z9 + * to model_capacity and a new model field has been added + * after the plant field. To avoid confusing older programs + * the "Model:" prints "model_capacity model" or just + * "model_capacity" if the model string is empty . + */ + seq_printf(m, "Model: %-16.16s", info->model_capacity); + if (info->model[0] != '\0') + seq_printf(m, " %-16.16s", info->model); + seq_putc(m, '\n'); + seq_printf(m, "Sequence Code: %-16.16s\n", info->sequence); + seq_printf(m, "Plant: %-4.4s\n", info->plant); + seq_printf(m, "Model Capacity: %-16.16s %08u\n", + info->model_capacity, info->model_cap_rating); + if (info->model_perm_cap_rating) + seq_printf(m, "Model Perm. Capacity: %-16.16s %08u\n", + info->model_perm_cap, + info->model_perm_cap_rating); + if (info->model_temp_cap_rating) + seq_printf(m, "Model Temp. Capacity: %-16.16s %08u\n", + info->model_temp_cap, + info->model_temp_cap_rating); + if (info->ncr) + seq_printf(m, "Nominal Cap. Rating: %08u\n", info->ncr); + if (info->npr) + seq_printf(m, "Nominal Perm. 
Rating: %08u\n", info->npr); + if (info->ntr) + seq_printf(m, "Nominal Temp. Rating: %08u\n", info->ntr); + if (info->cai) { + seq_printf(m, "Capacity Adj. Ind.: %d\n", info->cai); + seq_printf(m, "Capacity Ch. Reason: %d\n", info->ccr); + seq_printf(m, "Capacity Transient: %d\n", info->t); + } + if (info->p) { + for (i = 1; i <= ARRAY_SIZE(info->typepct); i++) { + seq_printf(m, "Type %d Percentage: %d\n", + i, info->typepct[i - 1]); + } + } +} + +static void stsi_15_1_x(struct seq_file *m, struct sysinfo_15_1_x *info) +{ + int i; + + seq_putc(m, '\n'); + if (!MACHINE_HAS_TOPOLOGY) + return; + if (stsi(info, 15, 1, topology_max_mnest)) + return; + seq_printf(m, "CPU Topology HW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); +#ifdef CONFIG_SCHED_TOPOLOGY + store_topology(info); + seq_printf(m, "CPU Topology SW: "); + for (i = 0; i < TOPOLOGY_NR_MAG; i++) + seq_printf(m, " %d", info->mag[i]); + seq_putc(m, '\n'); +#endif +} + +static void stsi_1_2_2(struct seq_file *m, struct sysinfo_1_2_2 *info) +{ + struct sysinfo_1_2_2_extension *ext; + int i; + + if (stsi(info, 1, 2, 2)) + return; + ext = (struct sysinfo_1_2_2_extension *) + ((unsigned long) info + info->acc_offset); + seq_printf(m, "CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "CPUs Reserved: %d\n", info->cpus_reserved); + if (info->mt_installed) { + seq_printf(m, "CPUs G-MTID: %d\n", info->mt_gtid); + seq_printf(m, "CPUs S-MTID: %d\n", info->mt_stid); + } + /* + * Sigh 2. According to the specification the alternate + * capability field is a 32 bit floating point number + * if the higher order 8 bits are not zero. Printing + * a floating point number in the kernel is a no-no, + * always print the number as 32 bit unsigned integer. + * The user-space needs to know about the strange + * encoding of the alternate cpu capability. 
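The raw capability value printed here can be decoded by a consumer along the lines of the comment above: if the high-order bits are zero it is a plain unsigned integer, otherwise the same 32 bits form a short floating-point number, which is also how the assembler in s390_adjust_jiffies further down treats it. A rough user-space sketch of such a decoder, with made-up sample values; treating the bit pattern as an IEEE single is an assumption of this sketch:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Illustrative only: high-order bits clear -> unsigned integer,
 * otherwise reinterpret the same 32 bits as a short float. */
static double capability_value(uint32_t raw)
{
	if ((raw & 0xff800000u) == 0)
		return (double)raw;

	float f;
	memcpy(&f, &raw, sizeof(f));	/* reinterpret, do not convert */
	return (double)f;
}

int main(void)
{
	printf("%f\n", capability_value(0x000004d2));	/* integer form: 1234 */
	printf("%f\n", capability_value(0x42c80000));	/* float pattern: 100.0 */
	return 0;
}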
+ */ + seq_printf(m, "Capability: %u", info->capability); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_capability); + seq_putc(m, '\n'); + if (info->nominal_cap) + seq_printf(m, "Nominal Capability: %d\n", info->nominal_cap); + if (info->secondary_cap) + seq_printf(m, "Secondary Capability: %d\n", info->secondary_cap); + for (i = 2; i <= info->cpus_total; i++) { + seq_printf(m, "Adjustment %02d-way: %u", + i, info->adjustment[i-2]); + if (info->format == 1) + seq_printf(m, " %u", ext->alt_adjustment[i-2]); + seq_putc(m, '\n'); + } +} + +static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) +{ + if (stsi(info, 2, 2, 2)) + return; + EBCASC(info->name, sizeof(info->name)); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Number: %d\n", info->lpar_number); + seq_printf(m, "LPAR Characteristics: "); + if (info->characteristics & LPAR_CHAR_DEDICATED) + seq_printf(m, "Dedicated "); + if (info->characteristics & LPAR_CHAR_SHARED) + seq_printf(m, "Shared "); + if (info->characteristics & LPAR_CHAR_LIMITED) + seq_printf(m, "Limited "); + seq_putc(m, '\n'); + seq_printf(m, "LPAR Name: %-8.8s\n", info->name); + seq_printf(m, "LPAR Adjustment: %d\n", info->caf); + seq_printf(m, "LPAR CPUs Total: %d\n", info->cpus_total); + seq_printf(m, "LPAR CPUs Configured: %d\n", info->cpus_configured); + seq_printf(m, "LPAR CPUs Standby: %d\n", info->cpus_standby); + seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); + seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); + seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); + if (info->mt_installed) { + seq_printf(m, "LPAR CPUs G-MTID: %d\n", info->mt_gtid); + seq_printf(m, "LPAR CPUs S-MTID: %d\n", info->mt_stid); + seq_printf(m, "LPAR CPUs PS-MTID: %d\n", info->mt_psmtid); + } + if (convert_ext_name(info->vsne, info->ext_name, sizeof(info->ext_name))) { + seq_printf(m, "LPAR Extended Name: %-.256s\n", info->ext_name); + seq_printf(m, "LPAR UUID: %pUb\n", &info->uuid); + } +} + +static void print_ext_name(struct seq_file *m, int lvl, + struct sysinfo_3_2_2 *info) +{ + size_t len = sizeof(info->ext_names[lvl]); + + if (!convert_ext_name(info->vm[lvl].evmne, info->ext_names[lvl], len)) + return; + seq_printf(m, "VM%02d Extended Name: %-.256s\n", lvl, + info->ext_names[lvl]); +} + +static void print_uuid(struct seq_file *m, int i, struct sysinfo_3_2_2 *info) +{ + if (uuid_is_null(&info->vm[i].uuid)) + return; + seq_printf(m, "VM%02d UUID: %pUb\n", i, &info->vm[i].uuid); +} + +static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) +{ + int i; + + if (stsi(info, 3, 2, 2)) + return; + for (i = 0; i < info->count; i++) { + EBCASC(info->vm[i].name, sizeof(info->vm[i].name)); + EBCASC(info->vm[i].cpi, sizeof(info->vm[i].cpi)); + seq_putc(m, '\n'); + seq_printf(m, "VM%02d Name: %-8.8s\n", i, info->vm[i].name); + seq_printf(m, "VM%02d Control Program: %-16.16s\n", i, info->vm[i].cpi); + seq_printf(m, "VM%02d Adjustment: %d\n", i, info->vm[i].caf); + seq_printf(m, "VM%02d CPUs Total: %d\n", i, info->vm[i].cpus_total); + seq_printf(m, "VM%02d CPUs Configured: %d\n", i, info->vm[i].cpus_configured); + seq_printf(m, "VM%02d CPUs Standby: %d\n", i, info->vm[i].cpus_standby); + seq_printf(m, "VM%02d CPUs Reserved: %d\n", i, info->vm[i].cpus_reserved); + print_ext_name(m, i, info); + print_uuid(m, i, info); + } +} + +static int sysinfo_show(struct seq_file *m, void *v) +{ + void *info = (void *)get_zeroed_page(GFP_KERNEL); + int level; + + if (!info) + return 0; + level = stsi(NULL, 0, 0, 0); + if (level >= 1) 
+ stsi_1_1_1(m, info); + if (level >= 1) + stsi_15_1_x(m, info); + if (level >= 1) + stsi_1_2_2(m, info); + if (level >= 2) + stsi_2_2_2(m, info); + if (level >= 3) + stsi_3_2_2(m, info); + free_page((unsigned long)info); + return 0; +} + +static int __init sysinfo_create_proc(void) +{ + proc_create_single("sysinfo", 0444, NULL, sysinfo_show); + return 0; +} +device_initcall(sysinfo_create_proc); + +#endif /* CONFIG_PROC_FS */ + +/* + * Service levels interface. + */ + +static DECLARE_RWSEM(service_level_sem); +static LIST_HEAD(service_level_list); + +int register_service_level(struct service_level *slr) +{ + struct service_level *ptr; + + down_write(&service_level_sem); + list_for_each_entry(ptr, &service_level_list, list) + if (ptr == slr) { + up_write(&service_level_sem); + return -EEXIST; + } + list_add_tail(&slr->list, &service_level_list); + up_write(&service_level_sem); + return 0; +} +EXPORT_SYMBOL(register_service_level); + +int unregister_service_level(struct service_level *slr) +{ + struct service_level *ptr, *next; + int rc = -ENOENT; + + down_write(&service_level_sem); + list_for_each_entry_safe(ptr, next, &service_level_list, list) { + if (ptr != slr) + continue; + list_del(&ptr->list); + rc = 0; + break; + } + up_write(&service_level_sem); + return rc; +} +EXPORT_SYMBOL(unregister_service_level); + +static void *service_level_start(struct seq_file *m, loff_t *pos) +{ + down_read(&service_level_sem); + return seq_list_start(&service_level_list, *pos); +} + +static void *service_level_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &service_level_list, pos); +} + +static void service_level_stop(struct seq_file *m, void *p) +{ + up_read(&service_level_sem); +} + +static int service_level_show(struct seq_file *m, void *p) +{ + struct service_level *slr; + + slr = list_entry(p, struct service_level, list); + slr->seq_print(m, slr); + return 0; +} + +static const struct seq_operations service_level_seq_ops = { + .start = service_level_start, + .next = service_level_next, + .stop = service_level_stop, + .show = service_level_show +}; + +static void service_level_vm_print(struct seq_file *m, + struct service_level *slr) +{ + char *query_buffer, *str; + + query_buffer = kmalloc(1024, GFP_KERNEL | GFP_DMA); + if (!query_buffer) + return; + cpcmd("QUERY CPLEVEL", query_buffer, 1024, NULL); + str = strchr(query_buffer, '\n'); + if (str) + *str = 0; + seq_printf(m, "VM: %s\n", query_buffer); + kfree(query_buffer); +} + +static struct service_level service_level_vm = { + .seq_print = service_level_vm_print +}; + +static __init int create_proc_service_level(void) +{ + proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); + if (MACHINE_IS_VM) + register_service_level(&service_level_vm); + return 0; +} +subsys_initcall(create_proc_service_level); + +/* + * CPU capability might have changed. Therefore recalculate loops_per_jiffy. + */ +void s390_adjust_jiffies(void) +{ + struct sysinfo_1_2_2 *info; + unsigned long capability; + struct kernel_fpu fpu; + + info = (void *) get_zeroed_page(GFP_KERNEL); + if (!info) + return; + + if (stsi(info, 1, 2, 2) == 0) { + /* + * Major sigh. The cpu capability encoding is "special". + * If the first 9 bits of info->capability are 0 then it + * is a 32 bit unsigned integer in the range 0 .. 2^23. + * If the first 9 bits are != 0 then it is a 32 bit float. + * In addition a lower value indicates a proportionally + * higher cpu capacity. Bogomips are the other way round. 
+ * To get to a halfway suitable number we divide 1e7 + * by the cpu capability number. Yes, that means a floating + * point division .. + */ + kernel_fpu_begin(&fpu, KERNEL_FPR); + asm volatile( + " sfpc %3\n" + " l %0,%1\n" + " tmlh %0,0xff80\n" + " jnz 0f\n" + " cefbr %%f2,%0\n" + " j 1f\n" + "0: le %%f2,%1\n" + "1: cefbr %%f0,%2\n" + " debr %%f0,%%f2\n" + " cgebr %0,5,%%f0\n" + : "=&d" (capability) + : "Q" (info->capability), "d" (10000000), "d" (0) + : "cc" + ); + kernel_fpu_end(&fpu, KERNEL_FPR); + } else + /* + * Really old machine without stsi block for basic + * cpu information. Report 42.0 bogomips. + */ + capability = 42; + loops_per_jiffy = capability * (500000/HZ); + free_page((unsigned long) info); +} + +/* + * calibrate the delay loop + */ +void calibrate_delay(void) +{ + s390_adjust_jiffies(); + /* Print the good old Bogomips line .. */ + printk(KERN_DEBUG "Calibrating delay loop (skipped)... " + "%lu.%02lu BogoMIPS preset\n", loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); +} + +#ifdef CONFIG_DEBUG_FS + +#define STSI_FILE(fc, s1, s2) \ +static int stsi_open_##fc##_##s1##_##s2(struct inode *inode, struct file *file)\ +{ \ + file->private_data = (void *) get_zeroed_page(GFP_KERNEL); \ + if (!file->private_data) \ + return -ENOMEM; \ + if (stsi(file->private_data, fc, s1, s2)) { \ + free_page((unsigned long)file->private_data); \ + file->private_data = NULL; \ + return -EACCES; \ + } \ + return nonseekable_open(inode, file); \ +} \ + \ +static const struct file_operations stsi_##fc##_##s1##_##s2##_fs_ops = { \ + .open = stsi_open_##fc##_##s1##_##s2, \ + .release = stsi_release, \ + .read = stsi_read, \ + .llseek = no_llseek, \ +}; + +static int stsi_release(struct inode *inode, struct file *file) +{ + free_page((unsigned long)file->private_data); + return 0; +} + +static ssize_t stsi_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) +{ + return simple_read_from_buffer(buf, size, ppos, file->private_data, PAGE_SIZE); +} + +STSI_FILE( 1, 1, 1); +STSI_FILE( 1, 2, 1); +STSI_FILE( 1, 2, 2); +STSI_FILE( 2, 2, 1); +STSI_FILE( 2, 2, 2); +STSI_FILE( 3, 2, 2); +STSI_FILE(15, 1, 2); +STSI_FILE(15, 1, 3); +STSI_FILE(15, 1, 4); +STSI_FILE(15, 1, 5); +STSI_FILE(15, 1, 6); + +struct stsi_file { + const struct file_operations *fops; + char *name; +}; + +static struct stsi_file stsi_file[] __initdata = { + {.fops = &stsi_1_1_1_fs_ops, .name = "1_1_1"}, + {.fops = &stsi_1_2_1_fs_ops, .name = "1_2_1"}, + {.fops = &stsi_1_2_2_fs_ops, .name = "1_2_2"}, + {.fops = &stsi_2_2_1_fs_ops, .name = "2_2_1"}, + {.fops = &stsi_2_2_2_fs_ops, .name = "2_2_2"}, + {.fops = &stsi_3_2_2_fs_ops, .name = "3_2_2"}, + {.fops = &stsi_15_1_2_fs_ops, .name = "15_1_2"}, + {.fops = &stsi_15_1_3_fs_ops, .name = "15_1_3"}, + {.fops = &stsi_15_1_4_fs_ops, .name = "15_1_4"}, + {.fops = &stsi_15_1_5_fs_ops, .name = "15_1_5"}, + {.fops = &stsi_15_1_6_fs_ops, .name = "15_1_6"}, +}; + +static u8 stsi_0_0_0; + +static __init int stsi_init_debugfs(void) +{ + struct dentry *stsi_root; + struct stsi_file *sf; + int lvl, i; + + stsi_root = debugfs_create_dir("stsi", arch_debugfs_dir); + lvl = stsi(NULL, 0, 0, 0); + if (lvl > 0) + stsi_0_0_0 = lvl; + debugfs_create_u8("0_0_0", 0400, stsi_root, &stsi_0_0_0); + for (i = 0; i < ARRAY_SIZE(stsi_file); i++) { + sf = &stsi_file[i]; + debugfs_create_file(sf->name, 0400, stsi_root, NULL, sf->fops); + } + if (IS_ENABLED(CONFIG_SCHED_TOPOLOGY) && MACHINE_HAS_TOPOLOGY) { + char link_to[10]; + + sprintf(link_to, "15_1_%d", topology_mnest_limit()); + 
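The STSI_FILE() machinery above exposes each raw STSI response page read-only through debugfs. A minimal reader sketch, assuming debugfs is mounted at the usual /sys/kernel/debug and that the arch directory used for arch_debugfs_dir is named s390 (adjust the path if your setup differs; the files are mode 0400, so this normally needs root):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>

#define STSI_2_2_2 "/sys/kernel/debug/s390/stsi/2_2_2"	/* assumed path */

int main(void)
{
	unsigned char page[4096];
	ssize_t n;
	int fd = open(STSI_2_2_2, O_RDONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	n = read(fd, page, sizeof(page));	/* raw copy of the STSI response page */
	if (n > 0)
		printf("read %zd bytes of raw STSI 2.2.2 data\n", n);
	close(fd);
	return 0;
}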
debugfs_create_symlink("topology", stsi_root, link_to); + } + return 0; +} +device_initcall(stsi_init_debugfs); + +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S new file mode 100644 index 0000000000..14c6d25c03 --- /dev/null +++ b/arch/s390/kernel/text_amode31.S @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Code that needs to run below 2 GB. + * + * Copyright IBM Corp. 2019 + */ + +#include <linux/linkage.h> +#include <asm/asm-extable.h> +#include <asm/errno.h> +#include <asm/sigp.h> + + .section .amode31.text,"ax" +/* + * Simplified version of expoline thunk. The normal thunks can not be used here, + * because they might be more than 2 GB away, and not reachable by the relative + * branch. No comdat, exrl, etc. optimizations used here, because it only + * affects a few functions that are not performance-relevant. + */ + .macro BR_EX_AMODE31_r14 + larl %r1,0f + ex 0,0(%r1) + j . +0: br %r14 + .endm + +/* + * int _diag14_amode31(unsigned long rx, unsigned long ry1, unsigned long subcode) + */ +SYM_FUNC_START(_diag14_amode31) + lgr %r1,%r2 + lgr %r2,%r3 + lgr %r3,%r4 + lhi %r5,-EIO + sam31 + diag %r1,%r2,0x14 +.Ldiag14_ex: + ipm %r5 + srl %r5,28 +.Ldiag14_fault: + sam64 + lgfr %r2,%r5 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag14_ex, .Ldiag14_fault) +SYM_FUNC_END(_diag14_amode31) + +/* + * int _diag210_amode31(struct diag210 *addr) + */ +SYM_FUNC_START(_diag210_amode31) + lgr %r1,%r2 + lhi %r2,-1 + sam31 + diag %r1,%r0,0x210 +.Ldiag210_ex: + ipm %r2 + srl %r2,28 +.Ldiag210_fault: + sam64 + lgfr %r2,%r2 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag210_ex, .Ldiag210_fault) +SYM_FUNC_END(_diag210_amode31) + +/* + * int diag8c(struct diag8c *addr, struct ccw_dev_id *devno, size_t len) +*/ +SYM_FUNC_START(_diag8c_amode31) + llgf %r3,0(%r3) + sam31 + diag %r2,%r4,0x8c +.Ldiag8c_ex: + sam64 + lgfr %r2,%r3 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex) +SYM_FUNC_END(_diag8c_amode31) +/* + * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode) + */ +SYM_FUNC_START(_diag26c_amode31) + lghi %r5,-EOPNOTSUPP + sam31 + diag %r2,%r4,0x26c +.Ldiag26c_ex: + sam64 + lgfr %r2,%r5 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag26c_ex, .Ldiag26c_ex) +SYM_FUNC_END(_diag26c_amode31) + +/* + * void _diag0c_amode31(struct hypfs_diag0c_entry *entry) + */ +SYM_FUNC_START(_diag0c_amode31) + sam31 + diag %r2,%r2,0x0c + sam64 + BR_EX_AMODE31_r14 +SYM_FUNC_END(_diag0c_amode31) + +/* + * void _diag308_reset_amode31(void) + * + * Calls diag 308 subcode 1 and continues execution + */ +SYM_FUNC_START(_diag308_reset_amode31) + larl %r4,ctlregs # Save control registers + stctg %c0,%c15,0(%r4) + lg %r2,0(%r4) # Disable lowcore protection + nilh %r2,0xefff + larl %r4,ctlreg0 + stg %r2,0(%r4) + lctlg %c0,%c0,0(%r4) + larl %r4,fpctl # Floating point control register + stfpc 0(%r4) + larl %r4,prefix # Save prefix register + stpx 0(%r4) + larl %r4,prefix_zero # Set prefix register to 0 + spx 0(%r4) + larl %r4,continue_psw # Save PSW flags + epsw %r2,%r3 + stm %r2,%r3,0(%r4) + larl %r4,.Lrestart_part2 # Setup restart PSW at absolute 0 + larl %r3,restart_diag308_psw + og %r4,0(%r3) # Save PSW + lghi %r3,0 + sturg %r4,%r3 # Use sturg, because of large pages + lghi %r1,1 + lghi %r0,0 + diag %r0,%r1,0x308 +.Lrestart_part2: + lhi %r0,0 # Load r0 with zero + lhi %r1,2 # Use mode 2 = ESAME (dump) + sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode + sam64 # Switch to 64 bit addressing mode + larl %r4,ctlregs # Restore 
control registers + lctlg %c0,%c15,0(%r4) + larl %r4,fpctl # Restore floating point ctl register + lfpc 0(%r4) + larl %r4,prefix # Restore prefix register + spx 0(%r4) + larl %r4,continue_psw # Restore PSW flags + larl %r2,.Lcontinue + stg %r2,8(%r4) + lpswe 0(%r4) +.Lcontinue: + BR_EX_AMODE31_r14 +SYM_FUNC_END(_diag308_reset_amode31) + + .section .amode31.data,"aw",@progbits + .balign 8 +SYM_DATA_LOCAL(restart_diag308_psw, .long 0x00080000,0x80000000) +SYM_DATA_LOCAL(continue_psw, .quad 0,0) +SYM_DATA_LOCAL(ctlreg0, .quad 0) +SYM_DATA_LOCAL(ctlregs, .fill 16,8,0) +SYM_DATA_LOCAL(fpctl, .long 0) +SYM_DATA_LOCAL(prefix, .long 0) +SYM_DATA_LOCAL(prefix_zero, .long 0) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c new file mode 100644 index 0000000000..d34d3548c0 --- /dev/null +++ b/arch/s390/kernel/time.c @@ -0,0 +1,944 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Time of day based timer functions. + * + * S390 version + * Copyright IBM Corp. 1999, 2008 + * Author(s): Hartmut Penner (hp@de.ibm.com), + * Martin Schwidefsky (schwidefsky@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + * + * Derived from "arch/i386/kernel/time.c" + * Copyright (C) 1991, 1992, 1995 Linus Torvalds + */ + +#define KMSG_COMPONENT "time" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel_stat.h> +#include <linux/errno.h> +#include <linux/export.h> +#include <linux/sched.h> +#include <linux/sched/clock.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/stop_machine.h> +#include <linux/time.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/types.h> +#include <linux/profile.h> +#include <linux/timex.h> +#include <linux/notifier.h> +#include <linux/timekeeper_internal.h> +#include <linux/clockchips.h> +#include <linux/gfp.h> +#include <linux/kprobes.h> +#include <linux/uaccess.h> +#include <vdso/vsyscall.h> +#include <vdso/clocksource.h> +#include <vdso/helpers.h> +#include <asm/facility.h> +#include <asm/delay.h> +#include <asm/div64.h> +#include <asm/vdso.h> +#include <asm/irq.h> +#include <asm/irq_regs.h> +#include <asm/vtimer.h> +#include <asm/stp.h> +#include <asm/cio.h> +#include "entry.h" + +union tod_clock tod_clock_base __section(".data"); +EXPORT_SYMBOL_GPL(tod_clock_base); + +u64 clock_comparator_max = -1ULL; +EXPORT_SYMBOL_GPL(clock_comparator_max); + +static DEFINE_PER_CPU(struct clock_event_device, comparators); + +ATOMIC_NOTIFIER_HEAD(s390_epoch_delta_notifier); +EXPORT_SYMBOL(s390_epoch_delta_notifier); + +unsigned char ptff_function_mask[16]; + +static unsigned long lpar_offset; +static unsigned long initial_leap_seconds; +static unsigned long tod_steering_end; +static long tod_steering_delta; + +/* + * Get time offsets with PTFF + */ +void __init time_early_init(void) +{ + struct ptff_qto qto; + struct ptff_qui qui; + int cs; + + /* Initialize TOD steering parameters */ + tod_steering_end = tod_clock_base.tod; + for (cs = 0; cs < CS_BASES; cs++) + vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + + if (!test_facility(28)) + return; + + ptff(&ptff_function_mask, sizeof(ptff_function_mask), PTFF_QAF); + + /* get LPAR offset */ + if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + lpar_offset = qto.tod_epoch_difference; + + /* get initial leap seconds */ + if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), 
PTFF_QUI) == 0) + initial_leap_seconds = (unsigned long) + ((long) qui.old_leap * 4096000000L); +} + +unsigned long long noinstr sched_clock_noinstr(void) +{ + return tod_to_ns(__get_tod_clock_monotonic()); +} + +/* + * Scheduler clock - returns current time in nanosec units. + */ +unsigned long long notrace sched_clock(void) +{ + return tod_to_ns(get_tod_clock_monotonic()); +} +NOKPROBE_SYMBOL(sched_clock); + +static void ext_to_timespec64(union tod_clock *clk, struct timespec64 *xt) +{ + unsigned long rem, sec, nsec; + + sec = clk->us; + rem = do_div(sec, 1000000); + nsec = ((clk->sus + (rem << 12)) * 125) >> 9; + xt->tv_sec = sec; + xt->tv_nsec = nsec; +} + +void clock_comparator_work(void) +{ + struct clock_event_device *cd; + + S390_lowcore.clock_comparator = clock_comparator_max; + cd = this_cpu_ptr(&comparators); + cd->event_handler(cd); +} + +static int s390_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + S390_lowcore.clock_comparator = get_tod_clock() + delta; + set_clock_comparator(S390_lowcore.clock_comparator); + return 0; +} + +/* + * Set up lowcore and control register of the current cpu to + * enable TOD clock and clock comparator interrupts. + */ +void init_cpu_timer(void) +{ + struct clock_event_device *cd; + int cpu; + + S390_lowcore.clock_comparator = clock_comparator_max; + set_clock_comparator(S390_lowcore.clock_comparator); + + cpu = smp_processor_id(); + cd = &per_cpu(comparators, cpu); + cd->name = "comparator"; + cd->features = CLOCK_EVT_FEAT_ONESHOT; + cd->mult = 16777; + cd->shift = 12; + cd->min_delta_ns = 1; + cd->min_delta_ticks = 1; + cd->max_delta_ns = LONG_MAX; + cd->max_delta_ticks = ULONG_MAX; + cd->rating = 400; + cd->cpumask = cpumask_of(cpu); + cd->set_next_event = s390_next_event; + + clockevents_register_device(cd); + + /* Enable clock comparator timer interrupt. */ + __ctl_set_bit(0,11); + + /* Always allow the timing alert external interrupt. */ + __ctl_set_bit(0, 4); +} + +static void clock_comparator_interrupt(struct ext_code ext_code, + unsigned int param32, + unsigned long param64) +{ + inc_irq_stat(IRQEXT_CLK); + if (S390_lowcore.clock_comparator == clock_comparator_max) + set_clock_comparator(S390_lowcore.clock_comparator); +} + +static void stp_timing_alert(struct stp_irq_parm *); + +static void timing_alert_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + inc_irq_stat(IRQEXT_TLA); + if (param32 & 0x00038000) + stp_timing_alert((struct stp_irq_parm *) &param32); +} + +static void stp_reset(void); + +void read_persistent_clock64(struct timespec64 *ts) +{ + union tod_clock clk; + u64 delta; + + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + store_tod_clock_ext(&clk); + clk.eitod -= delta; + ext_to_timespec64(&clk, ts); +} + +void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time, + struct timespec64 *boot_offset) +{ + struct timespec64 boot_time; + union tod_clock clk; + u64 delta; + + delta = initial_leap_seconds + TOD_UNIX_EPOCH; + clk = tod_clock_base; + clk.eitod -= delta; + ext_to_timespec64(&clk, &boot_time); + + read_persistent_clock64(wall_time); + *boot_offset = timespec64_sub(*wall_time, boot_time); +} + +static u64 read_tod_clock(struct clocksource *cs) +{ + unsigned long now, adj; + + preempt_disable(); /* protect from changes to steering parameters */ + now = get_tod_clock(); + adj = tod_steering_end - now; + if (unlikely((s64) adj > 0)) + /* + * manually steer by 1 cycle every 2^16 cycles. This + * corresponds to shifting the tod delta by 15.
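The mult/shift pair programmed into the clock_event_device in init_cpu_timer() above encodes the nanosecond-to-TOD-unit conversion used when an event is programmed: the clockevents core computes ticks roughly as (delta_ns * mult) >> shift, and 16777 / 2^12 is about 4.096, matching 4096 TOD units per microsecond. A tiny arithmetic check of that conversion, as a sketch rather than the kernel's exact code path:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Values from above: mult = 16777, shift = 12.  One TOD unit is
	 * 2^-12 microseconds, so the exact factor would be 4.096 ticks/ns. */
	uint64_t mult = 16777, shift = 12;
	uint64_t delta_ns = 1000000;	/* 1 ms */
	uint64_t ticks = (delta_ns * mult) >> shift;

	printf("1 ms -> %llu TOD units (exact: 4096000)\n",
	       (unsigned long long)ticks);
	printf("effective factor: %.6f ticks per ns\n",
	       (double)mult / (double)(1u << shift));
	return 0;
}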
1s is + * therefore steered in ~9h. The adjust will decrease + * over time, until it finally reaches 0. + */ + now += (tod_steering_delta < 0) ? (adj >> 15) : -(adj >> 15); + preempt_enable(); + return now; +} + +static struct clocksource clocksource_tod = { + .name = "tod", + .rating = 400, + .read = read_tod_clock, + .mask = CLOCKSOURCE_MASK(64), + .mult = 1000, + .shift = 12, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, + .vdso_clock_mode = VDSO_CLOCKMODE_TOD, +}; + +struct clocksource * __init clocksource_default_clock(void) +{ + return &clocksource_tod; +} + +/* + * Initialize the TOD clock and the CPU timer of + * the boot cpu. + */ +void __init time_init(void) +{ + /* Reset time synchronization interfaces. */ + stp_reset(); + + /* request the clock comparator external interrupt */ + if (register_external_irq(EXT_IRQ_CLK_COMP, clock_comparator_interrupt)) + panic("Couldn't request external interrupt 0x1004"); + + /* request the timing alert external interrupt */ + if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt)) + panic("Couldn't request external interrupt 0x1406"); + + if (__clocksource_register(&clocksource_tod) != 0) + panic("Could not register TOD clock source"); + + /* Enable TOD clock interrupts on the boot cpu. */ + init_cpu_timer(); + + /* Enable cpu timer interrupts on the boot cpu. */ + vtime_init(); +} + +static DEFINE_PER_CPU(atomic_t, clock_sync_word); +static DEFINE_MUTEX(stp_mutex); +static unsigned long clock_sync_flags; + +#define CLOCK_SYNC_HAS_STP 0 +#define CLOCK_SYNC_STP 1 +#define CLOCK_SYNC_STPINFO_VALID 2 + +/* + * The get_clock function for the physical clock. It will get the current + * TOD clock, subtract the LPAR offset and write the result to *clock. + * The function returns 0 if the clock is in sync with the external time + * source. If the clock mode is local it will return -EOPNOTSUPP and + * -EAGAIN if the clock is not in sync with the external reference. + */ +int get_phys_clock(unsigned long *clock) +{ + atomic_t *sw_ptr; + unsigned int sw0, sw1; + + sw_ptr = &get_cpu_var(clock_sync_word); + sw0 = atomic_read(sw_ptr); + *clock = get_tod_clock() - lpar_offset; + sw1 = atomic_read(sw_ptr); + put_cpu_var(clock_sync_word); + if (sw0 == sw1 && (sw0 & 0x80000000U)) + /* Success: time is in sync. */ + return 0; + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return -EOPNOTSUPP; + if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags)) + return -EACCES; + return -EAGAIN; +} +EXPORT_SYMBOL(get_phys_clock); + +/* + * Make get_phys_clock() return -EAGAIN. + */ +static void disable_sync_clock(void *dummy) +{ + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); + /* + * Clear the in-sync bit 2^31. All get_phys_clock calls will + * fail until the sync bit is turned back on. In addition + * increase the "sequence" counter to avoid the race of an + * stp event and the complete recovery against get_phys_clock. + */ + atomic_andnot(0x80000000, sw_ptr); + atomic_inc(sw_ptr); +} + +/* + * Make get_phys_clock() return 0 again. + * Needs to be called from a context disabled for preemption. + */ +static void enable_sync_clock(void) +{ + atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word); + atomic_or(0x80000000, sw_ptr); +} + +/* + * Function to check if the clock is in sync. + */ +static inline int check_sync_clock(void) +{ + atomic_t *sw_ptr; + int rc; + + sw_ptr = &get_cpu_var(clock_sync_word); + rc = (atomic_read(sw_ptr) & 0x80000000U) != 0; + put_cpu_var(clock_sync_word); + return rc; +} + +/* + * Apply clock delta to the global data structures. 
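The arithmetic behind that steering comment, in a small stand-alone sketch: the sync path (clock_sync_global() just below) sets tod_steering_end to now plus |delta| << 15, and read_tod_clock() above corrects every readout by the remaining distance shifted right by 15, so the step is fully masked right after the sync and the correction fades to zero over roughly nine hours per steered second. The one-second delta here is hypothetical:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int main(void)
{
	/* 1 TOD unit = 2^-12 microseconds, so 1 s = 4096000000 units. */
	int64_t delta = 4096000000LL;
	uint64_t now = 0x20000000000000ULL;	/* stand-in for get_tod_clock() */
	uint64_t steering_end = now + ((uint64_t)llabs(delta) << 15);

	uint64_t remaining = steering_end - now;	/* full window right after sync */
	uint64_t correction = remaining >> 15;		/* applied +/- per delta's sign */

	printf("steering window: %llu units = %llu s\n",
	       (unsigned long long)remaining,
	       (unsigned long long)(remaining / 4096000000ULL));
	printf("initial correction: %llu units (the full delta)\n",
	       (unsigned long long)correction);
	return 0;
}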
+ * This is called once on the CPU that performed the clock sync. + */ +static void clock_sync_global(long delta) +{ + unsigned long now, adj; + struct ptff_qto qto; + int cs; + + /* Fixup the monotonic sched clock. */ + tod_clock_base.eitod += delta; + /* Adjust TOD steering parameters. */ + now = get_tod_clock(); + adj = tod_steering_end - now; + if (unlikely((s64) adj >= 0)) + /* Calculate how much of the old adjustment is left. */ + tod_steering_delta = (tod_steering_delta < 0) ? + -(adj >> 15) : (adj >> 15); + tod_steering_delta += delta; + if ((abs(tod_steering_delta) >> 48) != 0) + panic("TOD clock sync offset %li is too large to drift\n", + tod_steering_delta); + tod_steering_end = now + (abs(tod_steering_delta) << 15); + for (cs = 0; cs < CS_BASES; cs++) { + vdso_data[cs].arch_data.tod_steering_end = tod_steering_end; + vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta; + } + + /* Update LPAR offset. */ + if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0) + lpar_offset = qto.tod_epoch_difference; + /* Call the TOD clock change notifier. */ + atomic_notifier_call_chain(&s390_epoch_delta_notifier, 0, &delta); +} + +/* + * Apply clock delta to the per-CPU data structures of this CPU. + * This is called for each online CPU after the call to clock_sync_global. + */ +static void clock_sync_local(long delta) +{ + /* Add the delta to the clock comparator. */ + if (S390_lowcore.clock_comparator != clock_comparator_max) { + S390_lowcore.clock_comparator += delta; + set_clock_comparator(S390_lowcore.clock_comparator); + } + /* Adjust the last_update_clock time-stamp. */ + S390_lowcore.last_update_clock += delta; +} + +/* Single threaded workqueue used for stp sync events */ +static struct workqueue_struct *time_sync_wq; + +static void __init time_init_wq(void) +{ + if (time_sync_wq) + return; + time_sync_wq = create_singlethread_workqueue("timesync"); +} + +struct clock_sync_data { + atomic_t cpus; + int in_sync; + long clock_delta; +}; + +/* + * Server Time Protocol (STP) code. + */ +static bool stp_online; +static struct stp_sstpi stp_info; +static void *stp_page; + +static void stp_work_fn(struct work_struct *work); +static DECLARE_WORK(stp_work, stp_work_fn); +static struct timer_list stp_timer; + +static int __init early_parse_stp(char *p) +{ + return kstrtobool(p, &stp_online); +} +early_param("stp", early_parse_stp); + +/* + * Reset STP attachment. + */ +static void __init stp_reset(void) +{ + int rc; + + stp_page = (void *) get_zeroed_page(GFP_ATOMIC); + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); + if (rc == 0) + set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); + else if (stp_online) { + pr_warn("The real or virtual hardware system does not provide an STP interface\n"); + free_page((unsigned long) stp_page); + stp_page = NULL; + stp_online = false; + } +} + +static void stp_timeout(struct timer_list *unused) +{ + queue_work(time_sync_wq, &stp_work); +} + +static int __init stp_init(void) +{ + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return 0; + timer_setup(&stp_timer, stp_timeout, 0); + time_init_wq(); + if (!stp_online) + return 0; + queue_work(time_sync_wq, &stp_work); + return 0; +} + +arch_initcall(stp_init); + +/* + * STP timing alert. There are three causes: + * 1) timing status change + * 2) link availability change + * 3) time control parameter change + * In all three cases we are only interested in the clock source state. + * If a STP clock source is now available use it. 
+ */ +static void stp_timing_alert(struct stp_irq_parm *intparm) +{ + if (intparm->tsc || intparm->lac || intparm->tcpc) + queue_work(time_sync_wq, &stp_work); +} + +/* + * STP sync check machine check. This is called when the timing state + * changes from the synchronized state to the unsynchronized state. + * After a STP sync check the clock is not in sync. The machine check + * is broadcasted to all cpus at the same time. + */ +int stp_sync_check(void) +{ + disable_sync_clock(NULL); + return 1; +} + +/* + * STP island condition machine check. This is called when an attached + * server attempts to communicate over an STP link and the servers + * have matching CTN ids and have a valid stratum-1 configuration + * but the configurations do not match. + */ +int stp_island_check(void) +{ + disable_sync_clock(NULL); + return 1; +} + +void stp_queue_work(void) +{ + queue_work(time_sync_wq, &stp_work); +} + +static int __store_stpinfo(void) +{ + int rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi)); + + if (rc) + clear_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + else + set_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); + return rc; +} + +static int stpinfo_valid(void) +{ + return stp_online && test_bit(CLOCK_SYNC_STPINFO_VALID, &clock_sync_flags); +} + +static int stp_sync_clock(void *data) +{ + struct clock_sync_data *sync = data; + long clock_delta, flags; + static int first; + int rc; + + enable_sync_clock(); + if (xchg(&first, 1) == 0) { + /* Wait until all other cpus entered the sync function. */ + while (atomic_read(&sync->cpus) != 0) + cpu_relax(); + rc = 0; + if (stp_info.todoff || stp_info.tmd != 2) { + flags = vdso_update_begin(); + rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, + &clock_delta); + if (rc == 0) { + sync->clock_delta = clock_delta; + clock_sync_global(clock_delta); + rc = __store_stpinfo(); + if (rc == 0 && stp_info.tmd != 2) + rc = -EAGAIN; + } + vdso_update_end(flags); + } + sync->in_sync = rc ? -EAGAIN : 1; + xchg(&first, 0); + } else { + /* Slave */ + atomic_dec(&sync->cpus); + /* Wait for in_sync to be set. */ + while (READ_ONCE(sync->in_sync) == 0) + __udelay(1); + } + if (sync->in_sync != 1) + /* Didn't work. Clear per-cpu in sync bit again. */ + disable_sync_clock(NULL); + /* Apply clock delta to per-CPU fields of this CPU. */ + clock_sync_local(sync->clock_delta); + + return 0; +} + +static int stp_clear_leap(void) +{ + struct __kernel_timex txc; + int ret; + + memset(&txc, 0, sizeof(txc)); + + ret = do_adjtimex(&txc); + if (ret < 0) + return ret; + + txc.modes = ADJ_STATUS; + txc.status &= ~(STA_INS|STA_DEL); + return do_adjtimex(&txc); +} + +static void stp_check_leap(void) +{ + struct stp_stzi stzi; + struct stp_lsoib *lsoib = &stzi.lsoib; + struct __kernel_timex txc; + int64_t timediff; + int leapdiff, ret; + + if (!stp_info.lu || !check_sync_clock()) { + /* + * Either a scheduled leap second was removed by the operator, + * or STP is out of sync. In both cases, clear the leap second + * kernel flags. 
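stp_clear_leap() above is essentially the in-kernel form of an adjtimex(2) status update: read the current state, then drop any pending leap-second insert/delete flags. A user-space equivalent for comparison (it needs CAP_SYS_TIME to actually change anything):

#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx = { 0 };

	if (adjtimex(&tx) < 0) {		/* modes == 0: read-only query */
		perror("adjtimex");
		return 1;
	}
	tx.modes = ADJ_STATUS;
	tx.status &= ~(STA_INS | STA_DEL);	/* cancel a scheduled leap second */
	if (adjtimex(&tx) < 0)
		perror("adjtimex(ADJ_STATUS)");
	return 0;
}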
+ */ + if (stp_clear_leap() < 0) + pr_err("failed to clear leap second flags\n"); + return; + } + + if (chsc_stzi(stp_page, &stzi, sizeof(stzi))) { + pr_err("stzi failed\n"); + return; + } + + timediff = tod_to_ns(lsoib->nlsout - get_tod_clock()) / NSEC_PER_SEC; + leapdiff = lsoib->nlso - lsoib->also; + + if (leapdiff != 1 && leapdiff != -1) { + pr_err("Cannot schedule %d leap seconds\n", leapdiff); + return; + } + + if (timediff < 0) { + if (stp_clear_leap() < 0) + pr_err("failed to clear leap second flags\n"); + } else if (timediff < 7200) { + memset(&txc, 0, sizeof(txc)); + ret = do_adjtimex(&txc); + if (ret < 0) + return; + + txc.modes = ADJ_STATUS; + if (leapdiff > 0) + txc.status |= STA_INS; + else + txc.status |= STA_DEL; + ret = do_adjtimex(&txc); + if (ret < 0) + pr_err("failed to set leap second flags\n"); + /* arm Timer to clear leap second flags */ + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(14400 * MSEC_PER_SEC)); + } else { + /* The day the leap second is scheduled for hasn't been reached. Retry + * in one hour. + */ + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(3600 * MSEC_PER_SEC)); + } +} + +/* + * STP work. Check for the STP state and take over the clock + * synchronization if the STP clock source is usable. + */ +static void stp_work_fn(struct work_struct *work) +{ + struct clock_sync_data stp_sync; + int rc; + + /* prevent multiple execution. */ + mutex_lock(&stp_mutex); + + if (!stp_online) { + chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000, NULL); + del_timer_sync(&stp_timer); + goto out_unlock; + } + + rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xf0e0, NULL); + if (rc) + goto out_unlock; + + rc = __store_stpinfo(); + if (rc || stp_info.c == 0) + goto out_unlock; + + /* Skip synchronization if the clock is already in sync. */ + if (!check_sync_clock()) { + memset(&stp_sync, 0, sizeof(stp_sync)); + cpus_read_lock(); + atomic_set(&stp_sync.cpus, num_online_cpus() - 1); + stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask); + cpus_read_unlock(); + } + + if (!check_sync_clock()) + /* + * There is a usable clock but the synchronization failed. + * Retry after a second. 
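stp_sync_clock() runs on every CPU under stop_machine(): the xchg on "first" elects one leader that performs the actual sync while the others count themselves in and then spin on in_sync. The same rendezvous can be sketched with ordinary threads and C11 atomics (threads stand in for CPUs, the real clock work is elided; build with -pthread):

#include <stdio.h>
#include <pthread.h>
#include <stdatomic.h>

#define NTHREADS 4

static atomic_int first;			/* 0 -> this caller becomes the leader */
static atomic_int waiting = NTHREADS - 1;	/* followers still to arrive */
static atomic_int in_sync;			/* 0 = pending, 1 = done */

static void *sync_clock(void *arg)
{
	if (atomic_exchange(&first, 1) == 0) {
		while (atomic_load(&waiting) != 0)
			;			/* wait for all other "CPUs" */
		/* ... perform the actual adjustment here ... */
		atomic_store(&in_sync, 1);
	} else {
		atomic_fetch_sub(&waiting, 1);
		while (atomic_load(&in_sync) == 0)
			;			/* spin, like the kernel's __udelay() loop */
	}
	return NULL;
}

int main(void)
{
	pthread_t t[NTHREADS];

	for (int i = 0; i < NTHREADS; i++)
		pthread_create(&t[i], NULL, sync_clock, NULL);
	for (int i = 0; i < NTHREADS; i++)
		pthread_join(t[i], NULL);
	printf("in_sync=%d\n", atomic_load(&in_sync));
	return 0;
}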
+ */ + mod_timer(&stp_timer, jiffies + msecs_to_jiffies(MSEC_PER_SEC)); + else if (stp_info.lu) + stp_check_leap(); + +out_unlock: + mutex_unlock(&stp_mutex); +} + +/* + * STP subsys sysfs interface functions + */ +static struct bus_type stp_subsys = { + .name = "stp", + .dev_name = "stp", +}; + +static ssize_t ctn_id_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%016lx\n", + *(unsigned long *) stp_info.ctnid); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(ctn_id); + +static ssize_t ctn_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.ctn); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(ctn_type); + +static ssize_t dst_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x2000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.dsto); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(dst_offset); + +static ssize_t leap_seconds_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x8000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.leaps); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(leap_seconds); + +static ssize_t leap_seconds_scheduled_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct stp_stzi stzi; + ssize_t ret; + + mutex_lock(&stp_mutex); + if (!stpinfo_valid() || !(stp_info.vbits & 0x8000) || !stp_info.lu) { + mutex_unlock(&stp_mutex); + return -ENODATA; + } + + ret = chsc_stzi(stp_page, &stzi, sizeof(stzi)); + mutex_unlock(&stp_mutex); + if (ret < 0) + return ret; + + if (!stzi.lsoib.p) + return sprintf(buf, "0,0\n"); + + return sprintf(buf, "%lu,%d\n", + tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC, + stzi.lsoib.nlso - stzi.lsoib.also); +} + +static DEVICE_ATTR_RO(leap_seconds_scheduled); + +static ssize_t stratum_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.stratum); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(stratum); + +static ssize_t time_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x0800)) + ret = sprintf(buf, "%i\n", (int) stp_info.tto); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(time_offset); + +static ssize_t time_zone_offset_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid() && (stp_info.vbits & 0x4000)) + ret = sprintf(buf, "%i\n", (int)(s16) stp_info.tzo); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(time_zone_offset); + +static ssize_t timing_mode_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tmd); + 
mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(timing_mode); + +static ssize_t timing_state_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t ret = -ENODATA; + + mutex_lock(&stp_mutex); + if (stpinfo_valid()) + ret = sprintf(buf, "%i\n", stp_info.tst); + mutex_unlock(&stp_mutex); + return ret; +} + +static DEVICE_ATTR_RO(timing_state); + +static ssize_t online_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%i\n", stp_online); +} + +static ssize_t online_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + unsigned int value; + + value = simple_strtoul(buf, NULL, 0); + if (value != 0 && value != 1) + return -EINVAL; + if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) + return -EOPNOTSUPP; + mutex_lock(&stp_mutex); + stp_online = value; + if (stp_online) + set_bit(CLOCK_SYNC_STP, &clock_sync_flags); + else + clear_bit(CLOCK_SYNC_STP, &clock_sync_flags); + queue_work(time_sync_wq, &stp_work); + mutex_unlock(&stp_mutex); + return count; +} + +/* + * Can't use DEVICE_ATTR because the attribute should be named + * stp/online but dev_attr_online already exists in this file .. + */ +static DEVICE_ATTR_RW(online); + +static struct attribute *stp_dev_attrs[] = { + &dev_attr_ctn_id.attr, + &dev_attr_ctn_type.attr, + &dev_attr_dst_offset.attr, + &dev_attr_leap_seconds.attr, + &dev_attr_online.attr, + &dev_attr_leap_seconds_scheduled.attr, + &dev_attr_stratum.attr, + &dev_attr_time_offset.attr, + &dev_attr_time_zone_offset.attr, + &dev_attr_timing_mode.attr, + &dev_attr_timing_state.attr, + NULL +}; +ATTRIBUTE_GROUPS(stp_dev); + +static int __init stp_init_sysfs(void) +{ + return subsys_system_register(&stp_subsys, stp_dev_groups); +} + +device_initcall(stp_init_sysfs); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c new file mode 100644 index 0000000000..68adf1de88 --- /dev/null +++ b/arch/s390/kernel/topology.c @@ -0,0 +1,661 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2007, 2011 + */ + +#define KMSG_COMPONENT "cpu" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/workqueue.h> +#include <linux/memblock.h> +#include <linux/uaccess.h> +#include <linux/sysctl.h> +#include <linux/cpuset.h> +#include <linux/device.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/sched/topology.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/mm.h> +#include <linux/nodemask.h> +#include <linux/node.h> +#include <asm/sysinfo.h> + +#define PTF_HORIZONTAL (0UL) +#define PTF_VERTICAL (1UL) +#define PTF_CHECK (2UL) + +enum { + TOPOLOGY_MODE_HW, + TOPOLOGY_MODE_SINGLE, + TOPOLOGY_MODE_PACKAGE, + TOPOLOGY_MODE_UNINITIALIZED +}; + +struct mask_info { + struct mask_info *next; + unsigned char id; + cpumask_t mask; +}; + +static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED; +static void set_topology_timer(void); +static void topology_work_fn(struct work_struct *work); +static struct sysinfo_15_1_x *tl_info; + +static DECLARE_WORK(topology_work, topology_work_fn); + +/* + * Socket/Book linked lists and cpu_topology updates are + * protected by "sched_domains_mutex". 
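Each of the socket/book/drawer variables above heads a chain of mask_info nodes, one node per container, each holding the CPU mask of that container; cpu_group_map() below resolves a CPU to its siblings by walking such a chain until a node's mask contains the CPU. A compact stand-alone sketch of that lookup, with a plain 64-bit word standing in for cpumask_t:

#include <stdio.h>
#include <stdint.h>

struct mask_node {
	struct mask_node *next;
	unsigned char id;
	uint64_t mask;
};

static uint64_t siblings_of(struct mask_node *level, unsigned int cpu)
{
	for (; level; level = level->next)
		if (level->mask & (1ULL << cpu))
			return level->mask;	/* container holding this CPU */
	return 1ULL << cpu;			/* not found: CPU is its own group */
}

int main(void)
{
	struct mask_node socket1 = { NULL, 1, 0x00f0 };		/* CPUs 4-7 */
	struct mask_node socket0 = { &socket1, 0, 0x000f };	/* CPUs 0-3 */

	printf("cpu 5 shares a socket with mask %#llx\n",
	       (unsigned long long)siblings_of(&socket0, 5));
	return 0;
}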
+ */ +static struct mask_info socket_info; +static struct mask_info book_info; +static struct mask_info drawer_info; + +struct cpu_topology_s390 cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu) +{ + static cpumask_t mask; + + cpumask_clear(&mask); + if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) + goto out; + cpumask_set_cpu(cpu, &mask); + switch (topology_mode) { + case TOPOLOGY_MODE_HW: + while (info) { + if (cpumask_test_cpu(cpu, &info->mask)) { + cpumask_copy(&mask, &info->mask); + break; + } + info = info->next; + } + break; + case TOPOLOGY_MODE_PACKAGE: + cpumask_copy(&mask, cpu_present_mask); + break; + default: + fallthrough; + case TOPOLOGY_MODE_SINGLE: + break; + } + cpumask_and(&mask, &mask, &cpu_setup_mask); +out: + cpumask_copy(dst, &mask); +} + +static void cpu_thread_map(cpumask_t *dst, unsigned int cpu) +{ + static cpumask_t mask; + unsigned int max_cpu; + + cpumask_clear(&mask); + if (!cpumask_test_cpu(cpu, &cpu_setup_mask)) + goto out; + cpumask_set_cpu(cpu, &mask); + if (topology_mode != TOPOLOGY_MODE_HW) + goto out; + cpu -= cpu % (smp_cpu_mtid + 1); + max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); + for (; cpu <= max_cpu; cpu++) { + if (cpumask_test_cpu(cpu, &cpu_setup_mask)) + cpumask_set_cpu(cpu, &mask); + } +out: + cpumask_copy(dst, &mask); +} + +#define TOPOLOGY_CORE_BITS 64 + +static void add_cpus_to_mask(struct topology_core *tl_core, + struct mask_info *drawer, + struct mask_info *book, + struct mask_info *socket) +{ + struct cpu_topology_s390 *topo; + unsigned int core; + + for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) { + unsigned int max_cpu, rcore; + int cpu; + + rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; + cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); + if (cpu < 0) + continue; + max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1); + for (; cpu <= max_cpu; cpu++) { + topo = &cpu_topology[cpu]; + topo->drawer_id = drawer->id; + topo->book_id = book->id; + topo->socket_id = socket->id; + topo->core_id = rcore; + topo->thread_id = cpu; + topo->dedicated = tl_core->d; + cpumask_set_cpu(cpu, &drawer->mask); + cpumask_set_cpu(cpu, &book->mask); + cpumask_set_cpu(cpu, &socket->mask); + smp_cpu_set_polarization(cpu, tl_core->pp); + } + } +} + +static void clear_masks(void) +{ + struct mask_info *info; + + info = &socket_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } + info = &book_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } + info = &drawer_info; + while (info) { + cpumask_clear(&info->mask); + info = info->next; + } +} + +static union topology_entry *next_tle(union topology_entry *tle) +{ + if (!tle->nl) + return (union topology_entry *)((struct topology_core *)tle + 1); + return (union topology_entry *)((struct topology_container *)tle + 1); +} + +static void tl_to_masks(struct sysinfo_15_1_x *info) +{ + struct mask_info *socket = &socket_info; + struct mask_info *book = &book_info; + struct mask_info *drawer = &drawer_info; + union topology_entry *tle, *end; + + clear_masks(); + tle = info->tle; + end = (union topology_entry *)((unsigned long)info + info->length); + while (tle < end) { + switch (tle->nl) { + case 3: + drawer = drawer->next; + drawer->id = tle->container.id; + break; + case 2: + book = book->next; + book->id = tle->container.id; + break; + case 1: + socket = socket->next; + socket->id = tle->container.id; + break; + case 0: + add_cpus_to_mask(&tle->cpu, 
drawer, book, socket); + break; + default: + clear_masks(); + return; + } + tle = next_tle(tle); + } +} + +static void topology_update_polarization_simple(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + smp_cpu_set_polarization(cpu, POLARIZATION_HRZ); +} + +static int ptf(unsigned long fc) +{ + int rc; + + asm volatile( + " .insn rre,0xb9a20000,%1,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (rc) + : "d" (fc) : "cc"); + return rc; +} + +int topology_set_cpu_management(int fc) +{ + int cpu, rc; + + if (!MACHINE_HAS_TOPOLOGY) + return -EOPNOTSUPP; + if (fc) + rc = ptf(PTF_VERTICAL); + else + rc = ptf(PTF_HORIZONTAL); + if (rc) + return -EBUSY; + for_each_possible_cpu(cpu) + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + return rc; +} + +void update_cpu_masks(void) +{ + struct cpu_topology_s390 *topo, *topo_package, *topo_sibling; + int cpu, sibling, pkg_first, smt_first, id; + + for_each_possible_cpu(cpu) { + topo = &cpu_topology[cpu]; + cpu_thread_map(&topo->thread_mask, cpu); + cpu_group_map(&topo->core_mask, &socket_info, cpu); + cpu_group_map(&topo->book_mask, &book_info, cpu); + cpu_group_map(&topo->drawer_mask, &drawer_info, cpu); + topo->booted_cores = 0; + if (topology_mode != TOPOLOGY_MODE_HW) { + id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu; + topo->thread_id = cpu; + topo->core_id = cpu; + topo->socket_id = id; + topo->book_id = id; + topo->drawer_id = id; + } + } + for_each_online_cpu(cpu) { + topo = &cpu_topology[cpu]; + pkg_first = cpumask_first(&topo->core_mask); + topo_package = &cpu_topology[pkg_first]; + if (cpu == pkg_first) { + for_each_cpu(sibling, &topo->core_mask) { + topo_sibling = &cpu_topology[sibling]; + smt_first = cpumask_first(&topo_sibling->thread_mask); + if (sibling == smt_first) + topo_package->booted_cores++; + } + } else { + topo->booted_cores = topo_package->booted_cores; + } + } +} + +void store_topology(struct sysinfo_15_1_x *info) +{ + stsi(info, 15, 1, topology_mnest_limit()); +} + +static void __arch_update_dedicated_flag(void *arg) +{ + if (topology_cpu_dedicated(smp_processor_id())) + set_cpu_flag(CIF_DEDICATED_CPU); + else + clear_cpu_flag(CIF_DEDICATED_CPU); +} + +static int __arch_update_cpu_topology(void) +{ + struct sysinfo_15_1_x *info = tl_info; + int rc = 0; + + mutex_lock(&smp_cpu_state_mutex); + if (MACHINE_HAS_TOPOLOGY) { + rc = 1; + store_topology(info); + tl_to_masks(info); + } + update_cpu_masks(); + if (!MACHINE_HAS_TOPOLOGY) + topology_update_polarization_simple(); + mutex_unlock(&smp_cpu_state_mutex); + return rc; +} + +int arch_update_cpu_topology(void) +{ + struct device *dev; + int cpu, rc; + + rc = __arch_update_cpu_topology(); + on_each_cpu(__arch_update_dedicated_flag, NULL, 0); + for_each_online_cpu(cpu) { + dev = get_cpu_device(cpu); + if (dev) + kobject_uevent(&dev->kobj, KOBJ_CHANGE); + } + return rc; +} + +static void topology_work_fn(struct work_struct *work) +{ + rebuild_sched_domains(); +} + +void topology_schedule_update(void) +{ + schedule_work(&topology_work); +} + +static void topology_flush_work(void) +{ + flush_work(&topology_work); +} + +static void topology_timer_fn(struct timer_list *unused) +{ + if (ptf(PTF_CHECK)) + topology_schedule_update(); + set_topology_timer(); +} + +static struct timer_list topology_timer; + +static atomic_t topology_poll = ATOMIC_INIT(0); + +static void set_topology_timer(void) +{ + if (atomic_add_unless(&topology_poll, -1, 0)) + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100)); + else + mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * 
MSEC_PER_SEC)); +} + +void topology_expect_change(void) +{ + if (!MACHINE_HAS_TOPOLOGY) + return; + /* This is racy, but it doesn't matter since it is just a heuristic. + * Worst case is that we poll in a higher frequency for a bit longer. + */ + if (atomic_read(&topology_poll) > 60) + return; + atomic_add(60, &topology_poll); + set_topology_timer(); +} + +static int cpu_management; + +static ssize_t dispatching_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", cpu_management); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} + +static ssize_t dispatching_store(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + int val, rc; + char delim; + + if (sscanf(buf, "%d %c", &val, &delim) != 1) + return -EINVAL; + if (val != 0 && val != 1) + return -EINVAL; + rc = 0; + cpus_read_lock(); + mutex_lock(&smp_cpu_state_mutex); + if (cpu_management == val) + goto out; + rc = topology_set_cpu_management(val); + if (rc) + goto out; + cpu_management = val; + topology_expect_change(); +out: + mutex_unlock(&smp_cpu_state_mutex); + cpus_read_unlock(); + return rc ? rc : count; +} +static DEVICE_ATTR_RW(dispatching); + +static ssize_t cpu_polarization_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + switch (smp_cpu_get_polarization(cpu)) { + case POLARIZATION_HRZ: + count = sprintf(buf, "horizontal\n"); + break; + case POLARIZATION_VL: + count = sprintf(buf, "vertical:low\n"); + break; + case POLARIZATION_VM: + count = sprintf(buf, "vertical:medium\n"); + break; + case POLARIZATION_VH: + count = sprintf(buf, "vertical:high\n"); + break; + default: + count = sprintf(buf, "unknown\n"); + break; + } + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL); + +static struct attribute *topology_cpu_attrs[] = { + &dev_attr_polarization.attr, + NULL, +}; + +static struct attribute_group topology_cpu_attr_group = { + .attrs = topology_cpu_attrs, +}; + +static ssize_t cpu_dedicated_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int cpu = dev->id; + ssize_t count; + + mutex_lock(&smp_cpu_state_mutex); + count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu)); + mutex_unlock(&smp_cpu_state_mutex); + return count; +} +static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL); + +static struct attribute *topology_extra_cpu_attrs[] = { + &dev_attr_dedicated.attr, + NULL, +}; + +static struct attribute_group topology_extra_cpu_attr_group = { + .attrs = topology_extra_cpu_attrs, +}; + +int topology_cpu_init(struct cpu *cpu) +{ + int rc; + + rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); + if (rc || !MACHINE_HAS_TOPOLOGY) + return rc; + rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group); + if (rc) + sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group); + return rc; +} + +static const struct cpumask *cpu_thread_mask(int cpu) +{ + return &cpu_topology[cpu].thread_mask; +} + + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_mask; +} + +static const struct cpumask *cpu_book_mask(int cpu) +{ + return &cpu_topology[cpu].book_mask; +} + +static const struct cpumask *cpu_drawer_mask(int cpu) +{ + return &cpu_topology[cpu].drawer_mask; +} + +static struct sched_domain_topology_level 
s390_topology[] = { + { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, + { cpu_book_mask, SD_INIT_NAME(BOOK) }, + { cpu_drawer_mask, SD_INIT_NAME(DRAWER) }, + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +static void __init alloc_masks(struct sysinfo_15_1_x *info, + struct mask_info *mask, int offset) +{ + int i, nr_masks; + + nr_masks = info->mag[TOPOLOGY_NR_MAG - offset]; + for (i = 0; i < info->mnest - offset; i++) + nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i]; + nr_masks = max(nr_masks, 1); + for (i = 0; i < nr_masks; i++) { + mask->next = memblock_alloc(sizeof(*mask->next), 8); + if (!mask->next) + panic("%s: Failed to allocate %zu bytes align=0x%x\n", + __func__, sizeof(*mask->next), 8); + mask = mask->next; + } +} + +void __init topology_init_early(void) +{ + struct sysinfo_15_1_x *info; + + set_sched_topology(s390_topology); + if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) { + if (MACHINE_HAS_TOPOLOGY) + topology_mode = TOPOLOGY_MODE_HW; + else + topology_mode = TOPOLOGY_MODE_SINGLE; + } + if (!MACHINE_HAS_TOPOLOGY) + goto out; + tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!tl_info) + panic("%s: Failed to allocate %lu bytes align=0x%lx\n", + __func__, PAGE_SIZE, PAGE_SIZE); + info = tl_info; + store_topology(info); + pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", + info->mag[0], info->mag[1], info->mag[2], info->mag[3], + info->mag[4], info->mag[5], info->mnest); + alloc_masks(info, &socket_info, 1); + alloc_masks(info, &book_info, 2); + alloc_masks(info, &drawer_info, 3); +out: + cpumask_set_cpu(0, &cpu_setup_mask); + __arch_update_cpu_topology(); + __arch_update_dedicated_flag(NULL); +} + +static inline int topology_get_mode(int enabled) +{ + if (!enabled) + return TOPOLOGY_MODE_SINGLE; + return MACHINE_HAS_TOPOLOGY ? 
TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE; +} + +static inline int topology_is_enabled(void) +{ + return topology_mode != TOPOLOGY_MODE_SINGLE; +} + +static int __init topology_setup(char *str) +{ + bool enabled; + int rc; + + rc = kstrtobool(str, &enabled); + if (rc) + return rc; + topology_mode = topology_get_mode(enabled); + return 0; +} +early_param("topology", topology_setup); + +static int topology_ctl_handler(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int enabled = topology_is_enabled(); + int new_mode; + int rc; + struct ctl_table ctl_entry = { + .procname = ctl->procname, + .data = &enabled, + .maxlen = sizeof(int), + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + + rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos); + if (rc < 0 || !write) + return rc; + + mutex_lock(&smp_cpu_state_mutex); + new_mode = topology_get_mode(enabled); + if (topology_mode != new_mode) { + topology_mode = new_mode; + topology_schedule_update(); + } + mutex_unlock(&smp_cpu_state_mutex); + topology_flush_work(); + + return rc; +} + +static struct ctl_table topology_ctl_table[] = { + { + .procname = "topology", + .mode = 0644, + .proc_handler = topology_ctl_handler, + }, + { }, +}; + +static int __init topology_init(void) +{ + struct device *dev_root; + int rc = 0; + + timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE); + if (MACHINE_HAS_TOPOLOGY) + set_topology_timer(); + else + topology_update_polarization_simple(); + register_sysctl("s390", topology_ctl_table); + + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + rc = device_create_file(dev_root, &dev_attr_dispatching); + put_device(dev_root); + } + return rc; +} +device_initcall(topology_init); diff --git a/arch/s390/kernel/trace.c b/arch/s390/kernel/trace.c new file mode 100644 index 0000000000..11a669f3cc --- /dev/null +++ b/arch/s390/kernel/trace.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tracepoint definitions for s390 + * + * Copyright IBM Corp. 2015 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#include <linux/percpu.h> +#define CREATE_TRACE_POINTS +#include <asm/trace/diag.h> + +EXPORT_TRACEPOINT_SYMBOL(s390_diagnose); + +static DEFINE_PER_CPU(unsigned int, diagnose_trace_depth); + +void notrace trace_s390_diagnose_norecursion(int diag_nr) +{ + unsigned long flags; + unsigned int *depth; + + /* Avoid lockdep recursion. */ + if (IS_ENABLED(CONFIG_LOCKDEP)) + return; + local_irq_save(flags); + depth = this_cpu_ptr(&diagnose_trace_depth); + if (*depth == 0) { + (*depth)++; + trace_s390_diagnose(diag_nr); + (*depth)--; + } + local_irq_restore(flags); +} diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c new file mode 100644 index 0000000000..1d2aa448d1 --- /dev/null +++ b/arch/s390/kernel/traps.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * S390 version + * Copyright IBM Corp. 1999, 2000 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), + * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), + * + * Derived from "arch/i386/kernel/traps.c" + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * 'Traps.c' handles hardware traps and faults after we have saved some + * state in 'asm.s'. 
+ */ +#include "asm/irqflags.h" +#include "asm/ptrace.h" +#include <linux/kprobes.h> +#include <linux/kdebug.h> +#include <linux/randomize_kstack.h> +#include <linux/extable.h> +#include <linux/ptrace.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/cpu.h> +#include <linux/entry-common.h> +#include <asm/asm-extable.h> +#include <asm/fpu/api.h> +#include <asm/vtime.h> +#include "entry.h" + +static inline void __user *get_trap_ip(struct pt_regs *regs) +{ + unsigned long address; + + if (regs->int_code & 0x200) + address = current->thread.trap_tdb.data[3]; + else + address = regs->psw.addr; + return (void __user *) (address - (regs->int_code >> 16)); +} + +int is_valid_bugaddr(unsigned long addr) +{ + return 1; +} + +void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) +{ + if (user_mode(regs)) { + force_sig_fault(si_signo, si_code, get_trap_ip(regs)); + report_user_fault(regs, si_signo, 0); + } else { + if (!fixup_exception(regs)) + die(regs, str); + } +} + +static void do_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) +{ + if (notify_die(DIE_TRAP, str, regs, 0, + regs->int_code, si_signo) == NOTIFY_STOP) + return; + do_report_trap(regs, si_signo, si_code, str); +} +NOKPROBE_SYMBOL(do_trap); + +void do_per_trap(struct pt_regs *regs) +{ + if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) + return; + if (!current->ptrace) + return; + force_sig_fault(SIGTRAP, TRAP_HWBKPT, + (void __force __user *) current->thread.per_event.address); +} +NOKPROBE_SYMBOL(do_per_trap); + +static void default_trap_handler(struct pt_regs *regs) +{ + if (user_mode(regs)) { + report_user_fault(regs, SIGSEGV, 0); + force_exit_sig(SIGSEGV); + } else + die(regs, "Unknown program exception"); +} + +#define DO_ERROR_INFO(name, signr, sicode, str) \ +static void name(struct pt_regs *regs) \ +{ \ + do_trap(regs, signr, sicode, str); \ +} + +DO_ERROR_INFO(addressing_exception, SIGILL, ILL_ILLADR, + "addressing exception") +DO_ERROR_INFO(execute_exception, SIGILL, ILL_ILLOPN, + "execute exception") +DO_ERROR_INFO(divide_exception, SIGFPE, FPE_INTDIV, + "fixpoint divide exception") +DO_ERROR_INFO(overflow_exception, SIGFPE, FPE_INTOVF, + "fixpoint overflow exception") +DO_ERROR_INFO(hfp_overflow_exception, SIGFPE, FPE_FLTOVF, + "HFP overflow exception") +DO_ERROR_INFO(hfp_underflow_exception, SIGFPE, FPE_FLTUND, + "HFP underflow exception") +DO_ERROR_INFO(hfp_significance_exception, SIGFPE, FPE_FLTRES, + "HFP significance exception") +DO_ERROR_INFO(hfp_divide_exception, SIGFPE, FPE_FLTDIV, + "HFP divide exception") +DO_ERROR_INFO(hfp_sqrt_exception, SIGFPE, FPE_FLTINV, + "HFP square root exception") +DO_ERROR_INFO(operand_exception, SIGILL, ILL_ILLOPN, + "operand exception") +DO_ERROR_INFO(privileged_op, SIGILL, ILL_PRVOPC, + "privileged operation") +DO_ERROR_INFO(special_op_exception, SIGILL, ILL_ILLOPN, + "special operation exception") +DO_ERROR_INFO(transaction_exception, SIGILL, ILL_ILLOPN, + "transaction constraint exception") + +static inline void do_fp_trap(struct pt_regs *regs, __u32 fpc) +{ + int si_code = 0; + /* FPC[2] is Data Exception Code */ + if ((fpc & 0x00000300) == 0) { + /* bits 6 and 7 of DXC are 0 iff IEEE exception */ + if (fpc & 0x8000) /* invalid fp operation */ + si_code = FPE_FLTINV; + else if (fpc & 0x4000) /* div by 0 */ + si_code = FPE_FLTDIV; + else if (fpc & 0x2000) /* overflow */ + si_code = FPE_FLTOVF; + else if (fpc & 0x1000) 
/* underflow */ + si_code = FPE_FLTUND; + else if (fpc & 0x0800) /* inexact */ + si_code = FPE_FLTRES; + } + do_trap(regs, SIGFPE, si_code, "floating point exception"); +} + +static void translation_specification_exception(struct pt_regs *regs) +{ + /* May never happen. */ + panic("Translation-Specification Exception"); +} + +static void illegal_op(struct pt_regs *regs) +{ + __u8 opcode[6]; + __u16 __user *location; + int is_uprobe_insn = 0; + int signal = 0; + + location = get_trap_ip(regs); + + if (user_mode(regs)) { + if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) + return; + if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { + if (current->ptrace) + force_sig_fault(SIGTRAP, TRAP_BRKPT, location); + else + signal = SIGILL; +#ifdef CONFIG_UPROBES + } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { + is_uprobe_insn = 1; +#endif + } else + signal = SIGILL; + } + /* + * We got either an illegal op in kernel mode, or user space trapped + * on a uprobes illegal instruction. See if kprobes or uprobes picks + * it up. If not, SIGILL. + */ + if (is_uprobe_insn || !user_mode(regs)) { + if (notify_die(DIE_BPT, "bpt", regs, 0, + 3, SIGTRAP) != NOTIFY_STOP) + signal = SIGILL; + } + if (signal) + do_trap(regs, signal, ILL_ILLOPC, "illegal operation"); +} +NOKPROBE_SYMBOL(illegal_op); + +DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN, + "specification exception"); + +static void vector_exception(struct pt_regs *regs) +{ + int si_code, vic; + + if (!MACHINE_HAS_VX) { + do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation"); + return; + } + + /* get vector interrupt code from fpc */ + save_fpu_regs(); + vic = (current->thread.fpu.fpc & 0xf00) >> 8; + switch (vic) { + case 1: /* invalid vector operation */ + si_code = FPE_FLTINV; + break; + case 2: /* division by zero */ + si_code = FPE_FLTDIV; + break; + case 3: /* overflow */ + si_code = FPE_FLTOVF; + break; + case 4: /* underflow */ + si_code = FPE_FLTUND; + break; + case 5: /* inexact */ + si_code = FPE_FLTRES; + break; + default: /* unknown cause */ + si_code = 0; + } + do_trap(regs, SIGFPE, si_code, "vector exception"); +} + +static void data_exception(struct pt_regs *regs) +{ + save_fpu_regs(); + if (current->thread.fpu.fpc & FPC_DXC_MASK) + do_fp_trap(regs, current->thread.fpu.fpc); + else + do_trap(regs, SIGILL, ILL_ILLOPN, "data exception"); +} + +static void space_switch_exception(struct pt_regs *regs) +{ + /* Set user psw back to home space mode. */ + if (user_mode(regs)) + regs->psw.mask |= PSW_ASC_HOME; + /* Send SIGILL. 
*/ + do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); +} + +static void monitor_event_exception(struct pt_regs *regs) +{ + if (user_mode(regs)) + return; + + switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { + case BUG_TRAP_TYPE_NONE: + fixup_exception(regs); + break; + case BUG_TRAP_TYPE_WARN: + break; + case BUG_TRAP_TYPE_BUG: + die(regs, "monitor event"); + break; + } +} + +void kernel_stack_overflow(struct pt_regs *regs) +{ + bust_spinlocks(1); + printk("Kernel stack overflow.\n"); + show_regs(regs); + bust_spinlocks(0); + panic("Corrupt kernel stack, can't continue."); +} +NOKPROBE_SYMBOL(kernel_stack_overflow); + +static void __init test_monitor_call(void) +{ + int val = 1; + + if (!IS_ENABLED(CONFIG_BUG)) + return; + asm volatile( + " mc 0,0\n" + "0: xgr %0,%0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (val)); + if (!val) + panic("Monitor call doesn't work!\n"); +} + +void __init trap_init(void) +{ + local_mcck_enable(); + test_monitor_call(); +} + +static void (*pgm_check_table[128])(struct pt_regs *regs); + +void noinstr __do_pgm_check(struct pt_regs *regs) +{ + unsigned int trapnr; + irqentry_state_t state; + + regs->int_code = S390_lowcore.pgm_int_code; + regs->int_parm_long = S390_lowcore.trans_exc_code; + + state = irqentry_enter(regs); + + if (user_mode(regs)) { + update_timer_sys(); + if (!static_branch_likely(&cpu_has_bear)) { + if (regs->last_break < 4096) + regs->last_break = 1; + } + current->thread.last_break = regs->last_break; + } + + if (S390_lowcore.pgm_code & 0x0200) { + /* transaction abort */ + current->thread.trap_tdb = S390_lowcore.pgm_tdb; + } + + if (S390_lowcore.pgm_code & PGM_INT_CODE_PER) { + if (user_mode(regs)) { + struct per_event *ev = &current->thread.per_event; + + set_thread_flag(TIF_PER_TRAP); + ev->address = S390_lowcore.per_address; + ev->cause = S390_lowcore.per_code_combined; + ev->paid = S390_lowcore.per_access_id; + } else { + /* PER event in kernel is kprobes */ + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + do_per_trap(regs); + goto out; + } + } + + if (!irqs_disabled_flags(regs->psw.mask)) + trace_hardirqs_on(); + __arch_local_irq_ssm(regs->psw.mask & ~PSW_MASK_PER); + + trapnr = regs->int_code & PGM_INT_CODE_MASK; + if (trapnr) + pgm_check_table[trapnr](regs); +out: + local_irq_disable(); + irqentry_exit(regs, state); +} + +/* + * The program check table contains exactly 128 (0x00-0x7f) entries. Each + * line defines the function to be called corresponding to the program check + * interruption code.
+ */ +static void (*pgm_check_table[128])(struct pt_regs *regs) = { + [0x00] = default_trap_handler, + [0x01] = illegal_op, + [0x02] = privileged_op, + [0x03] = execute_exception, + [0x04] = do_protection_exception, + [0x05] = addressing_exception, + [0x06] = specification_exception, + [0x07] = data_exception, + [0x08] = overflow_exception, + [0x09] = divide_exception, + [0x0a] = overflow_exception, + [0x0b] = divide_exception, + [0x0c] = hfp_overflow_exception, + [0x0d] = hfp_underflow_exception, + [0x0e] = hfp_significance_exception, + [0x0f] = hfp_divide_exception, + [0x10] = do_dat_exception, + [0x11] = do_dat_exception, + [0x12] = translation_specification_exception, + [0x13] = special_op_exception, + [0x14] = default_trap_handler, + [0x15] = operand_exception, + [0x16] = default_trap_handler, + [0x17] = default_trap_handler, + [0x18] = transaction_exception, + [0x19] = default_trap_handler, + [0x1a] = default_trap_handler, + [0x1b] = vector_exception, + [0x1c] = space_switch_exception, + [0x1d] = hfp_sqrt_exception, + [0x1e ... 0x37] = default_trap_handler, + [0x38] = do_dat_exception, + [0x39] = do_dat_exception, + [0x3a] = do_dat_exception, + [0x3b] = do_dat_exception, + [0x3c] = default_trap_handler, + [0x3d] = do_secure_storage_access, + [0x3e] = do_non_secure_storage_access, + [0x3f] = do_secure_storage_violation, + [0x40] = monitor_event_exception, + [0x41 ... 0x7f] = default_trap_handler, +}; + +#define COND_TRAP(x) asm( \ + ".weak " __stringify(x) "\n\t" \ + ".set " __stringify(x) "," \ + __stringify(default_trap_handler)) + +COND_TRAP(do_secure_storage_access); +COND_TRAP(do_non_secure_storage_access); +COND_TRAP(do_secure_storage_violation); diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c new file mode 100644 index 0000000000..0ece156fdd --- /dev/null +++ b/arch/s390/kernel/unwind_bc.c @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/sched.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/interrupt.h> +#include <asm/sections.h> +#include <asm/ptrace.h> +#include <asm/bitops.h> +#include <asm/stacktrace.h> +#include <asm/unwind.h> + +unsigned long unwind_get_return_address(struct unwind_state *state) +{ + if (unwind_done(state)) + return 0; + return __kernel_text_address(state->ip) ? 
state->ip : 0; +} +EXPORT_SYMBOL_GPL(unwind_get_return_address); + +static bool outside_of_stack(struct unwind_state *state, unsigned long sp) +{ + return (sp <= state->sp) || + (sp > state->stack_info.end - sizeof(struct stack_frame)); +} + +static bool update_stack_info(struct unwind_state *state, unsigned long sp) +{ + struct stack_info *info = &state->stack_info; + unsigned long *mask = &state->stack_mask; + + /* New stack pointer leaves the current stack */ + if (get_stack_info(sp, state->task, info, mask) != 0 || + !on_stack(info, sp, sizeof(struct stack_frame))) + /* 'sp' does not point to a valid stack */ + return false; + return true; +} + +static inline bool is_final_pt_regs(struct unwind_state *state, + struct pt_regs *regs) +{ + /* user mode or kernel thread pt_regs at the bottom of task stack */ + if (task_pt_regs(state->task) == regs) + return true; + + /* user mode pt_regs at the bottom of irq stack */ + return state->stack_info.type == STACK_TYPE_IRQ && + state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs && + READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE; +} + +bool unwind_next_frame(struct unwind_state *state) +{ + struct stack_info *info = &state->stack_info; + struct stack_frame *sf; + struct pt_regs *regs; + unsigned long sp, ip; + bool reliable; + + regs = state->regs; + if (unlikely(regs)) { + sp = state->sp; + sf = (struct stack_frame *) sp; + ip = READ_ONCE_NOCHECK(sf->gprs[8]); + reliable = false; + regs = NULL; + /* skip bogus %r14 or if is the same as regs->psw.addr */ + if (!__kernel_text_address(ip) || state->ip == unwind_recover_ret_addr(state, ip)) { + state->regs = NULL; + return unwind_next_frame(state); + } + } else { + sf = (struct stack_frame *) state->sp; + sp = READ_ONCE_NOCHECK(sf->back_chain); + if (likely(sp)) { + /* Non-zero back-chain points to the previous frame */ + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } + sf = (struct stack_frame *) sp; + ip = READ_ONCE_NOCHECK(sf->gprs[8]); + reliable = true; + } else { + /* No back-chain, look for a pt_regs structure */ + sp = state->sp + STACK_FRAME_OVERHEAD; + if (!on_stack(info, sp, sizeof(struct pt_regs))) + goto out_err; + regs = (struct pt_regs *) sp; + if (is_final_pt_regs(state, regs)) + goto out_stop; + ip = READ_ONCE_NOCHECK(regs->psw.addr); + sp = READ_ONCE_NOCHECK(regs->gprs[15]); + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } + reliable = true; + } + } + + /* Sanity check: ABI requires SP to be aligned 8 bytes. 
*/ + if (sp & 0x7) + goto out_err; + + /* Update unwind state */ + state->sp = sp; + state->regs = regs; + state->reliable = reliable; + state->ip = unwind_recover_ret_addr(state, ip); + return true; + +out_err: + state->error = true; +out_stop: + state->stack_info.type = STACK_TYPE_UNKNOWN; + return false; +} +EXPORT_SYMBOL_GPL(unwind_next_frame); + +void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long first_frame) +{ + struct stack_info *info = &state->stack_info; + struct stack_frame *sf; + unsigned long ip, sp; + + memset(state, 0, sizeof(*state)); + state->task = task; + state->regs = regs; + + /* Don't even attempt to start from user mode regs: */ + if (regs && user_mode(regs)) { + info->type = STACK_TYPE_UNKNOWN; + return; + } + + /* Get the instruction pointer from pt_regs or the stack frame */ + if (regs) { + ip = regs->psw.addr; + sp = regs->gprs[15]; + } else if (task == current) { + sp = current_frame_address(); + } else { + sp = task->thread.ksp; + } + + /* Get current stack pointer and initialize stack info */ + if (!update_stack_info(state, sp)) { + /* Something is wrong with the stack pointer */ + info->type = STACK_TYPE_UNKNOWN; + state->error = true; + return; + } + + if (!regs) { + /* Stack frame is within valid stack */ + sf = (struct stack_frame *)sp; + ip = READ_ONCE_NOCHECK(sf->gprs[8]); + } + + /* Update unwind state */ + state->sp = sp; + state->reliable = true; + state->ip = unwind_recover_ret_addr(state, ip); + + if (!first_frame) + return; + /* Skip through the call chain to the specified starting frame */ + while (!unwind_done(state)) { + if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) { + if (state->sp >= first_frame) + break; + } + unwind_next_frame(state); + } +} +EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c new file mode 100644 index 0000000000..b88345ef8b --- /dev/null +++ b/arch/s390/kernel/uprobes.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * User-space Probes (UProbes) for s390 + * + * Copyright IBM Corp. 
2014 + * Author(s): Jan Willeke, + */ + +#include <linux/uaccess.h> +#include <linux/uprobes.h> +#include <linux/compat.h> +#include <linux/kdebug.h> +#include <linux/sched/task_stack.h> + +#include <asm/switch_to.h> +#include <asm/facility.h> +#include <asm/kprobes.h> +#include <asm/dis.h> +#include "entry.h" + +#define UPROBE_TRAP_NR UINT_MAX + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + return probe_is_prohibited_opcode(auprobe->insn); +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if (psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) + return -EINVAL; + if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) + return -EINVAL; + clear_thread_flag(TIF_PER_TRAP); + auprobe->saved_per = psw_bits(regs->psw).per; + auprobe->saved_int_code = regs->int_code; + regs->int_code = UPROBE_TRAP_NR; + regs->psw.addr = current->utask->xol_vaddr; + set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + return 0; +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *tsk) +{ + struct pt_regs *regs = task_pt_regs(tsk); + + if (regs->int_code != UPROBE_TRAP_NR) + return true; + return false; +} + +static int check_per_event(unsigned short cause, unsigned long control, + struct pt_regs *regs) +{ + if (!(regs->psw.mask & PSW_MASK_PER)) + return 0; + /* user space single step */ + if (control == 0) + return 1; + /* over indication for storage alteration */ + if ((control & 0x20200000) && (cause & 0x2000)) + return 1; + if (cause & 0x8000) { + /* all branches */ + if ((control & 0x80800000) == 0x80000000) + return 1; + /* branch into selected range */ + if (((control & 0x80800000) == 0x80800000) && + regs->psw.addr >= current->thread.per_user.start && + regs->psw.addr <= current->thread.per_user.end) + return 1; + } + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + int fixup = probe_get_fixup_type(auprobe->insn); + struct uprobe_task *utask = current->utask; + + clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP); + update_cr_regs(current); + psw_bits(regs->psw).per = auprobe->saved_per; + regs->int_code = auprobe->saved_int_code; + + if (fixup & FIXUP_PSW_NORMAL) + regs->psw.addr += utask->vaddr - utask->xol_vaddr; + if (fixup & FIXUP_RETURN_REGISTER) { + int reg = (auprobe->insn[0] & 0xf0) >> 4; + + regs->gprs[reg] += utask->vaddr - utask->xol_vaddr; + } + if (fixup & FIXUP_BRANCH_NOT_TAKEN) { + int ilen = insn_length(auprobe->insn[0] >> 8); + + if (regs->psw.addr - utask->xol_vaddr == ilen) + regs->psw.addr = utask->vaddr + ilen; + } + if (check_per_event(current->thread.per_event.cause, + current->thread.per_user.control, regs)) { + /* fix per address */ + current->thread.per_event.address = utask->vaddr; + /* trigger per event */ + set_thread_flag(TIF_PER_TRAP); + } + return 0; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = data; + struct pt_regs *regs = args->regs; + + if (!user_mode(regs)) + return NOTIFY_DONE; + if (regs->int_code & 0x200) /* Trap during transaction */ + return NOTIFY_DONE; + switch (val) { + case DIE_BPT: + if (uprobe_pre_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + case DIE_SSTEP: + if (uprobe_post_sstep_notifier(regs)) + return NOTIFY_STOP; + break; + default: + break; + } + return NOTIFY_DONE; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + 
clear_thread_flag(TIF_UPROBE_SINGLESTEP); + regs->int_code = auprobe->saved_int_code; + regs->psw.addr = current->utask->vaddr; + current->thread.per_event.address = current->utask->vaddr; +} + +unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline, + struct pt_regs *regs) +{ + unsigned long orig; + + orig = regs->gprs[14]; + regs->gprs[14] = trampoline; + return orig; +} + +bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) +{ + if (ctx == RP_CHECK_CHAIN_CALL) + return user_stack_pointer(regs) <= ret->stack; + else + return user_stack_pointer(regs) < ret->stack; +} + +/* Instruction Emulation */ + +static void adjust_psw_addr(psw_t *psw, unsigned long len) +{ + psw->addr = __rewind_psw(*psw, -len); +} + +#define EMU_ILLEGAL_OP 1 +#define EMU_SPECIFICATION 2 +#define EMU_ADDRESSING 3 + +#define emu_load_ril(ptr, output) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else \ + *(output) = input; \ + __rc; \ +}) + +#define emu_store_ril(regs, ptr, input) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(ptr) __ptr = (ptr); \ + int __rc = 0; \ + \ + if ((u64 __force)__ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (put_user(*(input), __ptr)) \ + __rc = EMU_ADDRESSING; \ + if (__rc == 0) \ + sim_stor_event(regs, \ + (void __force *)__ptr, \ + mask + 1); \ + __rc; \ +}) + +#define emu_cmp_ril(regs, ptr, cmp) \ +({ \ + unsigned int mask = sizeof(*(ptr)) - 1; \ + __typeof__(*(ptr)) input; \ + int __rc = 0; \ + \ + if ((u64 __force)ptr & mask) \ + __rc = EMU_SPECIFICATION; \ + else if (get_user(input, ptr)) \ + __rc = EMU_ADDRESSING; \ + else if (input > *(cmp)) \ + psw_bits((regs)->psw).cc = 1; \ + else if (input < *(cmp)) \ + psw_bits((regs)->psw).cc = 2; \ + else \ + psw_bits((regs)->psw).cc = 0; \ + __rc; \ +}) + +struct insn_ril { + u8 opc0; + u8 reg : 4; + u8 opc1 : 4; + s32 disp; +} __packed; + +union split_register { + u64 u64; + u32 u32[2]; + u16 u16[4]; + s64 s64; + s32 s32[2]; + s16 s16[4]; +}; + +/* + * If user per registers are setup to trace storage alterations and an + * emulated store took place on a fitting address a user trap is generated. + */ +static void sim_stor_event(struct pt_regs *regs, void *addr, int len) +{ + if (!(regs->psw.mask & PSW_MASK_PER)) + return; + if (!(current->thread.per_user.control & PER_EVENT_STORE)) + return; + if ((void *)current->thread.per_user.start > (addr + len)) + return; + if ((void *)current->thread.per_user.end < addr) + return; + current->thread.per_event.address = regs->psw.addr; + current->thread.per_event.cause = PER_EVENT_STORE >> 16; + set_thread_flag(TIF_PER_TRAP); +} + +/* + * pc relative instructions are emulated, since parameters may not be + * accessible from the xol area due to range limitations. 
+ */ +static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + union split_register *rx; + struct insn_ril *insn; + unsigned int ilen; + void *uptr; + int rc = 0; + + insn = (struct insn_ril *) &auprobe->insn; + rx = (union split_register *) &regs->gprs[insn->reg]; + uptr = (void *)(regs->psw.addr + (insn->disp * 2)); + ilen = insn_length(insn->opc0); + + switch (insn->opc0) { + case 0xc0: + switch (insn->opc1) { + case 0x00: /* larl */ + rx->u64 = (unsigned long)uptr; + break; + } + break; + case 0xc4: + switch (insn->opc1) { + case 0x02: /* llhrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]); + break; + case 0x04: /* lghrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u64); + break; + case 0x05: /* lhrl */ + rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]); + break; + case 0x06: /* llghrl */ + rc = emu_load_ril((u16 __user *)uptr, &rx->u64); + break; + case 0x08: /* lgrl */ + rc = emu_load_ril((u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* lgfrl */ + rc = emu_load_ril((s32 __user *)uptr, &rx->u64); + break; + case 0x0d: /* lrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]); + break; + case 0x0e: /* llgfrl */ + rc = emu_load_ril((u32 __user *)uptr, &rx->u64); + break; + case 0x07: /* sthrl */ + rc = emu_store_ril(regs, (u16 __user *)uptr, &rx->u16[3]); + break; + case 0x0b: /* stgrl */ + rc = emu_store_ril(regs, (u64 __user *)uptr, &rx->u64); + break; + case 0x0f: /* strl */ + rc = emu_store_ril(regs, (u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + case 0xc6: + switch (insn->opc1) { + case 0x04: /* cghrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64); + break; + case 0x05: /* chrl */ + rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]); + break; + case 0x06: /* clghrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64); + break; + case 0x07: /* clhrl */ + rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]); + break; + case 0x08: /* cgrl */ + rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64); + break; + case 0x0a: /* clgrl */ + rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64); + break; + case 0x0c: /* cgfrl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64); + break; + case 0x0d: /* crl */ + rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]); + break; + case 0x0e: /* clgfrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64); + break; + case 0x0f: /* clrl */ + rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]); + break; + } + break; + } + adjust_psw_addr(&regs->psw, ilen); + switch (rc) { + case EMU_ILLEGAL_OP: + regs->int_code = ilen << 16 | 0x0001; + do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL); + break; + case EMU_SPECIFICATION: + regs->int_code = ilen << 16 | 0x0006; + do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL); + break; + case EMU_ADDRESSING: + regs->int_code = ilen << 16 | 0x0005; + do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL); + break; + } +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_24BIT) || + ((psw_bits(regs->psw).eaba == PSW_BITS_AMODE_31BIT) && + !is_compat_task())) { + regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE); + do_report_trap(regs, SIGILL, ILL_ILLADR, NULL); + return true; + } + if (probe_is_insn_relative_long(auprobe->insn)) { + handle_insn_ril(auprobe, regs); + return true; + } + return false; +} diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c new file mode 100644 index 0000000000..fc07bc39e6 --- /dev/null +++ 
b/arch/s390/kernel/uv.c @@ -0,0 +1,717 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Ultravisor functions and initialization + * + * Copyright IBM Corp. 2019, 2020 + */ +#define KMSG_COMPONENT "prot_virt" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/sizes.h> +#include <linux/bitmap.h> +#include <linux/memblock.h> +#include <linux/pagemap.h> +#include <linux/swap.h> +#include <asm/facility.h> +#include <asm/sections.h> +#include <asm/uv.h> + +/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST +int __bootdata_preserved(prot_virt_guest); +#endif + +/* + * uv_info contains both host and guest information but it's currently only + * expected to be used within modules if it's the KVM module or for + * any PV guest module. + * + * The kernel itself will write these values once in uv_query_info() + * and then make some of them readable via a sysfs interface. + */ +struct uv_info __bootdata_preserved(uv_info); +EXPORT_SYMBOL(uv_info); + +#if IS_ENABLED(CONFIG_KVM) +int __bootdata_preserved(prot_virt_host); +EXPORT_SYMBOL(prot_virt_host); + +static int __init uv_init(phys_addr_t stor_base, unsigned long stor_len) +{ + struct uv_cb_init uvcb = { + .header.cmd = UVC_CMD_INIT_UV, + .header.len = sizeof(uvcb), + .stor_origin = stor_base, + .stor_len = stor_len, + }; + + if (uv_call(0, (uint64_t)&uvcb)) { + pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n", + uvcb.header.rc, uvcb.header.rrc); + return -1; + } + return 0; +} + +void __init setup_uv(void) +{ + void *uv_stor_base; + + if (!is_prot_virt_host()) + return; + + uv_stor_base = memblock_alloc_try_nid( + uv_info.uv_base_stor_len, SZ_1M, SZ_2G, + MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE); + if (!uv_stor_base) { + pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n", + uv_info.uv_base_stor_len); + goto fail; + } + + if (uv_init(__pa(uv_stor_base), uv_info.uv_base_stor_len)) { + memblock_free(uv_stor_base, uv_info.uv_base_stor_len); + goto fail; + } + + pr_info("Reserving %luMB as ultravisor base storage\n", + uv_info.uv_base_stor_len >> 20); + return; +fail: + pr_info("Disabling support for protected virtualization"); + prot_virt_host = 0; +} + +/* + * Requests the Ultravisor to pin the page in the shared state. This will + * cause an intercept when the guest attempts to unshare the pinned page. + */ +int uv_pin_shared(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_PIN_PAGE_SHARED, + .header.len = sizeof(uvcb), + .paddr = paddr, + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} +EXPORT_SYMBOL_GPL(uv_pin_shared); + +/* + * Requests the Ultravisor to destroy a guest page and make it + * accessible to the host. The destroy clears the page instead of + * exporting. + * + * @paddr: Absolute host address of page to be destroyed + */ +static int uv_destroy_page(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_DESTR_SEC_STOR, + .header.len = sizeof(uvcb), + .paddr = paddr + }; + + if (uv_call(0, (u64)&uvcb)) { + /* + * Older firmware uses 107/d as an indication of a non secure + * page. Let us emulate the newer variant (no-op). 
+ */ + if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd) + return 0; + return -EINVAL; + } + return 0; +} + +/* + * The caller must already hold a reference to the page + */ +int uv_destroy_owned_page(unsigned long paddr) +{ + struct page *page = phys_to_page(paddr); + int rc; + + get_page(page); + rc = uv_destroy_page(paddr); + if (!rc) + clear_bit(PG_arch_1, &page->flags); + put_page(page); + return rc; +} + +/* + * Requests the Ultravisor to encrypt a guest page and make it + * accessible to the host for paging (export). + * + * @paddr: Absolute host address of page to be exported + */ +int uv_convert_from_secure(unsigned long paddr) +{ + struct uv_cb_cfs uvcb = { + .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, + .header.len = sizeof(uvcb), + .paddr = paddr + }; + + if (uv_call(0, (u64)&uvcb)) + return -EINVAL; + return 0; +} + +/* + * The caller must already hold a reference to the page + */ +int uv_convert_owned_from_secure(unsigned long paddr) +{ + struct page *page = phys_to_page(paddr); + int rc; + + get_page(page); + rc = uv_convert_from_secure(paddr); + if (!rc) + clear_bit(PG_arch_1, &page->flags); + put_page(page); + return rc; +} + +/* + * Calculate the expected ref_count for a page that would otherwise have no + * further pins. This was cribbed from similar functions in other places in + * the kernel, but with some slight modifications. We know that a secure + * page can not be a huge page for example. + */ +static int expected_page_refs(struct page *page) +{ + int res; + + res = page_mapcount(page); + if (PageSwapCache(page)) { + res++; + } else if (page_mapping(page)) { + res++; + if (page_has_private(page)) + res++; + } + return res; +} + +static int make_page_secure(struct page *page, struct uv_cb_header *uvcb) +{ + int expected, cc = 0; + + if (PageWriteback(page)) + return -EAGAIN; + expected = expected_page_refs(page); + if (!page_ref_freeze(page, expected)) + return -EBUSY; + set_bit(PG_arch_1, &page->flags); + /* + * If the UVC does not succeed or fail immediately, we don't want to + * loop for long, or we might get stall notifications. + * On the other hand, this is a complex scenario and we are holding a lot of + * locks, so we can't easily sleep and reschedule. We try only once, + * and if the UVC returned busy or partial completion, we return + * -EAGAIN and we let the callers deal with it. + */ + cc = __uv_call(0, (u64)uvcb); + page_ref_unfreeze(page, expected); + /* + * Return -ENXIO if the page was not mapped, -EINVAL for other errors. + * If busy or partially completed, return -EAGAIN. + */ + if (cc == UVC_CC_OK) + return 0; + else if (cc == UVC_CC_BUSY || cc == UVC_CC_PARTIAL) + return -EAGAIN; + return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; +} + +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. 
+ */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + +/* + * Requests the Ultravisor to make a page accessible to a guest. + * If it's brought in the first time, it will be cleared. If + * it has been exported before, it will be decrypted and integrity + * checked. + */ +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +{ + struct vm_area_struct *vma; + bool local_drain = false; + spinlock_t *ptelock; + unsigned long uaddr; + struct page *page; + pte_t *ptep; + int rc; + +again: + rc = -EFAULT; + mmap_read_lock(gmap->mm); + + uaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(uaddr)) + goto out; + vma = vma_lookup(gmap->mm, uaddr); + if (!vma) + goto out; + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. If + * userspace is playing dirty tricky with mapping huge pages later + * on this will result in a segmentation fault. + */ + if (is_vm_hugetlb_page(vma)) + goto out; + + rc = -ENXIO; + ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); + if (!ptep) + goto out; + if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { + page = pte_page(*ptep); + rc = -EAGAIN; + if (trylock_page(page)) { + if (should_export_before_import(uvcb, gmap->mm)) + uv_convert_from_secure(page_to_phys(page)); + rc = make_page_secure(page, uvcb); + unlock_page(page); + } + } + pte_unmap_unlock(ptep, ptelock); +out: + mmap_read_unlock(gmap->mm); + + if (rc == -EAGAIN) { + /* + * If we are here because the UVC returned busy or partial + * completion, this is just a useless check, but it is safe. + */ + wait_on_page_writeback(page); + } else if (rc == -EBUSY) { + /* + * If we have tried a local drain and the page refcount + * still does not match our expected safe value, try with a + * system wide drain. This is needed if the pagevecs holding + * the page are on a different CPU. + */ + if (local_drain) { + lru_add_drain_all(); + /* We give up here, and let the caller try again */ + return -EAGAIN; + } + /* + * We are here if the page refcount does not match the + * expected safe value. The main culprits are usually + * pagevecs. With lru_add_drain() we drain the pagevecs + * on the local CPU so that hopefully the refcount will + * reach the expected safe value. + */ + lru_add_drain(); + local_drain = true; + /* And now we try again immediately after draining */ + goto again; + } else if (rc == -ENXIO) { + if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) + return -EFAULT; + return -EAGAIN; + } + return rc; +} +EXPORT_SYMBOL_GPL(gmap_make_secure); + +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = gmap->guest_handle, + .gaddr = gaddr, + }; + + return gmap_make_secure(gmap, gaddr, &uvcb); +} +EXPORT_SYMBOL_GPL(gmap_convert_to_secure); + +/** + * gmap_destroy_page - Destroy a guest page. 
+ * @gmap: the gmap of the guest + * @gaddr: the guest address to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + */ +int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +{ + struct vm_area_struct *vma; + unsigned long uaddr; + struct page *page; + int rc; + + rc = -EFAULT; + mmap_read_lock(gmap->mm); + + uaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(uaddr)) + goto out; + vma = vma_lookup(gmap->mm, uaddr); + if (!vma) + goto out; + /* + * Huge pages should not be able to become secure + */ + if (is_vm_hugetlb_page(vma)) + goto out; + + rc = 0; + /* we take an extra reference here */ + page = follow_page(vma, uaddr, FOLL_WRITE | FOLL_GET); + if (IS_ERR_OR_NULL(page)) + goto out; + rc = uv_destroy_owned_page(page_to_phys(page)); + /* + * Fault handlers can race; it is possible that two CPUs will fault + * on the same secure page. One CPU can destroy the page, reboot, + * re-enter secure mode and import it, while the second CPU was + * stuck at the beginning of the handler. At some point the second + * CPU will be able to progress, and it will not be able to destroy + * the page. In that case we do not want to terminate the process, + * we instead try to export the page. + */ + if (rc) + rc = uv_convert_owned_from_secure(page_to_phys(page)); + put_page(page); +out: + mmap_read_unlock(gmap->mm); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_destroy_page); + +/* + * To be called with the page locked or with an extra reference! This will + * prevent gmap_make_secure from touching the page concurrently. Having 2 + * parallel make_page_accessible is fine, as the UV calls will become a + * no-op if the page is already exported. + */ +int arch_make_page_accessible(struct page *page) +{ + int rc = 0; + + /* Hugepage cannot be protected, so nothing to do */ + if (PageHuge(page)) + return 0; + + /* + * PG_arch_1 is used in 3 places: + * 1. for kernel page tables during early boot + * 2. for storage keys of huge pages and KVM + * 3. As an indication that this page might be secure. This can + * overindicate, e.g. we set the bit before calling + * convert_to_secure. + * As secure pages are never huge, all 3 variants can co-exists. 
+ */ + if (!test_bit(PG_arch_1, &page->flags)) + return 0; + + rc = uv_pin_shared(page_to_phys(page)); + if (!rc) { + clear_bit(PG_arch_1, &page->flags); + return 0; + } + + rc = uv_convert_from_secure(page_to_phys(page)); + if (!rc) { + clear_bit(PG_arch_1, &page->flags); + return 0; + } + + return rc; +} +EXPORT_SYMBOL_GPL(arch_make_page_accessible); + +#endif + +#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) +static ssize_t uv_query_facilities(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n%lx\n%lx\n%lx\n", + uv_info.inst_calls_list[0], + uv_info.inst_calls_list[1], + uv_info.inst_calls_list[2], + uv_info.inst_calls_list[3]); +} + +static struct kobj_attribute uv_query_facilities_attr = + __ATTR(facilities, 0444, uv_query_facilities, NULL); + +static ssize_t uv_query_supp_se_hdr_ver(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_ver); +} + +static struct kobj_attribute uv_query_supp_se_hdr_ver_attr = + __ATTR(supp_se_hdr_ver, 0444, uv_query_supp_se_hdr_ver, NULL); + +static ssize_t uv_query_supp_se_hdr_pcf(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_se_hdr_pcf); +} + +static struct kobj_attribute uv_query_supp_se_hdr_pcf_attr = + __ATTR(supp_se_hdr_pcf, 0444, uv_query_supp_se_hdr_pcf, NULL); + +static ssize_t uv_query_dump_cpu_len(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.guest_cpu_stor_len); +} + +static struct kobj_attribute uv_query_dump_cpu_len_attr = + __ATTR(uv_query_dump_cpu_len, 0444, uv_query_dump_cpu_len, NULL); + +static ssize_t uv_query_dump_storage_state_len(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_storage_state_len); +} + +static struct kobj_attribute uv_query_dump_storage_state_len_attr = + __ATTR(dump_storage_state_len, 0444, uv_query_dump_storage_state_len, NULL); + +static ssize_t uv_query_dump_finalize_len(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.conf_dump_finalize_len); +} + +static struct kobj_attribute uv_query_dump_finalize_len_attr = + __ATTR(dump_finalize_len, 0444, uv_query_dump_finalize_len, NULL); + +static ssize_t uv_query_feature_indications(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications); +} + +static struct kobj_attribute uv_query_feature_indications_attr = + __ATTR(feature_indications, 0444, uv_query_feature_indications, NULL); + +static ssize_t uv_query_max_guest_cpus(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_guest_cpu_id + 1); +} + +static struct kobj_attribute uv_query_max_guest_cpus_attr = + __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL); + +static ssize_t uv_query_max_guest_vms(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_num_sec_conf); +} + +static struct kobj_attribute uv_query_max_guest_vms_attr = + __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL); + +static ssize_t uv_query_max_guest_addr(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.max_sec_stor_addr); +} + +static struct kobj_attribute uv_query_max_guest_addr_attr = + 
__ATTR(max_address, 0444, uv_query_max_guest_addr, NULL); + +static ssize_t uv_query_supp_att_req_hdr_ver(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_att_req_hdr_ver); +} + +static struct kobj_attribute uv_query_supp_att_req_hdr_ver_attr = + __ATTR(supp_att_req_hdr_ver, 0444, uv_query_supp_att_req_hdr_ver, NULL); + +static ssize_t uv_query_supp_att_pflags(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_att_pflags); +} + +static struct kobj_attribute uv_query_supp_att_pflags_attr = + __ATTR(supp_att_pflags, 0444, uv_query_supp_att_pflags, NULL); + +static ssize_t uv_query_supp_add_secret_req_ver(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_req_ver); +} + +static struct kobj_attribute uv_query_supp_add_secret_req_ver_attr = + __ATTR(supp_add_secret_req_ver, 0444, uv_query_supp_add_secret_req_ver, NULL); + +static ssize_t uv_query_supp_add_secret_pcf(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_add_secret_pcf); +} + +static struct kobj_attribute uv_query_supp_add_secret_pcf_attr = + __ATTR(supp_add_secret_pcf, 0444, uv_query_supp_add_secret_pcf, NULL); + +static ssize_t uv_query_supp_secret_types(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%lx\n", uv_info.supp_secret_types); +} + +static struct kobj_attribute uv_query_supp_secret_types_attr = + __ATTR(supp_secret_types, 0444, uv_query_supp_secret_types, NULL); + +static ssize_t uv_query_max_secrets(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%d\n", uv_info.max_secrets); +} + +static struct kobj_attribute uv_query_max_secrets_attr = + __ATTR(max_secrets, 0444, uv_query_max_secrets, NULL); + +static struct attribute *uv_query_attrs[] = { + &uv_query_facilities_attr.attr, + &uv_query_feature_indications_attr.attr, + &uv_query_max_guest_cpus_attr.attr, + &uv_query_max_guest_vms_attr.attr, + &uv_query_max_guest_addr_attr.attr, + &uv_query_supp_se_hdr_ver_attr.attr, + &uv_query_supp_se_hdr_pcf_attr.attr, + &uv_query_dump_storage_state_len_attr.attr, + &uv_query_dump_finalize_len_attr.attr, + &uv_query_dump_cpu_len_attr.attr, + &uv_query_supp_att_req_hdr_ver_attr.attr, + &uv_query_supp_att_pflags_attr.attr, + &uv_query_supp_add_secret_req_ver_attr.attr, + &uv_query_supp_add_secret_pcf_attr.attr, + &uv_query_supp_secret_types_attr.attr, + &uv_query_max_secrets_attr.attr, + NULL, +}; + +static struct attribute_group uv_query_attr_group = { + .attrs = uv_query_attrs, +}; + +static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int val = 0; + +#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST + val = prot_virt_guest; +#endif + return sysfs_emit(buf, "%d\n", val); +} + +static ssize_t uv_is_prot_virt_host(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + int val = 0; + +#if IS_ENABLED(CONFIG_KVM) + val = prot_virt_host; +#endif + + return sysfs_emit(buf, "%d\n", val); +} + +static struct kobj_attribute uv_prot_virt_guest = + __ATTR(prot_virt_guest, 0444, uv_is_prot_virt_guest, NULL); + +static struct kobj_attribute uv_prot_virt_host = + __ATTR(prot_virt_host, 0444, uv_is_prot_virt_host, NULL); + +static const struct attribute *uv_prot_virt_attrs[] = { + &uv_prot_virt_guest.attr, + &uv_prot_virt_host.attr, + NULL, 
+}; + +static struct kset *uv_query_kset; +static struct kobject *uv_kobj; + +static int __init uv_info_init(void) +{ + int rc = -ENOMEM; + + if (!test_facility(158)) + return 0; + + uv_kobj = kobject_create_and_add("uv", firmware_kobj); + if (!uv_kobj) + return -ENOMEM; + + rc = sysfs_create_files(uv_kobj, uv_prot_virt_attrs); + if (rc) + goto out_kobj; + + uv_query_kset = kset_create_and_add("query", NULL, uv_kobj); + if (!uv_query_kset) { + rc = -ENOMEM; + goto out_ind_files; + } + + rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group); + if (!rc) + return 0; + + kset_unregister(uv_query_kset); +out_ind_files: + sysfs_remove_files(uv_kobj, uv_prot_virt_attrs); +out_kobj: + kobject_del(uv_kobj); + kobject_put(uv_kobj); + return rc; +} +device_initcall(uv_info_init); +#endif diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c new file mode 100644 index 0000000000..bbaefd84f1 --- /dev/null +++ b/arch/s390/kernel/vdso.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vdso setup for s390 + * + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <linux/binfmts.h> +#include <linux/compat.h> +#include <linux/elf.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/time_namespace.h> +#include <linux/random.h> +#include <vdso/datapage.h> +#include <asm/vdso.h> + +extern char vdso64_start[], vdso64_end[]; +extern char vdso32_start[], vdso32_end[]; + +static struct vm_special_mapping vvar_mapping; + +static union { + struct vdso_data data[CS_BASES]; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; + +struct vdso_data *vdso_data = vdso_data_store.data; + +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +/* + * The VVAR page layout depends on whether a task belongs to the root or + * non-root time namespace. Whenever a task changes its namespace, the VVAR + * page tables are cleared and then they will be re-faulted with a + * corresponding layout. + * See also the comment near timens_setup_vdso_data() for details. + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + VMA_ITERATOR(vmi, mm, 0); + struct vm_area_struct *vma; + + mmap_read_lock(mm); + for_each_vma(vmi, vma) { + if (!vma_is_special_mapping(vma, &vvar_mapping)) + continue; + zap_vma_pages(vma); + break; + } + mmap_read_unlock(mm); + return 0; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long addr, pfn; + vm_fault_t err; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + pfn = virt_to_pfn(vdso_data); + if (timens_page) { + /* + * Fault in VVAR page too, since it will be accessed + * to get clock data anyway. 
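The sysfs layout registered by uv_info_init() above ends up under /sys/firmware/uv, with the query attributes grouped in a "query" kset. A self-contained userspace sketch (hypothetical program, not part of this patch) that reads one of them:

	#include <stdio.h>

	int main(void)
	{
		char buf[64];
		FILE *f = fopen("/sys/firmware/uv/query/max_cpus", "r");

		if (f && fgets(buf, sizeof(buf), f))
			printf("maximum number of guest CPUs: %s", buf);
		if (f)
			fclose(f);
		return 0;
	}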
+ */ + addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; + err = vmf_insert_pfn(vma, addr, pfn); + if (unlikely(err & VM_FAULT_ERROR)) + return err; + pfn = page_to_pfn(timens_page); + } + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = virt_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *vma) +{ + current->mm->context.vdso_base = vma->vm_start; + return 0; +} + +static struct vm_special_mapping vvar_mapping = { + .name = "[vvar]", + .fault = vvar_fault, +}; + +static struct vm_special_mapping vdso64_mapping = { + .name = "[vdso]", + .mremap = vdso_mremap, +}; + +static struct vm_special_mapping vdso32_mapping = { + .name = "[vdso]", + .mremap = vdso_mremap, +}; + +int vdso_getcpu_init(void) +{ + set_tod_programmable_field(smp_processor_id()); + return 0; +} +early_initcall(vdso_getcpu_init); /* Must be called before SMP init */ + +static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len) +{ + unsigned long vvar_start, vdso_text_start, vdso_text_len; + struct vm_special_mapping *vdso_mapping; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + if (mmap_write_lock_killable(mm)) + return -EINTR; + + if (is_compat_task()) { + vdso_text_len = vdso32_end - vdso32_start; + vdso_mapping = &vdso32_mapping; + } else { + vdso_text_len = vdso64_end - vdso64_start; + vdso_mapping = &vdso64_mapping; + } + vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0); + rc = vvar_start; + if (IS_ERR_VALUE(vvar_start)) + goto out; + vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE, + VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP| + VM_PFNMAP, + &vvar_mapping); + rc = PTR_ERR(vma); + if (IS_ERR(vma)) + goto out; + vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE; + /* VM_MAYWRITE for COW so gdb can set breakpoints */ + vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_mapping); + if (IS_ERR(vma)) { + do_munmap(mm, vvar_start, PAGE_SIZE, NULL); + rc = PTR_ERR(vma); + } else { + current->mm->context.vdso_base = vdso_text_start; + rc = 0; + } +out: + mmap_write_unlock(mm); + return rc; +} + +static unsigned long vdso_addr(unsigned long start, unsigned long len) +{ + unsigned long addr, end, offset; + + /* + * Round up the start address. It can start out unaligned as a result + * of stack start randomization. + */ + start = PAGE_ALIGN(start); + + /* Round the lowest possible end address up to a PMD boundary. 
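A hypothetical worked example of this calculation (illustrative numbers only, assuming PAGE_SIZE = 4 KiB and PMD_SIZE = 1 MiB, and VDSO_BASE well above the result): with start = 0x3ffff5a1000 and len = 0x3000, the end address is rounded up to the PMD boundary 0x3ffff600000 and then reduced by len to 0x3ffff5fd000. That leaves 0x5c000 bytes between start and end, i.e. 0x5c + 1 = 93 possible page-aligned load slots, one of which is picked by get_random_u32_below().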
*/ + end = (start + len + PMD_SIZE - 1) & PMD_MASK; + if (end >= VDSO_BASE) + end = VDSO_BASE; + end -= len; + + if (end > start) { + offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1); + addr = start + (offset << PAGE_SHIFT); + } else { + addr = start; + } + return addr; +} + +unsigned long vdso_size(void) +{ + unsigned long size = VVAR_NR_PAGES * PAGE_SIZE; + + if (is_compat_task()) + size += vdso32_end - vdso32_start; + else + size += vdso64_end - vdso64_start; + return PAGE_ALIGN(size); +} + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + unsigned long addr = VDSO_BASE; + unsigned long size = vdso_size(); + + if (current->flags & PF_RANDOMIZE) + addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size); + return map_vdso(addr, size); +} + +static struct page ** __init vdso_setup_pages(void *start, void *end) +{ + int pages = (end - start) >> PAGE_SHIFT; + struct page **pagelist; + int i; + + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + for (i = 0; i < pages; i++) + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); + return pagelist; +} + +static int __init vdso_init(void) +{ + vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end); + if (IS_ENABLED(CONFIG_COMPAT)) + vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end); + return 0; +} +arch_initcall(vdso_init); diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore new file mode 100644 index 0000000000..5167384843 --- /dev/null +++ b/arch/s390/kernel/vdso32/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso32.lds diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile new file mode 100644 index 0000000000..23e868b79a --- /dev/null +++ b/arch/s390/kernel/vdso32/Makefile @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: GPL-2.0 +# List of files in the vdso + +KCOV_INSTRUMENT := n + +# Include the generic Makefile to check the built vdso. 
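As a quick illustration of vdso_size() above (hypothetical numbers): with VVAR_NR_PAGES = 2 and a vdso text image of, say, three pages, the mapping reserved by arch_setup_additional_pages() is PAGE_ALIGN(2 * 4096 + 3 * 4096) = 20 KiB, of which the first two pages form the [vvar] mapping and the remainder the [vdso] text that context.vdso_base points to.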
+include $(srctree)/lib/vdso/Makefile +obj-vdso32 = vdso_user_wrapper-32.o note-32.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so vdso32.so.dbg +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +KBUILD_AFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING + +KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_32 += -m31 -s + +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 += -m31 -fPIC -shared -fno-common -fno-builtin + +LDFLAGS_vdso32.so.dbg += -fPIC -shared -soname=linux-vdso32.so.1 \ + --hash-style=both --build-id=sha1 -melf_s390 -T + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) + +obj-y += vdso32_wrapper.o +targets += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) + +# Disable gcov profiling, ubsan and kasan for VDSO code +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n + +# Force dependency (incbin is bad) +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so + +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) + +$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE + $(call if_changed,vdso_and_check) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +$(obj-vdso32): %-32.o: %.S FORCE + $(call if_changed_dep,vdso32as) + +# actual build commands +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(CC) $(c_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso32.so: $(obj)/vdso32.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso32.so + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE + $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso32/gen_vdso_offsets.sh b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh new file mode 100755 index 0000000000..9c4f951e22 --- /dev/null +++ b/arch/s390/kernel/vdso32/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . __kernel_compat_\(.*\)/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S new file mode 100644 index 0000000000..db19d0680a --- /dev/null +++ b/arch/s390/kernel/vdso32/note.S @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. 
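For clarity, an example of the transformation performed by the gen_vdso_offsets.sh script above (the address is hypothetical): an nm output line such as

	00000000000004e0 T __kernel_compat_sigreturn

becomes the generated header line

	#define vdso32_offset_sigreturn	0x00000000000004e0

which the kernel later uses to locate the signal return trampolines inside the mapped vdso image.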
+ */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S new file mode 100644 index 0000000000..edf5ff1deb --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ + +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") +OUTPUT_ARCH(s390:31-bit) +ENTRY(_start) + +SECTIONS +{ + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif + . = VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. 
+ */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_compat_restart_syscall; + __kernel_compat_rt_sigreturn; + __kernel_compat_sigreturn; + local: *; + }; +} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 0000000000..de2fb93047 --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/s390/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/s390/kernel/vdso32/vdso_user_wrapper.S b/arch/s390/kernel/vdso32/vdso_user_wrapper.S new file mode 100644 index 0000000000..2e645003fd --- /dev/null +++ b/arch/s390/kernel/vdso32/vdso_user_wrapper.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/unistd.h> +#include <asm/dwarf.h> + +.macro vdso_syscall func,syscall + .globl __kernel_compat_\func + .type __kernel_compat_\func,@function + __ALIGN +__kernel_compat_\func: + CFI_STARTPROC + svc \syscall + /* Make sure we notice when a syscall returns, which shouldn't happen */ + .word 0 + CFI_ENDPROC + .size __kernel_compat_\func,.-__kernel_compat_\func +.endm + +vdso_syscall restart_syscall,__NR_restart_syscall +vdso_syscall sigreturn,__NR_sigreturn +vdso_syscall rt_sigreturn,__NR_rt_sigreturn diff --git a/arch/s390/kernel/vdso64/.gitignore b/arch/s390/kernel/vdso64/.gitignore new file mode 100644 index 0000000000..4ec80685fe --- /dev/null +++ b/arch/s390/kernel/vdso64/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso64.lds diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile new file mode 100644 index 0000000000..fc1c6ff817 --- /dev/null +++ b/arch/s390/kernel/vdso64/Makefile @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: GPL-2.0 +# List of files in the vdso + +KCOV_INSTRUMENT := n + +# Include the generic Makefile to check the built vdso. 
+include $(srctree)/lib/vdso/Makefile +obj-vdso64 = vdso_user_wrapper.o note.o +obj-cvdso64 = vdso64_generic.o getcpu.o +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) +CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) +CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) + +# Build rules + +targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) +obj-cvdso64 := $(addprefix $(obj)/, $(obj-cvdso64)) + +KBUILD_AFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING + +KBUILD_AFLAGS_64 := $(filter-out -m64,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS_64 += -m64 + +KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_64 := $(filter-out -mno-pic-data-is-text-relative,$(KBUILD_CFLAGS_64)) +KBUILD_CFLAGS_64 += -m64 -fPIC -fno-common -fno-builtin +ldflags-y := -fPIC -shared -soname=linux-vdso64.so.1 \ + --hash-style=both --build-id=sha1 -T + +$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64) +$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64) + +obj-y += vdso64_wrapper.o +targets += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +# Disable gcov profiling, ubsan and kasan for VDSO code +GCOV_PROFILE := n +UBSAN_SANITIZE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n + +# Force dependency (incbin is bad) +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so + +quiet_cmd_vdso_and_check = VDSO $@ + cmd_vdso_and_check = $(cmd_ld); $(cmd_vdso_check) + +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj-cvdso64) FORCE + $(call if_changed,vdso_and_check) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S FORCE + $(call if_changed_dep,vdso64as) + +$(obj-cvdso64): %.o: %.c FORCE + $(call if_changed_dep,vdso64cc) + +# actual build commands +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso64cc = VDSO64C $@ + cmd_vdso64cc = $(CC) $(c_flags) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso64.so: $(obj)/vdso64.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso64.so + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE + $(call if_changed,vdsosym) diff --git a/arch/s390/kernel/vdso64/gen_vdso_offsets.sh b/arch/s390/kernel/vdso64/gen_vdso_offsets.sh new file mode 100755 index 0000000000..37f05cb38d --- /dev/null +++ b/arch/s390/kernel/vdso64/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . 
__kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p' diff --git a/arch/s390/kernel/vdso64/getcpu.c b/arch/s390/kernel/vdso64/getcpu.c new file mode 100644 index 0000000000..5c5d4a848b --- /dev/null +++ b/arch/s390/kernel/vdso64/getcpu.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright IBM Corp. 2020 */ + +#include <linux/compiler.h> +#include <linux/getcpu.h> +#include <asm/timex.h> +#include "vdso.h" + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) +{ + union tod_clock clk; + + /* CPU number is stored in the programmable field of the TOD clock */ + store_tod_clock_ext(&clk); + if (cpu) + *cpu = clk.pf; + /* NUMA node is always zero */ + if (node) + *node = 0; + return 0; +} diff --git a/arch/s390/kernel/vdso64/note.S b/arch/s390/kernel/vdso64/note.S new file mode 100644 index 0000000000..db19d0680a --- /dev/null +++ b/arch/s390/kernel/vdso64/note.S @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h new file mode 100644 index 0000000000..34c7a2312f --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_S390_KERNEL_VDSO64_VDSO_H +#define __ARCH_S390_KERNEL_VDSO64_VDSO_H + +#include <vdso/datapage.h> + +struct getcpu_cache; + +int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused); +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); + +#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */ diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S new file mode 100644 index 0000000000..4461ea151e --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is the infamous ld script for the 64 bits vdso + * library + */ + +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +ENTRY(_start) + +SECTIONS +{ + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif + . = VDSO_LBASE + SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . 
= ALIGN(16); + .text : { + *(.text .stub .text.* .gnu.linkonce.t.*) + } :text + PROVIDE(__etext = .); + PROVIDE(_etext = .); + PROVIDE(etext = .); + + /* + * Other stuff is appended to the text segment: + */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } + + .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } + + _end = .; + PROVIDE(end = .); + + /* + * Stabs debugging sections are here too. + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + * Symbols in the DWARF debugging sections are relative to the + * beginning of the section so we begin them at 0. + */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } + + /DISCARD/ : { + *(.note.GNU-stack) + *(.branch_lt) + *(.data .data.* .gnu.linkonce.d.* .sdata*) + *(.bss .sbss .dynbss .dynsbss) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. 
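The __kernel_* symbols exported below are what the C library resolves from the vdso image the kernel maps into every process. A self-contained userspace sketch (hypothetical program, not part of this patch) showing where that image lives; a real consumer such as glibc would go on to parse its dynamic symbol table to find __kernel_clock_gettime and friends:

	#include <elf.h>
	#include <stdio.h>
	#include <sys/auxv.h>

	int main(void)
	{
		/* Start address of the vdso ELF image mapped by the kernel. */
		unsigned long vdso = getauxval(AT_SYSINFO_EHDR);

		printf("vdso mapped at 0x%lx\n", vdso);
		return 0;
	}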
+ */ +VERSION +{ + VDSO_VERSION_STRING { + global: + /* + * Has to be there for the kernel to find + */ + __kernel_gettimeofday; + __kernel_clock_gettime; + __kernel_clock_getres; + __kernel_getcpu; + __kernel_restart_syscall; + __kernel_rt_sigreturn; + __kernel_sigreturn; + local: *; + }; +} diff --git a/arch/s390/kernel/vdso64/vdso64_generic.c b/arch/s390/kernel/vdso64/vdso64_generic.c new file mode 100644 index 0000000000..a9aa75643c --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_generic.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/vdso/gettimeofday.c" +#include "vdso.h" + +int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_getres(clock, ts); +} diff --git a/arch/s390/kernel/vdso64/vdso64_wrapper.S b/arch/s390/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 0000000000..6721849986 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/s390/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/s390/kernel/vdso64/vdso_user_wrapper.S b/arch/s390/kernel/vdso64/vdso_user_wrapper.S new file mode 100644 index 0000000000..57f62596e5 --- /dev/null +++ b/arch/s390/kernel/vdso64/vdso_user_wrapper.S @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/vdso.h> +#include <asm/unistd.h> +#include <asm/asm-offsets.h> +#include <asm/dwarf.h> +#include <asm/ptrace.h> + +#define WRAPPER_FRAME_SIZE (STACK_FRAME_OVERHEAD+8) + +/* + * Older glibc version called vdso without allocating a stackframe. This wrapper + * is just used to allocate a stackframe. See + * https://sourceware.org/git/?p=glibc.git;a=commit;h=478593e6374f3818da39332260dc453cb19cfa1e + * for details. 
+ */ +.macro vdso_func func + .globl __kernel_\func + .type __kernel_\func,@function + __ALIGN +__kernel_\func: + CFI_STARTPROC + aghi %r15,-WRAPPER_FRAME_SIZE + CFI_DEF_CFA_OFFSET (STACK_FRAME_OVERHEAD + WRAPPER_FRAME_SIZE) + CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD + stg %r14,STACK_FRAME_OVERHEAD(%r15) + brasl %r14,__s390_vdso_\func + lg %r14,STACK_FRAME_OVERHEAD(%r15) + aghi %r15,WRAPPER_FRAME_SIZE + CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD + CFI_RESTORE 15 + br %r14 + CFI_ENDPROC + .size __kernel_\func,.-__kernel_\func +.endm + +vdso_func gettimeofday +vdso_func clock_getres +vdso_func clock_gettime +vdso_func getcpu + +.macro vdso_syscall func,syscall + .globl __kernel_\func + .type __kernel_\func,@function + __ALIGN +__kernel_\func: + CFI_STARTPROC + svc \syscall + /* Make sure we notice when a syscall returns, which shouldn't happen */ + .word 0 + CFI_ENDPROC + .size __kernel_\func,.-__kernel_\func +.endm + +vdso_syscall restart_syscall,__NR_restart_syscall +vdso_syscall sigreturn,__NR_sigreturn +vdso_syscall rt_sigreturn,__NR_rt_sigreturn diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S new file mode 100644 index 0000000000..2ae201ebf9 --- /dev/null +++ b/arch/s390/kernel/vmlinux.lds.S @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* ld script to make s390 Linux kernel + * Written by Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm/ftrace.lds.h> + +/* + * Put .bss..swapper_pg_dir as the first thing in .bss. This will + * make sure it has 16k alignment. + */ +#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) \ + *(.bss..invalid_pg_dir) + +#define RO_EXCEPTION_TABLE_ALIGN 16 + +/* Handle ro_after_init data on our own. */ +#define RO_AFTER_INIT_DATA + +#define RUNTIME_DISCARD_EXIT + +#define EMITS_PT_NOTE + +#include <asm-generic/vmlinux.lds.h> +#include <asm/vmlinux.lds.h> + +OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390") +OUTPUT_ARCH(s390:64-bit) +ENTRY(startup_continue) +jiffies = jiffies_64; + +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(0); /* ___ */ +} + +SECTIONS +{ + . = 0x100000; + .text : { + _stext = .; /* Start of text section */ + _text = .; /* Text and read-only data */ + HEAD_TEXT + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + FTRACE_HOTPATCH_TRAMPOLINES_TEXT + *(.text.*_indirect_*) + *(.gnu.warning) + . = ALIGN(PAGE_SIZE); + _etext = .; /* End of text section */ + } :text = 0x0700 + + RO_DATA(PAGE_SIZE) + + . = ALIGN(PAGE_SIZE); + _sdata = .; /* Start of data section */ + + . = ALIGN(PAGE_SIZE); + __start_ro_after_init = .; + .data..ro_after_init : { + *(.data..ro_after_init) + JUMP_TABLE_DATA + } :data + . = ALIGN(PAGE_SIZE); + __end_ro_after_init = .; + + RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) + BOOT_DATA_PRESERVED + + . = ALIGN(8); + .amode31.refs : { + _start_amode31_refs = .; + *(.amode31.refs) + _end_amode31_refs = .; + } + + . = ALIGN(PAGE_SIZE); + _edata = .; /* End of data section */ + + /* will be freed after init */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ + __init_begin = .; + + . = ALIGN(PAGE_SIZE); + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + _sinittext = .; + INIT_TEXT + . 
= ALIGN(PAGE_SIZE); + _einittext = .; + } + + /* + * .exit.text is discarded at runtime, not link time, + * to deal with references from __bug_table + */ + .exit.text : { + EXIT_TEXT + } + + .exit.data : { + EXIT_DATA + } + + /* + * struct alt_inst entries. From the header (alternative.h): + * "Alternative instructions for different CPU types or capabilities" + * Think locking instructions on spinlocks. + * Note, that it is a part of __init region. + */ + . = ALIGN(8); + .altinstructions : { + __alt_instructions = .; + *(.altinstructions) + __alt_instructions_end = .; + } + + /* + * And here are the replacement instructions. The linker sticks + * them as binary blobs. The .altinstructions has enough data to + * get the address and the length of them to patch the kernel safely. + * Note, that it is a part of __init region. + */ + .altinstr_replacement : { + *(.altinstr_replacement) + } + + /* + * Table with the patch locations to undo expolines + */ + . = ALIGN(4); + .nospec_call_table : { + __nospec_call_start = . ; + *(.s390_indirect*) + __nospec_call_end = . ; + } + .nospec_return_table : { + __nospec_return_start = . ; + *(.s390_return*) + __nospec_return_end = . ; + } + + BOOT_DATA + + /* + * .amode31 section for code, data, ex_table that need to stay + * below 2 GB, even when the kernel is relocated above 2 GB. + */ + . = ALIGN(PAGE_SIZE); + _samode31 = .; + .amode31.text : { + _stext_amode31 = .; + *(.amode31.text) + *(.amode31.text.*_indirect_*) + . = ALIGN(PAGE_SIZE); + _etext_amode31 = .; + } + . = ALIGN(16); + .amode31.ex_table : { + _start_amode31_ex_table = .; + KEEP(*(.amode31.ex_table)) + _stop_amode31_ex_table = .; + } + . = ALIGN(PAGE_SIZE); + .amode31.data : { + *(.amode31.data) + } + . = ALIGN(PAGE_SIZE); + _eamode31 = .; + + /* early.c uses stsi, which requires page aligned data. */ + . = ALIGN(PAGE_SIZE); + INIT_DATA_SECTION(0x100) + + PERCPU_SECTION(0x100) + + .dynsym ALIGN(8) : { + __dynsym_start = .; + *(.dynsym) + __dynsym_end = .; + } + .rela.dyn ALIGN(8) : { + __rela_dyn_start = .; + *(.rela*) + __rela_dyn_end = .; + } + + . = ALIGN(PAGE_SIZE); + __init_end = .; /* freed after init ends here */ + + BSS_SECTION(PAGE_SIZE, 4 * PAGE_SIZE, PAGE_SIZE) + + . = ALIGN(PAGE_SIZE); + _end = . ; + + /* + * uncompressed image info used by the decompressor + * it should match struct vmlinux_info + */ + .vmlinux.info 0 (INFO) : { + QUAD(_stext) /* default_lma */ + QUAD(startup_continue) /* entry */ + QUAD(__bss_start - _stext) /* image_size */ + QUAD(__bss_stop - __bss_start) /* bss_size */ + QUAD(__boot_data_start) /* bootdata_off */ + QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */ + QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */ + QUAD(__boot_data_preserved_end - + __boot_data_preserved_start) /* bootdata_preserved_size */ + QUAD(__dynsym_start) /* dynsym_start */ + QUAD(__rela_dyn_start) /* rela_dyn_start */ + QUAD(__rela_dyn_end) /* rela_dyn_end */ + QUAD(_eamode31 - _samode31) /* amode31_size */ + QUAD(init_mm) + QUAD(swapper_pg_dir) + QUAD(invalid_pg_dir) +#ifdef CONFIG_KASAN + QUAD(kasan_early_shadow_page) + QUAD(kasan_early_shadow_pte) + QUAD(kasan_early_shadow_pmd) + QUAD(kasan_early_shadow_pud) + QUAD(kasan_early_shadow_p4d) +#endif + } :NONE + + /* Debugging sections. 
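The QUAD entries of the .vmlinux.info section above are consumed by the decompressor; a sketch of the matching C structure, with field names taken from the comments in the linker script (the authoritative definition lives in the boot code, so treat this purely as an illustration):

	struct vmlinux_info {
		unsigned long default_lma;
		unsigned long entry;
		unsigned long image_size;
		unsigned long bss_size;
		unsigned long bootdata_off;
		unsigned long bootdata_size;
		unsigned long bootdata_preserved_off;
		unsigned long bootdata_preserved_size;
		unsigned long dynsym_start;
		unsigned long rela_dyn_start;
		unsigned long rela_dyn_end;
		unsigned long amode31_size;
		unsigned long init_mm_off;
		unsigned long swapper_pg_dir_off;
		unsigned long invalid_pg_dir_off;
	#ifdef CONFIG_KASAN
		unsigned long kasan_early_shadow_page_off;
		unsigned long kasan_early_shadow_pte_off;
		unsigned long kasan_early_shadow_pmd_off;
		unsigned long kasan_early_shadow_pud_off;
		unsigned long kasan_early_shadow_p4d_off;
	#endif
	};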
*/ + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { + *(.eh_frame) + *(.interp) + } +} diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c new file mode 100644 index 0000000000..e0a88dcaf5 --- /dev/null +++ b/arch/s390/kernel/vtime.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Virtual cpu timer based timer functions. + * + * Copyright IBM Corp. 2004, 2012 + * Author(s): Jan Glauber <jan.glauber@de.ibm.com> + */ + +#include <linux/kernel_stat.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/timex.h> +#include <linux/types.h> +#include <linux/time.h> +#include <asm/alternative.h> +#include <asm/cputime.h> +#include <asm/vtimer.h> +#include <asm/vtime.h> +#include <asm/cpu_mf.h> +#include <asm/smp.h> + +#include "entry.h" + +static void virt_timer_expire(void); + +static LIST_HEAD(virt_timer_list); +static DEFINE_SPINLOCK(virt_timer_lock); +static atomic64_t virt_timer_current; +static atomic64_t virt_timer_elapsed; + +DEFINE_PER_CPU(u64, mt_cycles[8]); +static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 }; +static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 }; +static DEFINE_PER_CPU(u64, mt_scaling_jiffies); + +static inline u64 get_vtimer(void) +{ + u64 timer; + + asm volatile("stpt %0" : "=Q" (timer)); + return timer; +} + +static inline void set_vtimer(u64 expires) +{ + u64 timer; + + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " spt %1" /* Set new value imm. afterwards */ + : "=Q" (timer) : "Q" (expires)); + S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer; + S390_lowcore.last_update_timer = expires; +} + +static inline int virt_timer_forward(u64 elapsed) +{ + BUG_ON(!irqs_disabled()); + + if (list_empty(&virt_timer_list)) + return 0; + elapsed = atomic64_add_return(elapsed, &virt_timer_elapsed); + return elapsed >= atomic64_read(&virt_timer_current); +} + +static void update_mt_scaling(void) +{ + u64 cycles_new[8], *cycles_old; + u64 delta, fac, mult, div; + int i; + + stcctm(MT_DIAG, smp_cpu_mtid + 1, cycles_new); + cycles_old = this_cpu_ptr(mt_cycles); + fac = 1; + mult = div = 0; + for (i = 0; i <= smp_cpu_mtid; i++) { + delta = cycles_new[i] - cycles_old[i]; + div += delta; + mult *= i + 1; + mult += delta * fac; + fac *= i + 1; + } + div *= fac; + if (div > 0) { + /* Update scaling factor */ + __this_cpu_write(mt_scaling_mult, mult); + __this_cpu_write(mt_scaling_div, div); + memcpy(cycles_old, cycles_new, + sizeof(u64) * (smp_cpu_mtid + 1)); + } + __this_cpu_write(mt_scaling_jiffies, jiffies_64); +} + +static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new) +{ + u64 delta; + + delta = new - *tsk_vtime; + *tsk_vtime = new; + return delta; +} + + +static inline u64 scale_vtime(u64 vtime) +{ + u64 mult = __this_cpu_read(mt_scaling_mult); + u64 div = __this_cpu_read(mt_scaling_div); + + if (smp_cpu_mtid) + return vtime * mult / div; + return vtime; +} + +static void account_system_index_scaled(struct task_struct *p, u64 cputime, + enum cpu_usage_stat index) +{ + p->stimescaled += cputime_to_nsecs(scale_vtime(cputime)); + account_system_index_time(p, cputime_to_nsecs(cputime), index); +} + +/* + * Update process times based on virtual cpu times stored by entry.S + * to the lowcore fields user_timer, system_timer & steal_clock. 
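A small worked example of the steal-time bookkeeping done in do_account_vtime() below (numbers are purely illustrative): if the TOD clock advanced by 1000 units since the last update while the per-category CPU-timer deltas add up to user 600 + system 300 + hardirq 40 = 940 units, the remaining 60 units were not available to this virtual CPU and are added to steal_timer, to be reported later by vtime_flush() via account_steal_time().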
+ */ +static int do_account_vtime(struct task_struct *tsk) +{ + u64 timer, clock, user, guest, system, hardirq, softirq; + + timer = S390_lowcore.last_update_timer; + clock = S390_lowcore.last_update_clock; + asm volatile( + " stpt %0\n" /* Store current cpu timer value */ + " stckf %1" /* Store current tod clock value */ + : "=Q" (S390_lowcore.last_update_timer), + "=Q" (S390_lowcore.last_update_clock) + : : "cc"); + clock = S390_lowcore.last_update_clock - clock; + timer -= S390_lowcore.last_update_timer; + + if (hardirq_count()) + S390_lowcore.hardirq_timer += timer; + else + S390_lowcore.system_timer += timer; + + /* Update MT utilization calculation */ + if (smp_cpu_mtid && + time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies))) + update_mt_scaling(); + + /* Calculate cputime delta */ + user = update_tsk_timer(&tsk->thread.user_timer, + READ_ONCE(S390_lowcore.user_timer)); + guest = update_tsk_timer(&tsk->thread.guest_timer, + READ_ONCE(S390_lowcore.guest_timer)); + system = update_tsk_timer(&tsk->thread.system_timer, + READ_ONCE(S390_lowcore.system_timer)); + hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, + READ_ONCE(S390_lowcore.hardirq_timer)); + softirq = update_tsk_timer(&tsk->thread.softirq_timer, + READ_ONCE(S390_lowcore.softirq_timer)); + S390_lowcore.steal_timer += + clock - user - guest - system - hardirq - softirq; + + /* Push account value */ + if (user) { + account_user_time(tsk, cputime_to_nsecs(user)); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(user)); + } + + if (guest) { + account_guest_time(tsk, cputime_to_nsecs(guest)); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest)); + } + + if (system) + account_system_index_scaled(tsk, system, CPUTIME_SYSTEM); + if (hardirq) + account_system_index_scaled(tsk, hardirq, CPUTIME_IRQ); + if (softirq) + account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ); + + return virt_timer_forward(user + guest + system + hardirq + softirq); +} + +void vtime_task_switch(struct task_struct *prev) +{ + do_account_vtime(prev); + prev->thread.user_timer = S390_lowcore.user_timer; + prev->thread.guest_timer = S390_lowcore.guest_timer; + prev->thread.system_timer = S390_lowcore.system_timer; + prev->thread.hardirq_timer = S390_lowcore.hardirq_timer; + prev->thread.softirq_timer = S390_lowcore.softirq_timer; + S390_lowcore.user_timer = current->thread.user_timer; + S390_lowcore.guest_timer = current->thread.guest_timer; + S390_lowcore.system_timer = current->thread.system_timer; + S390_lowcore.hardirq_timer = current->thread.hardirq_timer; + S390_lowcore.softirq_timer = current->thread.softirq_timer; +} + +/* + * In s390, accounting pending user time also implies + * accounting system time in order to correctly compute + * the stolen time accounting. + */ +void vtime_flush(struct task_struct *tsk) +{ + u64 steal, avg_steal; + + if (do_account_vtime(tsk)) + virt_timer_expire(); + + steal = S390_lowcore.steal_timer; + avg_steal = S390_lowcore.avg_steal_timer / 2; + if ((s64) steal > 0) { + S390_lowcore.steal_timer = 0; + account_steal_time(cputime_to_nsecs(steal)); + avg_steal += steal; + } + S390_lowcore.avg_steal_timer = avg_steal; +} + +static u64 vtime_delta(void) +{ + u64 timer = S390_lowcore.last_update_timer; + + S390_lowcore.last_update_timer = get_vtimer(); + + return timer - S390_lowcore.last_update_timer; +} + +/* + * Update process times based on virtual cpu times stored by entry.S + * to the lowcore fields user_timer, system_timer & steal_clock. 
+ */ +void vtime_account_kernel(struct task_struct *tsk) +{ + u64 delta = vtime_delta(); + + if (tsk->flags & PF_VCPU) + S390_lowcore.guest_timer += delta; + else + S390_lowcore.system_timer += delta; + + virt_timer_forward(delta); +} +EXPORT_SYMBOL_GPL(vtime_account_kernel); + +void vtime_account_softirq(struct task_struct *tsk) +{ + u64 delta = vtime_delta(); + + S390_lowcore.softirq_timer += delta; + + virt_timer_forward(delta); +} + +void vtime_account_hardirq(struct task_struct *tsk) +{ + u64 delta = vtime_delta(); + + S390_lowcore.hardirq_timer += delta; + + virt_timer_forward(delta); +} + +/* + * Sorted add to a list. List is linear searched until first bigger + * element is found. + */ +static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) +{ + struct vtimer_list *tmp; + + list_for_each_entry(tmp, head, entry) { + if (tmp->expires > timer->expires) { + list_add_tail(&timer->entry, &tmp->entry); + return; + } + } + list_add_tail(&timer->entry, head); +} + +/* + * Handler for expired virtual CPU timer. + */ +static void virt_timer_expire(void) +{ + struct vtimer_list *timer, *tmp; + unsigned long elapsed; + LIST_HEAD(cb_list); + + /* walk timer list, fire all expired timers */ + spin_lock(&virt_timer_lock); + elapsed = atomic64_read(&virt_timer_elapsed); + list_for_each_entry_safe(timer, tmp, &virt_timer_list, entry) { + if (timer->expires < elapsed) + /* move expired timer to the callback queue */ + list_move_tail(&timer->entry, &cb_list); + else + timer->expires -= elapsed; + } + if (!list_empty(&virt_timer_list)) { + timer = list_first_entry(&virt_timer_list, + struct vtimer_list, entry); + atomic64_set(&virt_timer_current, timer->expires); + } + atomic64_sub(elapsed, &virt_timer_elapsed); + spin_unlock(&virt_timer_lock); + + /* Do callbacks and recharge periodic timers */ + list_for_each_entry_safe(timer, tmp, &cb_list, entry) { + list_del_init(&timer->entry); + timer->function(timer->data); + if (timer->interval) { + /* Recharge interval timer */ + timer->expires = timer->interval + + atomic64_read(&virt_timer_elapsed); + spin_lock(&virt_timer_lock); + list_add_sorted(timer, &virt_timer_list); + spin_unlock(&virt_timer_lock); + } + } +} + +void init_virt_timer(struct vtimer_list *timer) +{ + timer->function = NULL; + INIT_LIST_HEAD(&timer->entry); +} +EXPORT_SYMBOL(init_virt_timer); + +static inline int vtimer_pending(struct vtimer_list *timer) +{ + return !list_empty(&timer->entry); +} + +static void internal_add_vtimer(struct vtimer_list *timer) +{ + if (list_empty(&virt_timer_list)) { + /* First timer, just program it. */ + atomic64_set(&virt_timer_current, timer->expires); + atomic64_set(&virt_timer_elapsed, 0); + list_add(&timer->entry, &virt_timer_list); + } else { + /* Update timer against current base. */ + timer->expires += atomic64_read(&virt_timer_elapsed); + if (likely((s64) timer->expires < + (s64) atomic64_read(&virt_timer_current))) + /* The new timer expires before the current timer. */ + atomic64_set(&virt_timer_current, timer->expires); + /* Insert new timer into the list. */ + list_add_sorted(timer, &virt_timer_list); + } +} + +static void __add_vtimer(struct vtimer_list *timer, int periodic) +{ + unsigned long flags; + + timer->interval = periodic ? 
timer->expires : 0; + spin_lock_irqsave(&virt_timer_lock, flags); + internal_add_vtimer(timer); + spin_unlock_irqrestore(&virt_timer_lock, flags); +} + +/* + * add_virt_timer - add a oneshot virtual CPU timer + */ +void add_virt_timer(struct vtimer_list *timer) +{ + __add_vtimer(timer, 0); +} +EXPORT_SYMBOL(add_virt_timer); + +/* + * add_virt_timer_int - add an interval virtual CPU timer + */ +void add_virt_timer_periodic(struct vtimer_list *timer) +{ + __add_vtimer(timer, 1); +} +EXPORT_SYMBOL(add_virt_timer_periodic); + +static int __mod_vtimer(struct vtimer_list *timer, u64 expires, int periodic) +{ + unsigned long flags; + int rc; + + BUG_ON(!timer->function); + + if (timer->expires == expires && vtimer_pending(timer)) + return 1; + spin_lock_irqsave(&virt_timer_lock, flags); + rc = vtimer_pending(timer); + if (rc) + list_del_init(&timer->entry); + timer->interval = periodic ? expires : 0; + timer->expires = expires; + internal_add_vtimer(timer); + spin_unlock_irqrestore(&virt_timer_lock, flags); + return rc; +} + +/* + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer(struct vtimer_list *timer, u64 expires) +{ + return __mod_vtimer(timer, expires, 0); +} +EXPORT_SYMBOL(mod_virt_timer); + +/* + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer_periodic(struct vtimer_list *timer, u64 expires) +{ + return __mod_vtimer(timer, expires, 1); +} +EXPORT_SYMBOL(mod_virt_timer_periodic); + +/* + * Delete a virtual timer. + * + * returns whether the deleted timer was pending (1) or not (0) + */ +int del_virt_timer(struct vtimer_list *timer) +{ + unsigned long flags; + + if (!vtimer_pending(timer)) + return 0; + spin_lock_irqsave(&virt_timer_lock, flags); + list_del_init(&timer->entry); + spin_unlock_irqrestore(&virt_timer_lock, flags); + return 1; +} +EXPORT_SYMBOL(del_virt_timer); + +/* + * Start the virtual CPU timer on the current CPU. + */ +void vtime_init(void) +{ + /* set initial cpu timer */ + set_vtimer(VTIMER_MAX_SLICE); + /* Setup initial MT scaling values */ + if (smp_cpu_mtid) { + __this_cpu_write(mt_scaling_jiffies, jiffies); + __this_cpu_write(mt_scaling_mult, 1); + __this_cpu_write(mt_scaling_div, 1); + stcctm(MT_DIAG, smp_cpu_mtid + 1, this_cpu_ptr(mt_cycles)); + } +} |
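To round off the vtimer interface defined above, a minimal usage sketch (hypothetical caller, not part of this patch; the expiry value is in CPU-timer units and chosen arbitrarily, and struct vtimer_list is assumed to come from <asm/vtimer.h> as used throughout this file):

	static struct vtimer_list my_vtimer;

	static void my_vtimer_fn(unsigned long data)
	{
		/* runs from virt_timer_expire() once the CPU-time slice is used up */
	}

	static void my_vtimer_setup(void)
	{
		init_virt_timer(&my_vtimer);
		my_vtimer.function = my_vtimer_fn;
		my_vtimer.data = 0;
		my_vtimer.expires = 0x400000;	/* illustrative CPU-timer delta */
		add_virt_timer(&my_vtimer);	/* one-shot; add_virt_timer_periodic() re-arms */
	}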